sql-glider 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -1,5 +1,5 @@
1
1
  sqlglider/__init__.py,sha256=gDf7s52dMcX7JuCZ1SLawcB1vb3U0yJCohu9RQAATBY,125
2
- sqlglider/_version.py,sha256=rLCrf4heo25FJtBY-2Ap7ZuWW-5FS7sqTjsolIUuI5c,704
2
+ sqlglider/_version.py,sha256=riGXiVTWXmtdoju9hVCWvTxpszEMAAIK0sZZWoLKlnU,704
3
3
  sqlglider/cli.py,sha256=9sweHRVLk2iBSzCzT2Gcj8y1g1XKzq26iApQsMaFbx4,51786
4
4
  sqlglider/global_models.py,sha256=2vyJXAuXOsXQpE-D3F0ejj7eR9z0nDWFjTkielhzM8k,356
5
5
  sqlglider/catalog/__init__.py,sha256=2PqFPyzFXJ14FpSUcBmVK2L-a_ypWQHAbHFHxLDk_LE,814
@@ -17,7 +17,7 @@ sqlglider/graph/models.py,sha256=EYmjv_WzDSNp_WfhJ6H-qBIOkAcoNKS7GRUryfKrHuY,933
17
17
  sqlglider/graph/query.py,sha256=LHU8Cvn7ZPPSEnqdDn2pF8f1_LQjIvNIrZqs8cFlb6U,9433
18
18
  sqlglider/graph/serialization.py,sha256=7JJo31rwSlxnDhdqdTJdK4Dr_ZcSYetXfx3_CmndSac,2662
19
19
  sqlglider/lineage/__init__.py,sha256=llXMeI5_PIZaiBo8tKk3-wOubF4m_6QBHbn1FtWxT7k,256
20
- sqlglider/lineage/analyzer.py,sha256=kRhGcGaiixxtrf9vO8g09omayjB2G3LA9hLCOLaTyPg,56811
20
+ sqlglider/lineage/analyzer.py,sha256=Vfh0g9xVEEUkQ87KZlCcZVPltDJ6Uos67PBtDyQ_i8U,64679
21
21
  sqlglider/lineage/formatters.py,sha256=_Y9wcTX4JXn1vVnZ1xI656g1FF2rMjcAVc-GHjbd9QA,10389
22
22
  sqlglider/templating/__init__.py,sha256=g3_wb6rSDI0usq2UUMDpn-J5kVwlAw3NtLdwbxL6UHs,1435
23
23
  sqlglider/templating/base.py,sha256=y5bWAW7qXl_4pPyo5KycfHwNVvt1-7slZ63DAsvTE1s,2902
@@ -27,8 +27,8 @@ sqlglider/templating/variables.py,sha256=5593PtLBcOxsnMCSRm2pGAD5I0Y9f__VV3_J_Hf
27
27
  sqlglider/utils/__init__.py,sha256=KGp9-UzKz_OFBOTFoSy-g-NXDZsvyWXG_9-1zcC6ePE,276
28
28
  sqlglider/utils/config.py,sha256=iNJgSXFw3pmL2MCdvW3SJp4X2T3AQP2QyQuXIXT-6H0,4761
29
29
  sqlglider/utils/file_utils.py,sha256=5_ff28E0r1R7emZzsOnRuHd-7zIX6873eyr1SuPEr4E,1093
30
- sql_glider-0.1.4.dist-info/METADATA,sha256=-gzDzEyZ116YpDBNbIwWMgMO184s-WkDKMxMH92lOqA,28445
31
- sql_glider-0.1.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
32
- sql_glider-0.1.4.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
33
- sql_glider-0.1.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- sql_glider-0.1.4.dist-info/RECORD,,
30
+ sql_glider-0.1.6.dist-info/METADATA,sha256=CMsjG3MTf4wVrO07SpgpLqywymxCaQIpbmkt9G7OG9c,28445
31
+ sql_glider-0.1.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
32
+ sql_glider-0.1.6.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
33
+ sql_glider-0.1.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ sql_glider-0.1.6.dist-info/RECORD,,
sqlglider/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.4'
32
- __version_tuple__ = version_tuple = (0, 1, 4)
31
+ __version__ = version = '0.1.6'
32
+ __version_tuple__ = version_tuple = (0, 1, 6)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -182,20 +182,43 @@ class LineageAnalyzer:
182
182
  # For aliased columns, use the alias as the column name
183
183
  column_name = projection.alias
184
184
  lineage_name = column_name # SQLGlot lineage uses the alias
185
- else:
186
- source_expr = projection
187
- if isinstance(source_expr, exp.Column):
188
- column_name = source_expr.name
189
- lineage_name = column_name
185
+ # Qualify with target table
186
+ qualified_name = f"{target_table}.{column_name}"
187
+ columns.append(qualified_name)
188
+ self._column_mapping[qualified_name] = lineage_name
189
+ elif isinstance(projection, exp.Column):
190
+ # Check if this is a table-qualified star (e.g., t.*)
191
+ if isinstance(projection.this, exp.Star):
192
+ source_table = projection.table
193
+ qualified_star_cols: List[str] = []
194
+ if source_table and first_select:
195
+ qualified_star_cols = self._resolve_qualified_star(
196
+ source_table, first_select
197
+ )
198
+ for col in qualified_star_cols:
199
+ qualified_name = f"{target_table}.{col}"
200
+ columns.append(qualified_name)
201
+ self._column_mapping[qualified_name] = col
202
+ if not qualified_star_cols:
203
+ # Fallback: can't resolve t.*, use * as column name
204
+ qualified_name = f"{target_table}.*"
205
+ columns.append(qualified_name)
206
+ self._column_mapping[qualified_name] = "*"
190
207
  else:
191
- # For expressions, use the SQL representation
192
- column_name = source_expr.sql(dialect=self.dialect)
208
+ column_name = projection.name
193
209
  lineage_name = column_name
194
-
195
- # Qualify with target table
196
- qualified_name = f"{target_table}.{column_name}"
197
- columns.append(qualified_name)
198
- self._column_mapping[qualified_name] = lineage_name
210
+ # Qualify with target table
211
+ qualified_name = f"{target_table}.{column_name}"
212
+ columns.append(qualified_name)
213
+ self._column_mapping[qualified_name] = lineage_name
214
+ else:
215
+ # For expressions, use the SQL representation
216
+ column_name = projection.sql(dialect=self.dialect)
217
+ lineage_name = column_name
218
+ # Qualify with target table
219
+ qualified_name = f"{target_table}.{column_name}"
220
+ columns.append(qualified_name)
221
+ self._column_mapping[qualified_name] = lineage_name
199
222
 
200
223
  else:
201
224
  # DQL (pure SELECT): Use the SELECT columns as output
@@ -1342,8 +1365,18 @@ class LineageAnalyzer:
1342
1365
  # Use the alias name as the column name
1343
1366
  columns.append(projection.alias)
1344
1367
  elif isinstance(projection, exp.Column):
1345
- # Use the column name
1346
- columns.append(projection.name)
1368
+ # Check if this is a table-qualified star (e.g., t.*)
1369
+ if isinstance(projection.this, exp.Star):
1370
+ # Resolve table-qualified star from known schema
1371
+ table_name = projection.table
1372
+ if table_name and first_select:
1373
+ qualified_star_cols = self._resolve_qualified_star(
1374
+ table_name, first_select
1375
+ )
1376
+ columns.extend(qualified_star_cols)
1377
+ else:
1378
+ # Use the column name
1379
+ columns.append(projection.name)
1347
1380
  elif isinstance(projection, exp.Star):
1348
1381
  # Resolve SELECT * from known schema
1349
1382
  if first_select:
@@ -1375,6 +1408,64 @@ class LineageAnalyzer:
1375
1408
 
1376
1409
  source = from_clause.this
1377
1410
 
1411
+ # Handle table reference from FROM clause
1412
+ columns.extend(self._resolve_source_columns(source, select_node))
1413
+
1414
+ # Handle JOIN clauses - collect columns from all joined tables
1415
+ # EXCEPT for SEMI and ANTI joins which only return left table columns
1416
+ joins = select_node.args.get("joins")
1417
+ if joins:
1418
+ for join in joins:
1419
+ if isinstance(join, exp.Join):
1420
+ # SEMI and ANTI joins don't include right table columns in SELECT *
1421
+ join_kind = join.kind
1422
+ if join_kind in ("SEMI", "ANTI"):
1423
+ # Skip right table columns for SEMI/ANTI joins
1424
+ continue
1425
+ join_source = join.this
1426
+ columns.extend(
1427
+ self._resolve_source_columns(join_source, select_node)
1428
+ )
1429
+
1430
+ # Handle LATERAL VIEW clauses - collect generated columns
1431
+ laterals = select_node.args.get("laterals")
1432
+ if laterals:
1433
+ for lateral in laterals:
1434
+ if isinstance(lateral, exp.Lateral):
1435
+ lateral_cols = self._resolve_lateral_columns(lateral)
1436
+ columns.extend(lateral_cols)
1437
+
1438
+ return columns
1439
+
1440
+ def _resolve_lateral_columns(self, lateral: exp.Lateral) -> List[str]:
1441
+ """
1442
+ Extract generated column names from a LATERAL VIEW clause.
1443
+
1444
+ Args:
1445
+ lateral: The Lateral expression node
1446
+
1447
+ Returns:
1448
+ List of generated column names (e.g., ['elem'] for explode,
1449
+ ['pos', 'elem'] for posexplode)
1450
+ """
1451
+ # Use SQLGlot's built-in property to get alias column names
1452
+ return lateral.alias_column_names or []
1453
+
1454
+ def _resolve_source_columns(
1455
+ self, source: exp.Expression, select_node: exp.Select
1456
+ ) -> List[str]:
1457
+ """
1458
+ Resolve columns from a single source (table, subquery, etc.).
1459
+
1460
+ Args:
1461
+ source: The source expression (Table, Subquery, etc.)
1462
+ select_node: The containing SELECT node for CTE resolution
1463
+
1464
+ Returns:
1465
+ List of column names from the source
1466
+ """
1467
+ columns: List[str] = []
1468
+
1378
1469
  # Handle table reference
1379
1470
  if isinstance(source, exp.Table):
1380
1471
  source_name = self._get_qualified_table_name(source)
@@ -1387,11 +1478,100 @@ class LineageAnalyzer:
1387
1478
  cte_columns = self._resolve_cte_columns(source_name, select_node)
1388
1479
  columns.extend(cte_columns)
1389
1480
 
1390
- # Handle subquery - can't resolve without deeper analysis
1391
- elif isinstance(source, exp.Subquery) and source.alias:
1392
- # Check if this subquery alias is in file schema (unlikely)
1393
- if source.alias in self._file_schema:
1481
+ # Handle subquery with alias
1482
+ elif isinstance(source, exp.Subquery):
1483
+ # First check if this subquery alias is in file schema
1484
+ if source.alias and source.alias in self._file_schema:
1394
1485
  columns.extend(self._file_schema[source.alias].keys())
1486
+ else:
1487
+ # Extract columns from the subquery's SELECT
1488
+ inner_select = source.this
1489
+ if isinstance(inner_select, exp.Select):
1490
+ subquery_cols = self._extract_subquery_columns(inner_select)
1491
+ columns.extend(subquery_cols)
1492
+
1493
+ return columns
1494
+
1495
+ def _resolve_qualified_star(
1496
+ self, table_name: str, select_node: exp.Select
1497
+ ) -> List[str]:
1498
+ """
1499
+ Resolve a table-qualified star (e.g., t.*) to actual column names.
1500
+
1501
+ Args:
1502
+ table_name: The table/alias name qualifying the star
1503
+ select_node: The SELECT node for context
1504
+
1505
+ Returns:
1506
+ List of column names from the specified table
1507
+ """
1508
+ # First check file schema
1509
+ if table_name in self._file_schema:
1510
+ return list(self._file_schema[table_name].keys())
1511
+
1512
+ # Check if it's a CTE reference
1513
+ cte_columns = self._resolve_cte_columns(table_name, select_node)
1514
+ if cte_columns:
1515
+ return cte_columns
1516
+
1517
+ # Check if the table name is an alias - need to resolve the actual table
1518
+ from_clause = select_node.args.get("from")
1519
+ if from_clause and isinstance(from_clause, exp.From):
1520
+ source = from_clause.this
1521
+ if isinstance(source, exp.Table) and source.alias == table_name:
1522
+ actual_name = self._get_qualified_table_name(source)
1523
+ if actual_name in self._file_schema:
1524
+ return list(self._file_schema[actual_name].keys())
1525
+
1526
+ # Check JOIN clauses for aliased tables
1527
+ joins = select_node.args.get("joins")
1528
+ if joins:
1529
+ for join in joins:
1530
+ if isinstance(join, exp.Join):
1531
+ join_source = join.this
1532
+ if (
1533
+ isinstance(join_source, exp.Table)
1534
+ and join_source.alias == table_name
1535
+ ):
1536
+ actual_name = self._get_qualified_table_name(join_source)
1537
+ if actual_name in self._file_schema:
1538
+ return list(self._file_schema[actual_name].keys())
1539
+
1540
+ return []
1541
+
1542
+ def _extract_subquery_columns(self, subquery_select: exp.Select) -> List[str]:
1543
+ """
1544
+ Extract column names from a subquery's SELECT statement.
1545
+
1546
+ Args:
1547
+ subquery_select: The SELECT expression within the subquery
1548
+
1549
+ Returns:
1550
+ List of column names
1551
+ """
1552
+ columns: List[str] = []
1553
+
1554
+ for projection in subquery_select.expressions:
1555
+ if isinstance(projection, exp.Alias):
1556
+ columns.append(projection.alias)
1557
+ elif isinstance(projection, exp.Column):
1558
+ # Check for table-qualified star (t.*)
1559
+ if isinstance(projection.this, exp.Star):
1560
+ table_name = projection.table
1561
+ if table_name:
1562
+ qualified_cols = self._resolve_qualified_star(
1563
+ table_name, subquery_select
1564
+ )
1565
+ columns.extend(qualified_cols)
1566
+ else:
1567
+ columns.append(projection.name)
1568
+ elif isinstance(projection, exp.Star):
1569
+ # Resolve SELECT * in subquery
1570
+ star_columns = self._resolve_star_columns(subquery_select)
1571
+ columns.extend(star_columns)
1572
+ else:
1573
+ col_sql = projection.sql(dialect=self.dialect)
1574
+ columns.append(col_sql)
1395
1575
 
1396
1576
  return columns
1397
1577