sql-glider 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -1,5 +1,5 @@
1
1
  sqlglider/__init__.py,sha256=gDf7s52dMcX7JuCZ1SLawcB1vb3U0yJCohu9RQAATBY,125
2
- sqlglider/_version.py,sha256=rLCrf4heo25FJtBY-2Ap7ZuWW-5FS7sqTjsolIUuI5c,704
2
+ sqlglider/_version.py,sha256=rdxBMYpwzYxiWk08QbPLHSAxHoDfeKWwyaJIAM0lSic,704
3
3
  sqlglider/cli.py,sha256=9sweHRVLk2iBSzCzT2Gcj8y1g1XKzq26iApQsMaFbx4,51786
4
4
  sqlglider/global_models.py,sha256=2vyJXAuXOsXQpE-D3F0ejj7eR9z0nDWFjTkielhzM8k,356
5
5
  sqlglider/catalog/__init__.py,sha256=2PqFPyzFXJ14FpSUcBmVK2L-a_ypWQHAbHFHxLDk_LE,814
@@ -17,7 +17,7 @@ sqlglider/graph/models.py,sha256=EYmjv_WzDSNp_WfhJ6H-qBIOkAcoNKS7GRUryfKrHuY,933
17
17
  sqlglider/graph/query.py,sha256=LHU8Cvn7ZPPSEnqdDn2pF8f1_LQjIvNIrZqs8cFlb6U,9433
18
18
  sqlglider/graph/serialization.py,sha256=7JJo31rwSlxnDhdqdTJdK4Dr_ZcSYetXfx3_CmndSac,2662
19
19
  sqlglider/lineage/__init__.py,sha256=llXMeI5_PIZaiBo8tKk3-wOubF4m_6QBHbn1FtWxT7k,256
20
- sqlglider/lineage/analyzer.py,sha256=kRhGcGaiixxtrf9vO8g09omayjB2G3LA9hLCOLaTyPg,56811
20
+ sqlglider/lineage/analyzer.py,sha256=9Ly93TBcpc2sRP9NuwxB0jCL3YK-yCV1rih93MFM-S0,64313
21
21
  sqlglider/lineage/formatters.py,sha256=_Y9wcTX4JXn1vVnZ1xI656g1FF2rMjcAVc-GHjbd9QA,10389
22
22
  sqlglider/templating/__init__.py,sha256=g3_wb6rSDI0usq2UUMDpn-J5kVwlAw3NtLdwbxL6UHs,1435
23
23
  sqlglider/templating/base.py,sha256=y5bWAW7qXl_4pPyo5KycfHwNVvt1-7slZ63DAsvTE1s,2902
@@ -27,8 +27,8 @@ sqlglider/templating/variables.py,sha256=5593PtLBcOxsnMCSRm2pGAD5I0Y9f__VV3_J_Hf
27
27
  sqlglider/utils/__init__.py,sha256=KGp9-UzKz_OFBOTFoSy-g-NXDZsvyWXG_9-1zcC6ePE,276
28
28
  sqlglider/utils/config.py,sha256=iNJgSXFw3pmL2MCdvW3SJp4X2T3AQP2QyQuXIXT-6H0,4761
29
29
  sqlglider/utils/file_utils.py,sha256=5_ff28E0r1R7emZzsOnRuHd-7zIX6873eyr1SuPEr4E,1093
30
- sql_glider-0.1.4.dist-info/METADATA,sha256=-gzDzEyZ116YpDBNbIwWMgMO184s-WkDKMxMH92lOqA,28445
31
- sql_glider-0.1.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
32
- sql_glider-0.1.4.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
33
- sql_glider-0.1.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- sql_glider-0.1.4.dist-info/RECORD,,
30
+ sql_glider-0.1.5.dist-info/METADATA,sha256=uLFCtpVoKp5F8ORuqJfJQtNZFjAJ3BQLavtRl-Bh0JA,28445
31
+ sql_glider-0.1.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
32
+ sql_glider-0.1.5.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
33
+ sql_glider-0.1.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ sql_glider-0.1.5.dist-info/RECORD,,
sqlglider/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.4'
32
- __version_tuple__ = version_tuple = (0, 1, 4)
31
+ __version__ = version = '0.1.5'
32
+ __version_tuple__ = version_tuple = (0, 1, 5)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -182,20 +182,43 @@ class LineageAnalyzer:
182
182
  # For aliased columns, use the alias as the column name
183
183
  column_name = projection.alias
184
184
  lineage_name = column_name # SQLGlot lineage uses the alias
185
- else:
186
- source_expr = projection
187
- if isinstance(source_expr, exp.Column):
188
- column_name = source_expr.name
189
- lineage_name = column_name
185
+ # Qualify with target table
186
+ qualified_name = f"{target_table}.{column_name}"
187
+ columns.append(qualified_name)
188
+ self._column_mapping[qualified_name] = lineage_name
189
+ elif isinstance(projection, exp.Column):
190
+ # Check if this is a table-qualified star (e.g., t.*)
191
+ if isinstance(projection.this, exp.Star):
192
+ source_table = projection.table
193
+ qualified_star_cols: List[str] = []
194
+ if source_table and first_select:
195
+ qualified_star_cols = self._resolve_qualified_star(
196
+ source_table, first_select
197
+ )
198
+ for col in qualified_star_cols:
199
+ qualified_name = f"{target_table}.{col}"
200
+ columns.append(qualified_name)
201
+ self._column_mapping[qualified_name] = col
202
+ if not qualified_star_cols:
203
+ # Fallback: can't resolve t.*, use * as column name
204
+ qualified_name = f"{target_table}.*"
205
+ columns.append(qualified_name)
206
+ self._column_mapping[qualified_name] = "*"
190
207
  else:
191
- # For expressions, use the SQL representation
192
- column_name = source_expr.sql(dialect=self.dialect)
208
+ column_name = projection.name
193
209
  lineage_name = column_name
194
-
195
- # Qualify with target table
196
- qualified_name = f"{target_table}.{column_name}"
197
- columns.append(qualified_name)
198
- self._column_mapping[qualified_name] = lineage_name
210
+ # Qualify with target table
211
+ qualified_name = f"{target_table}.{column_name}"
212
+ columns.append(qualified_name)
213
+ self._column_mapping[qualified_name] = lineage_name
214
+ else:
215
+ # For expressions, use the SQL representation
216
+ column_name = projection.sql(dialect=self.dialect)
217
+ lineage_name = column_name
218
+ # Qualify with target table
219
+ qualified_name = f"{target_table}.{column_name}"
220
+ columns.append(qualified_name)
221
+ self._column_mapping[qualified_name] = lineage_name
199
222
 
200
223
  else:
201
224
  # DQL (pure SELECT): Use the SELECT columns as output
@@ -1342,8 +1365,18 @@ class LineageAnalyzer:
1342
1365
  # Use the alias name as the column name
1343
1366
  columns.append(projection.alias)
1344
1367
  elif isinstance(projection, exp.Column):
1345
- # Use the column name
1346
- columns.append(projection.name)
1368
+ # Check if this is a table-qualified star (e.g., t.*)
1369
+ if isinstance(projection.this, exp.Star):
1370
+ # Resolve table-qualified star from known schema
1371
+ table_name = projection.table
1372
+ if table_name and first_select:
1373
+ qualified_star_cols = self._resolve_qualified_star(
1374
+ table_name, first_select
1375
+ )
1376
+ columns.extend(qualified_star_cols)
1377
+ else:
1378
+ # Use the column name
1379
+ columns.append(projection.name)
1347
1380
  elif isinstance(projection, exp.Star):
1348
1381
  # Resolve SELECT * from known schema
1349
1382
  if first_select:
@@ -1375,6 +1408,58 @@ class LineageAnalyzer:
1375
1408
 
1376
1409
  source = from_clause.this
1377
1410
 
1411
+ # Handle table reference from FROM clause
1412
+ columns.extend(self._resolve_source_columns(source, select_node))
1413
+
1414
+ # Handle JOIN clauses - collect columns from all joined tables
1415
+ joins = select_node.args.get("joins")
1416
+ if joins:
1417
+ for join in joins:
1418
+ if isinstance(join, exp.Join):
1419
+ join_source = join.this
1420
+ columns.extend(
1421
+ self._resolve_source_columns(join_source, select_node)
1422
+ )
1423
+
1424
+ # Handle LATERAL VIEW clauses - collect generated columns
1425
+ laterals = select_node.args.get("laterals")
1426
+ if laterals:
1427
+ for lateral in laterals:
1428
+ if isinstance(lateral, exp.Lateral):
1429
+ lateral_cols = self._resolve_lateral_columns(lateral)
1430
+ columns.extend(lateral_cols)
1431
+
1432
+ return columns
1433
+
1434
+ def _resolve_lateral_columns(self, lateral: exp.Lateral) -> List[str]:
1435
+ """
1436
+ Extract generated column names from a LATERAL VIEW clause.
1437
+
1438
+ Args:
1439
+ lateral: The Lateral expression node
1440
+
1441
+ Returns:
1442
+ List of generated column names (e.g., ['elem'] for explode,
1443
+ ['pos', 'elem'] for posexplode)
1444
+ """
1445
+ # Use SQLGlot's built-in property to get alias column names
1446
+ return lateral.alias_column_names or []
1447
+
1448
+ def _resolve_source_columns(
1449
+ self, source: exp.Expression, select_node: exp.Select
1450
+ ) -> List[str]:
1451
+ """
1452
+ Resolve columns from a single source (table, subquery, etc.).
1453
+
1454
+ Args:
1455
+ source: The source expression (Table, Subquery, etc.)
1456
+ select_node: The containing SELECT node for CTE resolution
1457
+
1458
+ Returns:
1459
+ List of column names from the source
1460
+ """
1461
+ columns: List[str] = []
1462
+
1378
1463
  # Handle table reference
1379
1464
  if isinstance(source, exp.Table):
1380
1465
  source_name = self._get_qualified_table_name(source)
@@ -1387,11 +1472,100 @@ class LineageAnalyzer:
1387
1472
  cte_columns = self._resolve_cte_columns(source_name, select_node)
1388
1473
  columns.extend(cte_columns)
1389
1474
 
1390
- # Handle subquery - can't resolve without deeper analysis
1391
- elif isinstance(source, exp.Subquery) and source.alias:
1392
- # Check if this subquery alias is in file schema (unlikely)
1393
- if source.alias in self._file_schema:
1475
+ # Handle subquery with alias
1476
+ elif isinstance(source, exp.Subquery):
1477
+ # First check if this subquery alias is in file schema
1478
+ if source.alias and source.alias in self._file_schema:
1394
1479
  columns.extend(self._file_schema[source.alias].keys())
1480
+ else:
1481
+ # Extract columns from the subquery's SELECT
1482
+ inner_select = source.this
1483
+ if isinstance(inner_select, exp.Select):
1484
+ subquery_cols = self._extract_subquery_columns(inner_select)
1485
+ columns.extend(subquery_cols)
1486
+
1487
+ return columns
1488
+
1489
+ def _resolve_qualified_star(
1490
+ self, table_name: str, select_node: exp.Select
1491
+ ) -> List[str]:
1492
+ """
1493
+ Resolve a table-qualified star (e.g., t.*) to actual column names.
1494
+
1495
+ Args:
1496
+ table_name: The table/alias name qualifying the star
1497
+ select_node: The SELECT node for context
1498
+
1499
+ Returns:
1500
+ List of column names from the specified table
1501
+ """
1502
+ # First check file schema
1503
+ if table_name in self._file_schema:
1504
+ return list(self._file_schema[table_name].keys())
1505
+
1506
+ # Check if it's a CTE reference
1507
+ cte_columns = self._resolve_cte_columns(table_name, select_node)
1508
+ if cte_columns:
1509
+ return cte_columns
1510
+
1511
+ # Check if the table name is an alias - need to resolve the actual table
1512
+ from_clause = select_node.args.get("from")
1513
+ if from_clause and isinstance(from_clause, exp.From):
1514
+ source = from_clause.this
1515
+ if isinstance(source, exp.Table) and source.alias == table_name:
1516
+ actual_name = self._get_qualified_table_name(source)
1517
+ if actual_name in self._file_schema:
1518
+ return list(self._file_schema[actual_name].keys())
1519
+
1520
+ # Check JOIN clauses for aliased tables
1521
+ joins = select_node.args.get("joins")
1522
+ if joins:
1523
+ for join in joins:
1524
+ if isinstance(join, exp.Join):
1525
+ join_source = join.this
1526
+ if (
1527
+ isinstance(join_source, exp.Table)
1528
+ and join_source.alias == table_name
1529
+ ):
1530
+ actual_name = self._get_qualified_table_name(join_source)
1531
+ if actual_name in self._file_schema:
1532
+ return list(self._file_schema[actual_name].keys())
1533
+
1534
+ return []
1535
+
1536
+ def _extract_subquery_columns(self, subquery_select: exp.Select) -> List[str]:
1537
+ """
1538
+ Extract column names from a subquery's SELECT statement.
1539
+
1540
+ Args:
1541
+ subquery_select: The SELECT expression within the subquery
1542
+
1543
+ Returns:
1544
+ List of column names
1545
+ """
1546
+ columns: List[str] = []
1547
+
1548
+ for projection in subquery_select.expressions:
1549
+ if isinstance(projection, exp.Alias):
1550
+ columns.append(projection.alias)
1551
+ elif isinstance(projection, exp.Column):
1552
+ # Check for table-qualified star (t.*)
1553
+ if isinstance(projection.this, exp.Star):
1554
+ table_name = projection.table
1555
+ if table_name:
1556
+ qualified_cols = self._resolve_qualified_star(
1557
+ table_name, subquery_select
1558
+ )
1559
+ columns.extend(qualified_cols)
1560
+ else:
1561
+ columns.append(projection.name)
1562
+ elif isinstance(projection, exp.Star):
1563
+ # Resolve SELECT * in subquery
1564
+ star_columns = self._resolve_star_columns(subquery_select)
1565
+ columns.extend(star_columns)
1566
+ else:
1567
+ col_sql = projection.sql(dialect=self.dialect)
1568
+ columns.append(col_sql)
1395
1569
 
1396
1570
  return columns
1397
1571