sql-glider 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -1,5 +1,5 @@
1
1
  sqlglider/__init__.py,sha256=gDf7s52dMcX7JuCZ1SLawcB1vb3U0yJCohu9RQAATBY,125
2
- sqlglider/_version.py,sha256=q5nF98G8SoVeJqaknL0xdyxtv0egsqb0fK06_84Izu8,704
2
+ sqlglider/_version.py,sha256=rLCrf4heo25FJtBY-2Ap7ZuWW-5FS7sqTjsolIUuI5c,704
3
3
  sqlglider/cli.py,sha256=9sweHRVLk2iBSzCzT2Gcj8y1g1XKzq26iApQsMaFbx4,51786
4
4
  sqlglider/global_models.py,sha256=2vyJXAuXOsXQpE-D3F0ejj7eR9z0nDWFjTkielhzM8k,356
5
5
  sqlglider/catalog/__init__.py,sha256=2PqFPyzFXJ14FpSUcBmVK2L-a_ypWQHAbHFHxLDk_LE,814
@@ -17,7 +17,7 @@ sqlglider/graph/models.py,sha256=EYmjv_WzDSNp_WfhJ6H-qBIOkAcoNKS7GRUryfKrHuY,933
17
17
  sqlglider/graph/query.py,sha256=LHU8Cvn7ZPPSEnqdDn2pF8f1_LQjIvNIrZqs8cFlb6U,9433
18
18
  sqlglider/graph/serialization.py,sha256=7JJo31rwSlxnDhdqdTJdK4Dr_ZcSYetXfx3_CmndSac,2662
19
19
  sqlglider/lineage/__init__.py,sha256=llXMeI5_PIZaiBo8tKk3-wOubF4m_6QBHbn1FtWxT7k,256
20
- sqlglider/lineage/analyzer.py,sha256=HyyjGMP7VvEmvt-V-qT48C-41Usj2OmT5FPYYKdJsSs,48218
20
+ sqlglider/lineage/analyzer.py,sha256=kRhGcGaiixxtrf9vO8g09omayjB2G3LA9hLCOLaTyPg,56811
21
21
  sqlglider/lineage/formatters.py,sha256=_Y9wcTX4JXn1vVnZ1xI656g1FF2rMjcAVc-GHjbd9QA,10389
22
22
  sqlglider/templating/__init__.py,sha256=g3_wb6rSDI0usq2UUMDpn-J5kVwlAw3NtLdwbxL6UHs,1435
23
23
  sqlglider/templating/base.py,sha256=y5bWAW7qXl_4pPyo5KycfHwNVvt1-7slZ63DAsvTE1s,2902
@@ -27,8 +27,8 @@ sqlglider/templating/variables.py,sha256=5593PtLBcOxsnMCSRm2pGAD5I0Y9f__VV3_J_Hf
27
27
  sqlglider/utils/__init__.py,sha256=KGp9-UzKz_OFBOTFoSy-g-NXDZsvyWXG_9-1zcC6ePE,276
28
28
  sqlglider/utils/config.py,sha256=iNJgSXFw3pmL2MCdvW3SJp4X2T3AQP2QyQuXIXT-6H0,4761
29
29
  sqlglider/utils/file_utils.py,sha256=5_ff28E0r1R7emZzsOnRuHd-7zIX6873eyr1SuPEr4E,1093
30
- sql_glider-0.1.3.dist-info/METADATA,sha256=D83HzMc1l3AHbnR3Y9aOzwTbp6yMKR9tQMXWTWkH1Sw,28445
31
- sql_glider-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
32
- sql_glider-0.1.3.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
33
- sql_glider-0.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- sql_glider-0.1.3.dist-info/RECORD,,
30
+ sql_glider-0.1.4.dist-info/METADATA,sha256=-gzDzEyZ116YpDBNbIwWMgMO184s-WkDKMxMH92lOqA,28445
31
+ sql_glider-0.1.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
32
+ sql_glider-0.1.4.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
33
+ sql_glider-0.1.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ sql_glider-0.1.4.dist-info/RECORD,,
sqlglider/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.3'
32
- __version_tuple__ = version_tuple = (0, 1, 3)
31
+ __version__ = version = '0.1.4'
32
+ __version_tuple__ = version_tuple = (0, 1, 4)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1,7 +1,7 @@
1
1
  """Core lineage analysis using SQLGlot."""
2
2
 
3
3
  from enum import Enum
4
- from typing import Callable, Iterator, List, Optional, Set, Tuple, Union
4
+ from typing import Callable, Dict, Iterator, List, Optional, Set, Tuple, Union
5
5
 
6
6
  from pydantic import BaseModel, Field
7
7
  from sqlglot import exp, parse
@@ -99,6 +99,9 @@ class LineageAnalyzer:
99
99
  self.sql = sql
100
100
  self.dialect = dialect
101
101
  self._skipped_queries: List[SkippedQuery] = []
102
+ # File-scoped schema context for cross-statement lineage
103
+ # Maps table/view names to their column definitions
104
+ self._file_schema: Dict[str, Dict[str, str]] = {}
102
105
 
103
106
  try:
104
107
  # Parse all statements in the SQL string
@@ -156,7 +159,24 @@ class LineageAnalyzer:
156
159
  # DML/DDL: Use target table for output column qualification
157
160
  # The columns are from the SELECT, but qualified with the target table
158
161
  projections = self._get_select_projections(select_node)
162
+ first_select = self._get_first_select(select_node)
163
+
159
164
  for projection in projections:
165
+ # Handle SELECT * by resolving from file schema
166
+ if isinstance(projection, exp.Star):
167
+ if first_select:
168
+ star_columns = self._resolve_star_columns(first_select)
169
+ for star_col in star_columns:
170
+ qualified_name = f"{target_table}.{star_col}"
171
+ columns.append(qualified_name)
172
+ self._column_mapping[qualified_name] = star_col
173
+ if not columns:
174
+ # Fallback: can't resolve *, use * as column name
175
+ qualified_name = f"{target_table}.*"
176
+ columns.append(qualified_name)
177
+ self._column_mapping[qualified_name] = "*"
178
+ continue
179
+
160
180
  # Get the underlying expression (unwrap alias if present)
161
181
  if isinstance(projection, exp.Alias):
162
182
  # For aliased columns, use the alias as the column name
@@ -324,6 +344,7 @@ class LineageAnalyzer:
324
344
  """
325
345
  results = []
326
346
  self._skipped_queries = [] # Reset skipped queries for this analysis
347
+ self._file_schema = {} # Reset file schema for this analysis run
327
348
 
328
349
  for query_index, expr, preview in self._iterate_queries(table_filter):
329
350
  # Temporarily swap self.expr to analyze this query
@@ -375,6 +396,9 @@ class LineageAnalyzer:
375
396
  )
376
397
  )
377
398
  finally:
399
+ # Extract schema from this statement AFTER analysis
400
+ # This builds up context for subsequent statements to use
401
+ self._extract_schema_from_statement(expr)
378
402
  # Restore original expression
379
403
  self.expr = original_expr
380
404
 
@@ -702,7 +726,13 @@ class LineageAnalyzer:
702
726
  lineage_col = self._column_mapping.get(col, col)
703
727
 
704
728
  # Get lineage tree for this column using current query SQL only
705
- node = lineage(lineage_col, current_query_sql, dialect=self.dialect)
729
+ # Pass file schema to enable SELECT * expansion for known tables/views
730
+ node = lineage(
731
+ lineage_col,
732
+ current_query_sql,
733
+ dialect=self.dialect,
734
+ schema=self._file_schema if self._file_schema else None,
735
+ )
706
736
 
707
737
  # Collect all source columns
708
738
  sources: Set[str] = set()
@@ -1235,3 +1265,187 @@ class LineageAnalyzer:
1235
1265
  preview = self._generate_query_preview(expr)
1236
1266
 
1237
1267
  yield idx, expr, preview
1268
+
1269
+ # -------------------------------------------------------------------------
1270
+ # File-scoped schema context methods
1271
+ # -------------------------------------------------------------------------
1272
+
1273
def _extract_schema_from_statement(self, expr: exp.Expression) -> None:
    """
    Record the output columns of a CREATE VIEW / CREATE TABLE ... AS SELECT.

    Each qualifying statement adds an entry to ``self._file_schema`` keyed by
    the qualified target name, so that later statements in the same file can
    have ``SELECT *`` expanded against it. Statements of any other shape are
    ignored and leave the schema untouched.

    Args:
        expr: The parsed SQL statement to inspect
    """
    # Anything that is not CREATE VIEW / CREATE TABLE contributes no schema.
    if not (isinstance(expr, exp.Create) and expr.kind in ("VIEW", "TABLE")):
        return

    # The target may be wrapped in an exp.Schema node; unwrap it to reach
    # the underlying table reference.
    created = expr.this
    table_node = created.this if isinstance(created, exp.Schema) else created
    if not isinstance(table_node, exp.Table):
        return

    qualified_target = self._get_qualified_table_name(table_node)

    # No query body (expr.expression is None) means there is nothing to
    # derive columns from.
    query = expr.expression
    if query is None:
        return

    # Unwrap a parenthesised body, e.g. CREATE VIEW v AS (SELECT ...).
    if isinstance(query, exp.Subquery):
        query = query.this

    if isinstance(query, (exp.Select, exp.Union, exp.Intersect, exp.Except)):
        names = self._extract_columns_from_select(query)
        if names:
            # Only column names are known here, so every type is recorded
            # with the placeholder "UNKNOWN".
            self._file_schema[qualified_target] = dict.fromkeys(names, "UNKNOWN")
1318
+
1319
def _extract_columns_from_select(
    self, select_node: Union[exp.Select, exp.Union, exp.Intersect, exp.Except]
) -> List[str]:
    """
    Collect the output column names of a SELECT or set operation.

    Aliased projections contribute their alias, plain column references
    contribute the column name, ``*`` is expanded through
    ``_resolve_star_columns`` and any other expression contributes its SQL
    text rendered in the analyzer's dialect.

    Args:
        select_node: The SELECT or set operation expression

    Returns:
        List of column names in projection order
    """
    projections = self._get_select_projections(select_node)
    first_select = self._get_first_select(select_node)

    names: List[str] = []
    for item in projections:
        if isinstance(item, exp.Alias):
            names.append(item.alias)
            continue
        if isinstance(item, exp.Column):
            names.append(item.name)
            continue
        if not isinstance(item, exp.Star):
            # Unaliased expression: fall back to its SQL representation.
            names.append(item.sql(dialect=self.dialect))
            continue
        # SELECT *: expand only when there is a SELECT to resolve against;
        # otherwise the star contributes no columns.
        if first_select:
            names.extend(self._resolve_star_columns(first_select))

    return names
1358
+
1359
def _resolve_star_columns(self, select_node: exp.Select) -> List[str]:
    """
    Expand ``SELECT *`` into column names using the file schema or CTEs.

    Only the primary source of the FROM clause is inspected. A table source
    is looked up in ``self._file_schema`` first and, failing that, among the
    CTEs visible from ``select_node``. An aliased subquery source is only
    looked up in ``self._file_schema`` by its alias.

    Args:
        select_node: The SELECT node containing the * reference

    Returns:
        List of column names if source is known, empty list otherwise
    """
    from_clause = select_node.args.get("from")
    if not from_clause or not isinstance(from_clause, exp.From):
        return []

    source = from_clause.this

    if isinstance(source, exp.Table):
        table_name = self._get_qualified_table_name(source)
        # Views/tables created by earlier statements take precedence over
        # CTEs defined within the current statement.
        if table_name in self._file_schema:
            return list(self._file_schema[table_name].keys())
        return list(self._resolve_cte_columns(table_name, select_node))

    if isinstance(source, exp.Subquery) and source.alias:
        # A subquery is not analysed in depth; it only resolves when its
        # alias happens to match a file-schema entry.
        if source.alias in self._file_schema:
            return list(self._file_schema[source.alias].keys())

    return []
1397
+
1398
def _resolve_cte_columns(self, cte_name: str, select_node: exp.Select) -> List[str]:
    """
    Resolve columns from a CTE definition within the same statement.

    Walks from ``select_node`` up through its ancestors looking for a WITH
    clause that defines ``cte_name``. When ``select_node`` itself lives
    inside one of the CTEs of a WITH clause, only the CTEs defined *before*
    that one are considered. This prevents a CTE body such as
    ``WITH a AS (SELECT * FROM a) ...`` (or two CTEs referencing each other)
    from resolving back to itself, which would otherwise recurse through
    ``_extract_cte_select_columns`` until ``RecursionError``.

    Args:
        cte_name: Name of the CTE to resolve
        select_node: The SELECT node that references the CTE

    Returns:
        List of column names from the CTE, empty if CTE not found
    """
    # Identities of every node visited on the way up; a CTE whose id is in
    # this set is an ancestor of (i.e. contains) the referencing SELECT.
    visited_ids: Set[int] = set()

    node = select_node
    while node:
        visited_ids.add(id(node))

        with_clause = node.args.get("with") if hasattr(node, "args") else None
        if with_clause:
            for cte in with_clause.expressions:
                if id(cte) in visited_ids:
                    # The reference originates inside this CTE: neither it
                    # nor any later CTE of this WITH clause may be used.
                    break
                if isinstance(cte, exp.CTE) and cte.alias == cte_name:
                    cte_select = cte.this
                    # Only plain SELECT bodies are expanded; other bodies
                    # (e.g. set operations) fall through and the search
                    # continues in enclosing scopes.
                    if isinstance(cte_select, exp.Select):
                        return self._extract_cte_select_columns(cte_select)

        node = node.parent if hasattr(node, "parent") else None

    return []
1423
+
1424
def _extract_cte_select_columns(self, cte_select: exp.Select) -> List[str]:
    """
    Collect the output column names of a CTE's SELECT body.

    Aliases and plain column references contribute their names, ``*`` is
    expanded via ``_resolve_star_columns`` against the CTE's own FROM
    source, and any other expression contributes its SQL text rendered in
    the analyzer's dialect.

    Args:
        cte_select: The SELECT expression within the CTE

    Returns:
        List of column names in projection order
    """
    names: List[str] = []

    for item in cte_select.expressions:
        if isinstance(item, (exp.Alias, exp.Column)):
            # Alias wins over the underlying expression; a bare column
            # contributes its own name.
            names.append(item.alias if isinstance(item, exp.Alias) else item.name)
        elif isinstance(item, exp.Star):
            names.extend(self._resolve_star_columns(cte_select))
        else:
            names.append(item.sql(dialect=self.dialect))

    return names