sql-glider 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. {sql_glider-0.1.3 → sql_glider-0.1.4}/PKG-INFO +1 -1
  2. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/_version.py +2 -2
  3. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/lineage/analyzer.py +216 -2
  4. sql_glider-0.1.4/tests/fixtures/original_queries/test_view_window_cte.sql +27 -0
  5. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/graph/test_builder.py +150 -0
  6. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/lineage/test_analyzer.py +287 -0
  7. {sql_glider-0.1.3 → sql_glider-0.1.4}/.github/workflows/ci.yml +0 -0
  8. {sql_glider-0.1.3 → sql_glider-0.1.4}/.github/workflows/publish.yml +0 -0
  9. {sql_glider-0.1.3 → sql_glider-0.1.4}/.gitignore +0 -0
  10. {sql_glider-0.1.3 → sql_glider-0.1.4}/.python-version +0 -0
  11. {sql_glider-0.1.3 → sql_glider-0.1.4}/ARCHITECTURE.md +0 -0
  12. {sql_glider-0.1.3 → sql_glider-0.1.4}/CLAUDE.md +0 -0
  13. {sql_glider-0.1.3 → sql_glider-0.1.4}/LICENSE +0 -0
  14. {sql_glider-0.1.3 → sql_glider-0.1.4}/README.md +0 -0
  15. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-05-column-level-lineage.md +0 -0
  16. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-05-reverse-lineage.md +0 -0
  17. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-06-config-file-support.md +0 -0
  18. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-06-graph-lineage.md +0 -0
  19. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-06-unify-single-multi-query.md +0 -0
  20. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-07-sample-data-model.md +0 -0
  21. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-07-sql-templating.md +0 -0
  22. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-08-tables-command.md +0 -0
  23. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-09-graph-query-paths.md +0 -0
  24. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-13-dissect-command.md +0 -0
  25. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2025-12-14-tables-pull-command.md +0 -0
  26. {sql_glider-0.1.3 → sql_glider-0.1.4}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
  27. {sql_glider-0.1.3 → sql_glider-0.1.4}/pyproject.toml +0 -0
  28. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/README.md +0 -0
  29. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/business/expire_dim_customer.sql +0 -0
  30. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/business/load_fact_orders.sql +0 -0
  31. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/business/load_fact_payments.sql +0 -0
  32. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/business/merge_dim_customer.sql +0 -0
  33. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/business/merge_dim_product.sql +0 -0
  34. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
  35. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/complex/conditional_merge.sql +0 -0
  36. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/complex/cte_insert.sql +0 -0
  37. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/complex/multi_table_transform.sql +0 -0
  38. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/dim_customer.sql +0 -0
  39. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/dim_product.sql +0 -0
  40. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/fact_orders.sql +0 -0
  41. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/fact_payments.sql +0 -0
  42. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/raw_addresses.sql +0 -0
  43. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/raw_customers.sql +0 -0
  44. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/raw_order_items.sql +0 -0
  45. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/raw_orders.sql +0 -0
  46. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/raw_payments.sql +0 -0
  47. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/raw_products.sql +0 -0
  48. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/stg_customers.sql +0 -0
  49. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/stg_orders.sql +0 -0
  50. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/stg_payments.sql +0 -0
  51. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/ddl/stg_products.sql +0 -0
  52. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
  53. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
  54. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
  55. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
  56. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/maintenance/update_product_status.sql +0 -0
  57. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
  58. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
  59. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
  60. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
  61. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/staging/load_stg_customers.sql +0 -0
  62. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/staging/load_stg_orders.sql +0 -0
  63. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/staging/load_stg_payments.sql +0 -0
  64. {sql_glider-0.1.3 → sql_glider-0.1.4}/sample_data_model/staging/load_stg_products.sql +0 -0
  65. {sql_glider-0.1.3 → sql_glider-0.1.4}/sqlglider.toml.example +0 -0
  66. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/__init__.py +0 -0
  67. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/catalog/__init__.py +0 -0
  68. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/catalog/base.py +0 -0
  69. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/catalog/databricks.py +0 -0
  70. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/catalog/registry.py +0 -0
  71. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/cli.py +0 -0
  72. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/dissection/__init__.py +0 -0
  73. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/dissection/analyzer.py +0 -0
  74. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/dissection/formatters.py +0 -0
  75. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/dissection/models.py +0 -0
  76. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/global_models.py +0 -0
  77. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/graph/__init__.py +0 -0
  78. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/graph/builder.py +0 -0
  79. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/graph/merge.py +0 -0
  80. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/graph/models.py +0 -0
  81. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/graph/query.py +0 -0
  82. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/graph/serialization.py +0 -0
  83. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/lineage/__init__.py +0 -0
  84. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/lineage/formatters.py +0 -0
  85. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/templating/__init__.py +0 -0
  86. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/templating/base.py +0 -0
  87. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/templating/jinja.py +0 -0
  88. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/templating/registry.py +0 -0
  89. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/templating/variables.py +0 -0
  90. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/utils/__init__.py +0 -0
  91. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/utils/config.py +0 -0
  92. {sql_glider-0.1.3 → sql_glider-0.1.4}/src/sqlglider/utils/file_utils.py +0 -0
  93. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/__init__.py +0 -0
  94. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
  95. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
  96. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/customers.sql +0 -0
  97. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/orders.sql +0 -0
  98. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/reports.sql +0 -0
  99. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
  100. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_cte.sql +0 -0
  101. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
  102. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
  103. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_multi.sql +0 -0
  104. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
  105. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_single_query.sql +0 -0
  106. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_subquery.sql +0 -0
  107. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_tables.sql +0 -0
  108. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_view.sql +0 -0
  109. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/fixtures/sample_manifest.csv +0 -0
  110. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/__init__.py +0 -0
  111. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/catalog/__init__.py +0 -0
  112. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/catalog/test_base.py +0 -0
  113. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/catalog/test_databricks.py +0 -0
  114. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/catalog/test_registry.py +0 -0
  115. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/dissection/__init__.py +0 -0
  116. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/dissection/test_analyzer.py +0 -0
  117. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/dissection/test_formatters.py +0 -0
  118. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/dissection/test_models.py +0 -0
  119. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/graph/__init__.py +0 -0
  120. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/graph/test_merge.py +0 -0
  121. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/graph/test_models.py +0 -0
  122. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/graph/test_query.py +0 -0
  123. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/graph/test_serialization.py +0 -0
  124. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/lineage/__init__.py +0 -0
  125. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/lineage/test_formatters.py +0 -0
  126. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/templating/__init__.py +0 -0
  127. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/templating/test_base.py +0 -0
  128. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/templating/test_jinja.py +0 -0
  129. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/templating/test_registry.py +0 -0
  130. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/templating/test_variables.py +0 -0
  131. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/test_cli.py +0 -0
  132. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/utils/__init__.py +0 -0
  133. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/utils/test_config.py +0 -0
  134. {sql_glider-0.1.3 → sql_glider-0.1.4}/tests/sqlglider/utils/test_file_utils.py +0 -0
  135. {sql_glider-0.1.3 → sql_glider-0.1.4}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.3'
32
- __version_tuple__ = version_tuple = (0, 1, 3)
31
+ __version__ = version = '0.1.4'
32
+ __version_tuple__ = version_tuple = (0, 1, 4)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1,7 +1,7 @@
1
1
  """Core lineage analysis using SQLGlot."""
2
2
 
3
3
  from enum import Enum
4
- from typing import Callable, Iterator, List, Optional, Set, Tuple, Union
4
+ from typing import Callable, Dict, Iterator, List, Optional, Set, Tuple, Union
5
5
 
6
6
  from pydantic import BaseModel, Field
7
7
  from sqlglot import exp, parse
@@ -99,6 +99,9 @@ class LineageAnalyzer:
99
99
  self.sql = sql
100
100
  self.dialect = dialect
101
101
  self._skipped_queries: List[SkippedQuery] = []
102
+ # File-scoped schema context for cross-statement lineage
103
+ # Maps table/view names to their column definitions
104
+ self._file_schema: Dict[str, Dict[str, str]] = {}
102
105
 
103
106
  try:
104
107
  # Parse all statements in the SQL string
@@ -156,7 +159,24 @@ class LineageAnalyzer:
156
159
  # DML/DDL: Use target table for output column qualification
157
160
  # The columns are from the SELECT, but qualified with the target table
158
161
  projections = self._get_select_projections(select_node)
162
+ first_select = self._get_first_select(select_node)
163
+
159
164
  for projection in projections:
165
+ # Handle SELECT * by resolving from file schema
166
+ if isinstance(projection, exp.Star):
167
+ if first_select:
168
+ star_columns = self._resolve_star_columns(first_select)
169
+ for star_col in star_columns:
170
+ qualified_name = f"{target_table}.{star_col}"
171
+ columns.append(qualified_name)
172
+ self._column_mapping[qualified_name] = star_col
173
+ if not columns:
174
+ # Fallback: can't resolve *, use * as column name
175
+ qualified_name = f"{target_table}.*"
176
+ columns.append(qualified_name)
177
+ self._column_mapping[qualified_name] = "*"
178
+ continue
179
+
160
180
  # Get the underlying expression (unwrap alias if present)
161
181
  if isinstance(projection, exp.Alias):
162
182
  # For aliased columns, use the alias as the column name
@@ -324,6 +344,7 @@ class LineageAnalyzer:
324
344
  """
325
345
  results = []
326
346
  self._skipped_queries = [] # Reset skipped queries for this analysis
347
+ self._file_schema = {} # Reset file schema for this analysis run
327
348
 
328
349
  for query_index, expr, preview in self._iterate_queries(table_filter):
329
350
  # Temporarily swap self.expr to analyze this query
@@ -375,6 +396,9 @@ class LineageAnalyzer:
375
396
  )
376
397
  )
377
398
  finally:
399
+ # Extract schema from this statement AFTER analysis
400
+ # This builds up context for subsequent statements to use
401
+ self._extract_schema_from_statement(expr)
378
402
  # Restore original expression
379
403
  self.expr = original_expr
380
404
 
@@ -702,7 +726,13 @@ class LineageAnalyzer:
702
726
  lineage_col = self._column_mapping.get(col, col)
703
727
 
704
728
  # Get lineage tree for this column using current query SQL only
705
- node = lineage(lineage_col, current_query_sql, dialect=self.dialect)
729
+ # Pass file schema to enable SELECT * expansion for known tables/views
730
+ node = lineage(
731
+ lineage_col,
732
+ current_query_sql,
733
+ dialect=self.dialect,
734
+ schema=self._file_schema if self._file_schema else None,
735
+ )
706
736
 
707
737
  # Collect all source columns
708
738
  sources: Set[str] = set()
@@ -1235,3 +1265,187 @@ class LineageAnalyzer:
1235
1265
  preview = self._generate_query_preview(expr)
1236
1266
 
1237
1267
  yield idx, expr, preview
1268
+
1269
+ # -------------------------------------------------------------------------
1270
+ # File-scoped schema context methods
1271
+ # -------------------------------------------------------------------------
1272
+
1273
+ def _extract_schema_from_statement(self, expr: exp.Expression) -> None:
1274
+ """
1275
+ Extract column definitions from CREATE VIEW/TABLE AS SELECT statements.
1276
+
1277
+ This method builds up file-scoped schema context as statements are processed,
1278
+ enabling SQLGlot to correctly expand SELECT * and trace cross-statement references.
1279
+
1280
+ Args:
1281
+ expr: The SQL expression to extract schema from
1282
+ """
1283
+ # Only handle CREATE VIEW or CREATE TABLE (AS SELECT)
1284
+ if not isinstance(expr, exp.Create):
1285
+ return
1286
+ if expr.kind not in ("VIEW", "TABLE"):
1287
+ return
1288
+
1289
+ # Get target table/view name
1290
+ target = expr.this
1291
+ if isinstance(target, exp.Schema):
1292
+ target = target.this
1293
+ if not isinstance(target, exp.Table):
1294
+ return
1295
+
1296
+ target_name = self._get_qualified_table_name(target)
1297
+
1298
+ # Get the SELECT node from the CREATE statement
1299
+ select_node = expr.expression
1300
+ if select_node is None:
1301
+ return
1302
+
1303
+ # Handle Subquery wrapper (e.g., CREATE VIEW AS (SELECT ...))
1304
+ if isinstance(select_node, exp.Subquery):
1305
+ select_node = select_node.this
1306
+
1307
+ if not isinstance(
1308
+ select_node, (exp.Select, exp.Union, exp.Intersect, exp.Except)
1309
+ ):
1310
+ return
1311
+
1312
+ # Extract column names from the SELECT
1313
+ columns = self._extract_columns_from_select(select_node)
1314
+
1315
+ if columns:
1316
+ # Store with UNKNOWN type - SQLGlot only needs column names for expansion
1317
+ self._file_schema[target_name] = {col: "UNKNOWN" for col in columns}
1318
+
1319
+ def _extract_columns_from_select(
1320
+ self, select_node: Union[exp.Select, exp.Union, exp.Intersect, exp.Except]
1321
+ ) -> List[str]:
1322
+ """
1323
+ Extract column names from a SELECT statement.
1324
+
1325
+ Handles aliases, direct column references, and SELECT * by resolving
1326
+ against the known file schema.
1327
+
1328
+ Args:
1329
+ select_node: The SELECT or set operation expression
1330
+
1331
+ Returns:
1332
+ List of column names
1333
+ """
1334
+ columns: List[str] = []
1335
+
1336
+ # Get projections (for UNION, use first branch)
1337
+ projections = self._get_select_projections(select_node)
1338
+ first_select = self._get_first_select(select_node)
1339
+
1340
+ for projection in projections:
1341
+ if isinstance(projection, exp.Alias):
1342
+ # Use the alias name as the column name
1343
+ columns.append(projection.alias)
1344
+ elif isinstance(projection, exp.Column):
1345
+ # Use the column name
1346
+ columns.append(projection.name)
1347
+ elif isinstance(projection, exp.Star):
1348
+ # Resolve SELECT * from known schema
1349
+ if first_select:
1350
+ star_columns = self._resolve_star_columns(first_select)
1351
+ columns.extend(star_columns)
1352
+ else:
1353
+ # For expressions without alias, use SQL representation
1354
+ col_sql = projection.sql(dialect=self.dialect)
1355
+ columns.append(col_sql)
1356
+
1357
+ return columns
1358
+
1359
+ def _resolve_star_columns(self, select_node: exp.Select) -> List[str]:
1360
+ """
1361
+ Resolve SELECT * to actual column names from known file schema or CTEs.
1362
+
1363
+ Args:
1364
+ select_node: The SELECT node containing the * reference
1365
+
1366
+ Returns:
1367
+ List of column names if source is known, empty list otherwise
1368
+ """
1369
+ columns: List[str] = []
1370
+
1371
+ # Get the source table(s) from FROM clause
1372
+ from_clause = select_node.args.get("from")
1373
+ if not from_clause or not isinstance(from_clause, exp.From):
1374
+ return columns
1375
+
1376
+ source = from_clause.this
1377
+
1378
+ # Handle table reference
1379
+ if isinstance(source, exp.Table):
1380
+ source_name = self._get_qualified_table_name(source)
1381
+
1382
+ # First check file schema (views/tables from previous statements)
1383
+ if source_name in self._file_schema:
1384
+ columns.extend(self._file_schema[source_name].keys())
1385
+ else:
1386
+ # Check if this is a CTE reference within the same statement
1387
+ cte_columns = self._resolve_cte_columns(source_name, select_node)
1388
+ columns.extend(cte_columns)
1389
+
1390
+ # Handle subquery - can't resolve without deeper analysis
1391
+ elif isinstance(source, exp.Subquery) and source.alias:
1392
+ # Check if this subquery alias is in file schema (unlikely)
1393
+ if source.alias in self._file_schema:
1394
+ columns.extend(self._file_schema[source.alias].keys())
1395
+
1396
+ return columns
1397
+
1398
+ def _resolve_cte_columns(self, cte_name: str, select_node: exp.Select) -> List[str]:
1399
+ """
1400
+ Resolve columns from a CTE definition within the same statement.
1401
+
1402
+ Args:
1403
+ cte_name: Name of the CTE to resolve
1404
+ select_node: The SELECT node that references the CTE
1405
+
1406
+ Returns:
1407
+ List of column names from the CTE, empty if CTE not found
1408
+ """
1409
+ # Walk up the tree to find the WITH clause containing this CTE
1410
+ parent = select_node
1411
+ while parent:
1412
+ if hasattr(parent, "args") and parent.args.get("with"):
1413
+ with_clause = parent.args["with"]
1414
+ for cte in with_clause.expressions:
1415
+ if isinstance(cte, exp.CTE) and cte.alias == cte_name:
1416
+ # Found the CTE - extract its columns
1417
+ cte_select = cte.this
1418
+ if isinstance(cte_select, exp.Select):
1419
+ return self._extract_cte_select_columns(cte_select)
1420
+ parent = parent.parent if hasattr(parent, "parent") else None
1421
+
1422
+ return []
1423
+
1424
+ def _extract_cte_select_columns(self, cte_select: exp.Select) -> List[str]:
1425
+ """
1426
+ Extract column names from a CTE's SELECT statement.
1427
+
1428
+ This handles SELECT * within the CTE by resolving against file schema.
1429
+
1430
+ Args:
1431
+ cte_select: The SELECT expression within the CTE
1432
+
1433
+ Returns:
1434
+ List of column names
1435
+ """
1436
+ columns: List[str] = []
1437
+
1438
+ for projection in cte_select.expressions:
1439
+ if isinstance(projection, exp.Alias):
1440
+ columns.append(projection.alias)
1441
+ elif isinstance(projection, exp.Column):
1442
+ columns.append(projection.name)
1443
+ elif isinstance(projection, exp.Star):
1444
+ # Resolve SELECT * in CTE from file schema
1445
+ star_columns = self._resolve_star_columns(cte_select)
1446
+ columns.extend(star_columns)
1447
+ else:
1448
+ col_sql = projection.sql(dialect=self.dialect)
1449
+ columns.append(col_sql)
1450
+
1451
+ return columns
@@ -0,0 +1,27 @@
1
+ CREATE TEMPORARY VIEW first_view AS (
2
+ SELECT
3
+ a,
4
+ b,
5
+ c
6
+ FROM source_table
7
+ );
8
+
9
+ CREATE TEMPORARY VIEW second_view AS
10
+ WITH first_view_cte AS (
11
+ SELECT
12
+ *,
13
+ row_number() OVER (
14
+ PARTITION BY a ORDER BY b DESC
15
+ ) AS row_num
16
+ FROM first_view
17
+ )
18
+ SELECT * FROM first_view_cte
19
+ WHERE c = 1;
20
+
21
+ INSERT OVERWRITE output_table
22
+ SELECT
23
+ a,
24
+ b,
25
+ c,
26
+ row_num
27
+ FROM second_view;
@@ -428,3 +428,153 @@ class TestGraphBuilderInsertWithUnion:
428
428
  assert "db.source_a.last" in upstream_ids
429
429
  assert "db.source_b.first" in upstream_ids
430
430
  assert "db.source_b.last" in upstream_ids
431
+
432
+
433
+ class TestGraphBuilderCreateViewWithCTEAndWindowFunction:
434
+ """Tests for CREATE VIEW statements with CTEs and window functions."""
435
+
436
+ def test_create_view_with_cte_and_row_number(self, tmp_path):
437
+ """CREATE VIEW with CTE and ROW_NUMBER() OVER (PARTITION BY ...) should work."""
438
+ sql_file = tmp_path / "query.sql"
439
+ sql_file.write_text("""
440
+ CREATE VIEW my_view AS
441
+ WITH ranked_orders AS (
442
+ SELECT
443
+ customer_id,
444
+ order_date,
445
+ amount,
446
+ ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY order_date DESC) as rn
447
+ FROM orders
448
+ )
449
+ SELECT customer_id, order_date, amount
450
+ FROM ranked_orders
451
+ WHERE rn = 1
452
+ """)
453
+
454
+ builder = GraphBuilder(dialect="spark")
455
+ builder.add_file(sql_file)
456
+ graph = builder.build()
457
+
458
+ # Should have nodes created successfully
459
+ assert graph.metadata.total_nodes > 0
460
+ assert graph.metadata.total_edges > 0
461
+
462
+ # Check that output columns are qualified with the view name
463
+ node_ids = {node.identifier for node in graph.nodes}
464
+ assert "my_view.customer_id" in node_ids
465
+ assert "my_view.order_date" in node_ids
466
+ assert "my_view.amount" in node_ids
467
+
468
+ # Source columns from orders table should exist
469
+ assert "orders.customer_id" in node_ids
470
+ assert "orders.order_date" in node_ids
471
+ assert "orders.amount" in node_ids
472
+
473
+ def test_create_view_with_cte_row_number_lineage_tracing(self, tmp_path):
474
+ """Test that lineage correctly traces through CTE with window function."""
475
+ from sqlglider.graph.query import GraphQuerier
476
+
477
+ sql_file = tmp_path / "query.sql"
478
+ sql_file.write_text("""
479
+ CREATE VIEW latest_orders AS
480
+ WITH ranked AS (
481
+ SELECT
482
+ o.customer_id,
483
+ o.order_date,
484
+ o.total_amount,
485
+ ROW_NUMBER() OVER (PARTITION BY o.customer_id ORDER BY o.order_date DESC) as rn
486
+ FROM sales.orders o
487
+ )
488
+ SELECT customer_id, order_date, total_amount
489
+ FROM ranked
490
+ WHERE rn = 1
491
+ """)
492
+
493
+ builder = GraphBuilder(dialect="spark")
494
+ builder.add_file(sql_file)
495
+ graph = builder.build()
496
+
497
+ # Query upstream from output columns
498
+ querier = GraphQuerier(graph)
499
+
500
+ # customer_id should trace back to sales.orders.customer_id
501
+ upstream_customer = querier.find_upstream("latest_orders.customer_id")
502
+ upstream_ids = {n.identifier for n in upstream_customer.related_columns}
503
+ assert "sales.orders.customer_id" in upstream_ids
504
+
505
+ # total_amount should trace back to sales.orders.total_amount
506
+ upstream_amount = querier.find_upstream("latest_orders.total_amount")
507
+ upstream_ids = {n.identifier for n in upstream_amount.related_columns}
508
+ assert "sales.orders.total_amount" in upstream_ids
509
+
510
+ def test_create_view_multiple_window_functions(self, tmp_path):
511
+ """Test CREATE VIEW with multiple window functions."""
512
+ sql_file = tmp_path / "query.sql"
513
+ sql_file.write_text("""
514
+ CREATE VIEW customer_rankings AS
515
+ WITH metrics AS (
516
+ SELECT
517
+ customer_id,
518
+ total_spend,
519
+ ROW_NUMBER() OVER (ORDER BY total_spend DESC) as spend_rank,
520
+ RANK() OVER (PARTITION BY region ORDER BY total_spend DESC) as region_rank,
521
+ LAG(total_spend) OVER (PARTITION BY customer_id ORDER BY order_date) as prev_spend
522
+ FROM customer_orders
523
+ )
524
+ SELECT customer_id, total_spend, spend_rank, region_rank
525
+ FROM metrics
526
+ """)
527
+
528
+ builder = GraphBuilder(dialect="spark")
529
+ builder.add_file(sql_file)
530
+ graph = builder.build()
531
+
532
+ # Should process successfully with multiple window functions
533
+ assert graph.metadata.total_nodes > 0
534
+
535
+ node_ids = {node.identifier for node in graph.nodes}
536
+ assert "customer_rankings.customer_id" in node_ids
537
+ assert "customer_rankings.total_spend" in node_ids
538
+ assert "customer_rankings.spend_rank" in node_ids
539
+ assert "customer_rankings.region_rank" in node_ids
540
+
541
+ def test_create_view_nested_ctes_with_window(self, tmp_path):
542
+ """Test CREATE VIEW with nested CTEs and window functions."""
543
+ sql_file = tmp_path / "query.sql"
544
+ sql_file.write_text("""
545
+ CREATE VIEW final_report AS
546
+ WITH base_data AS (
547
+ SELECT customer_id, product_id, quantity, sale_date
548
+ FROM raw_sales
549
+ ),
550
+ ranked_sales AS (
551
+ SELECT
552
+ customer_id,
553
+ product_id,
554
+ quantity,
555
+ ROW_NUMBER() OVER (
556
+ PARTITION BY customer_id, product_id
557
+ ORDER BY sale_date DESC
558
+ ) as sale_rank
559
+ FROM base_data
560
+ )
561
+ SELECT customer_id, product_id, quantity
562
+ FROM ranked_sales
563
+ WHERE sale_rank = 1
564
+ """)
565
+
566
+ builder = GraphBuilder(dialect="spark")
567
+ builder.add_file(sql_file)
568
+ graph = builder.build()
569
+
570
+ assert graph.metadata.total_nodes > 0
571
+
572
+ node_ids = {node.identifier for node in graph.nodes}
573
+ assert "final_report.customer_id" in node_ids
574
+ assert "final_report.product_id" in node_ids
575
+ assert "final_report.quantity" in node_ids
576
+
577
+ # Source should trace to raw_sales
578
+ assert "raw_sales.customer_id" in node_ids
579
+ assert "raw_sales.product_id" in node_ids
580
+ assert "raw_sales.quantity" in node_ids