sql-glider 0.1.18__tar.gz → 0.1.20__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (150) hide show
  1. {sql_glider-0.1.18 → sql_glider-0.1.20}/PKG-INFO +1 -1
  2. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/_version.py +2 -2
  3. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/lineage/analyzer.py +43 -20
  4. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/lineage/test_analyzer.py +155 -0
  5. {sql_glider-0.1.18 → sql_glider-0.1.20}/.github/workflows/ci.yml +0 -0
  6. {sql_glider-0.1.18 → sql_glider-0.1.20}/.github/workflows/publish.yml +0 -0
  7. {sql_glider-0.1.18 → sql_glider-0.1.20}/.gitignore +0 -0
  8. {sql_glider-0.1.18 → sql_glider-0.1.20}/.python-version +0 -0
  9. {sql_glider-0.1.18 → sql_glider-0.1.20}/ARCHITECTURE.md +0 -0
  10. {sql_glider-0.1.18 → sql_glider-0.1.20}/CLAUDE.md +0 -0
  11. {sql_glider-0.1.18 → sql_glider-0.1.20}/LICENSE +0 -0
  12. {sql_glider-0.1.18 → sql_glider-0.1.20}/README.md +0 -0
  13. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-05-column-level-lineage.md +0 -0
  14. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-05-reverse-lineage.md +0 -0
  15. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-06-config-file-support.md +0 -0
  16. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-06-graph-lineage.md +0 -0
  17. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-06-unify-single-multi-query.md +0 -0
  18. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-07-sample-data-model.md +0 -0
  19. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-07-sql-templating.md +0 -0
  20. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-08-tables-command.md +0 -0
  21. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-09-graph-query-paths.md +0 -0
  22. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-13-dissect-command.md +0 -0
  23. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2025-12-14-tables-pull-command.md +0 -0
  24. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
  25. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2026-01-26-file-scoped-schema-context.md +0 -0
  26. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2026-01-28-sparksql-table-extraction.md +0 -0
  27. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2026-01-29-no-star-flag.md +0 -0
  28. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2026-01-29-resolve-schema.md +0 -0
  29. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2026-01-29-schema-pruning-optimization.md +0 -0
  30. {sql_glider-0.1.18 → sql_glider-0.1.20}/plans/2026-01-29-tables-scrape-command.md +0 -0
  31. {sql_glider-0.1.18 → sql_glider-0.1.20}/pyproject.toml +0 -0
  32. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/README.md +0 -0
  33. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/business/expire_dim_customer.sql +0 -0
  34. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/business/load_fact_orders.sql +0 -0
  35. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/business/load_fact_payments.sql +0 -0
  36. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/business/merge_dim_customer.sql +0 -0
  37. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/business/merge_dim_product.sql +0 -0
  38. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
  39. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/complex/conditional_merge.sql +0 -0
  40. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/complex/cte_insert.sql +0 -0
  41. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/complex/multi_table_transform.sql +0 -0
  42. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/dim_customer.sql +0 -0
  43. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/dim_product.sql +0 -0
  44. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/fact_orders.sql +0 -0
  45. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/fact_payments.sql +0 -0
  46. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/raw_addresses.sql +0 -0
  47. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/raw_customers.sql +0 -0
  48. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/raw_order_items.sql +0 -0
  49. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/raw_orders.sql +0 -0
  50. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/raw_payments.sql +0 -0
  51. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/raw_products.sql +0 -0
  52. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/stg_customers.sql +0 -0
  53. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/stg_orders.sql +0 -0
  54. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/stg_payments.sql +0 -0
  55. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/ddl/stg_products.sql +0 -0
  56. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
  57. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
  58. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
  59. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
  60. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/maintenance/update_product_status.sql +0 -0
  61. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
  62. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
  63. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
  64. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
  65. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/staging/load_stg_customers.sql +0 -0
  66. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/staging/load_stg_orders.sql +0 -0
  67. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/staging/load_stg_payments.sql +0 -0
  68. {sql_glider-0.1.18 → sql_glider-0.1.20}/sample_data_model/staging/load_stg_products.sql +0 -0
  69. {sql_glider-0.1.18 → sql_glider-0.1.20}/sqlglider.toml.example +0 -0
  70. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/__init__.py +0 -0
  71. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/catalog/__init__.py +0 -0
  72. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/catalog/base.py +0 -0
  73. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/catalog/databricks.py +0 -0
  74. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/catalog/registry.py +0 -0
  75. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/cli.py +0 -0
  76. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/dissection/__init__.py +0 -0
  77. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/dissection/analyzer.py +0 -0
  78. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/dissection/formatters.py +0 -0
  79. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/dissection/models.py +0 -0
  80. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/global_models.py +0 -0
  81. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/graph/__init__.py +0 -0
  82. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/graph/builder.py +0 -0
  83. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/graph/formatters.py +0 -0
  84. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/graph/merge.py +0 -0
  85. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/graph/models.py +0 -0
  86. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/graph/query.py +0 -0
  87. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/graph/serialization.py +0 -0
  88. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/lineage/__init__.py +0 -0
  89. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/lineage/formatters.py +0 -0
  90. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/schema/__init__.py +0 -0
  91. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/schema/extractor.py +0 -0
  92. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/templating/__init__.py +0 -0
  93. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/templating/base.py +0 -0
  94. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/templating/jinja.py +0 -0
  95. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/templating/registry.py +0 -0
  96. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/templating/variables.py +0 -0
  97. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/utils/__init__.py +0 -0
  98. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/utils/config.py +0 -0
  99. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/utils/file_utils.py +0 -0
  100. {sql_glider-0.1.18 → sql_glider-0.1.20}/src/sqlglider/utils/schema.py +0 -0
  101. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/__init__.py +0 -0
  102. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
  103. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
  104. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/multi_file_queries/customers.sql +0 -0
  105. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/multi_file_queries/orders.sql +0 -0
  106. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/multi_file_queries/reports.sql +0 -0
  107. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
  108. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_cte.sql +0 -0
  109. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
  110. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_cte_view_star.sql +0 -0
  111. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
  112. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_multi.sql +0 -0
  113. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
  114. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_single_query.sql +0 -0
  115. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_subquery.sql +0 -0
  116. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_tables.sql +0 -0
  117. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_view.sql +0 -0
  118. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
  119. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/fixtures/sample_manifest.csv +0 -0
  120. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/__init__.py +0 -0
  121. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/catalog/__init__.py +0 -0
  122. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/catalog/test_base.py +0 -0
  123. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/catalog/test_databricks.py +0 -0
  124. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/catalog/test_registry.py +0 -0
  125. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/dissection/__init__.py +0 -0
  126. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/dissection/test_analyzer.py +0 -0
  127. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/dissection/test_formatters.py +0 -0
  128. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/dissection/test_models.py +0 -0
  129. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/graph/__init__.py +0 -0
  130. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/graph/test_builder.py +0 -0
  131. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/graph/test_formatters.py +0 -0
  132. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/graph/test_merge.py +0 -0
  133. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/graph/test_models.py +0 -0
  134. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/graph/test_query.py +0 -0
  135. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/graph/test_serialization.py +0 -0
  136. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/lineage/__init__.py +0 -0
  137. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/lineage/test_formatters.py +0 -0
  138. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/schema/__init__.py +0 -0
  139. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/schema/test_extractor.py +0 -0
  140. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/templating/__init__.py +0 -0
  141. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/templating/test_base.py +0 -0
  142. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/templating/test_jinja.py +0 -0
  143. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/templating/test_registry.py +0 -0
  144. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/templating/test_variables.py +0 -0
  145. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/test_cli.py +0 -0
  146. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/utils/__init__.py +0 -0
  147. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/utils/test_config.py +0 -0
  148. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/utils/test_file_utils.py +0 -0
  149. {sql_glider-0.1.18 → sql_glider-0.1.20}/tests/sqlglider/utils/test_schema.py +0 -0
  150. {sql_glider-0.1.18 → sql_glider-0.1.20}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.18
3
+ Version: 0.1.20
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.18'
32
- __version_tuple__ = version_tuple = (0, 1, 18)
31
+ __version__ = version = '0.1.20'
32
+ __version_tuple__ = version_tuple = (0, 1, 20)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1480,7 +1480,7 @@ class LineageAnalyzer:
1480
1480
 
1481
1481
  def _extract_schema_from_statement(self, expr: exp.Expression) -> None:
1482
1482
  """
1483
- Extract column definitions from CREATE VIEW/TABLE AS SELECT statements.
1483
+ Extract column definitions from CREATE VIEW/TABLE AS SELECT and CACHE TABLE statements.
1484
1484
 
1485
1485
  This method builds up file-scoped schema context as statements are processed,
1486
1486
  enabling SQLGlot to correctly expand SELECT * and trace cross-statement references.
@@ -1488,29 +1488,39 @@ class LineageAnalyzer:
1488
1488
  Args:
1489
1489
  expr: The SQL expression to extract schema from
1490
1490
  """
1491
- # Only handle CREATE VIEW or CREATE TABLE (AS SELECT)
1492
- if not isinstance(expr, exp.Create):
1493
- return
1494
- if expr.kind not in ("VIEW", "TABLE"):
1495
- return
1491
+ target_name: str | None = None
1492
+ select_node: exp.Expression | None = None
1496
1493
 
1497
- # Get target table/view name
1498
- target = expr.this
1499
- if isinstance(target, exp.Schema):
1500
- target = target.this
1501
- if not isinstance(target, exp.Table):
1502
- return
1494
+ if isinstance(expr, exp.Create):
1495
+ if expr.kind not in ("VIEW", "TABLE"):
1496
+ return
1497
+
1498
+ target = expr.this
1499
+ if isinstance(target, exp.Schema):
1500
+ target = target.this
1501
+ if not isinstance(target, exp.Table):
1502
+ return
1503
+
1504
+ target_name = self._get_qualified_table_name(target)
1505
+ select_node = expr.expression
1506
+
1507
+ # Handle Subquery wrapper (e.g., CREATE VIEW AS (SELECT ...))
1508
+ if isinstance(select_node, exp.Subquery):
1509
+ select_node = select_node.this
1510
+
1511
+ elif isinstance(expr, exp.Cache):
1512
+ target = expr.this
1513
+ if not isinstance(target, exp.Table):
1514
+ return
1503
1515
 
1504
- target_name = self._get_qualified_table_name(target)
1516
+ target_name = self._get_qualified_table_name(target)
1517
+ select_node = expr.expression
1505
1518
 
1506
- # Get the SELECT node from the CREATE statement
1507
- select_node = expr.expression
1508
- if select_node is None:
1519
+ else:
1509
1520
  return
1510
1521
 
1511
- # Handle Subquery wrapper (e.g., CREATE VIEW AS (SELECT ...))
1512
- if isinstance(select_node, exp.Subquery):
1513
- select_node = select_node.this
1522
+ if target_name is None or select_node is None:
1523
+ return
1514
1524
 
1515
1525
  if not isinstance(
1516
1526
  select_node, (exp.Select, exp.Union, exp.Intersect, exp.Except)
@@ -1822,7 +1832,13 @@ class LineageAnalyzer:
1822
1832
  if actual_name in self._file_schema:
1823
1833
  return list(self._file_schema[actual_name].keys())
1824
1834
 
1825
- # Check JOIN clauses for aliased tables
1835
+ # Check if it's an aliased subquery (e.g., FROM (SELECT ...) sub)
1836
+ if isinstance(source, exp.Subquery) and source.alias == table_name:
1837
+ inner_select = source.this
1838
+ if isinstance(inner_select, exp.Select):
1839
+ return self._extract_subquery_columns(inner_select)
1840
+
1841
+ # Check JOIN clauses for aliased tables and subqueries
1826
1842
  joins = select_node.args.get("joins")
1827
1843
  if joins:
1828
1844
  for join in joins:
@@ -1835,6 +1851,13 @@ class LineageAnalyzer:
1835
1851
  actual_name = self._get_qualified_table_name(join_source)
1836
1852
  if actual_name in self._file_schema:
1837
1853
  return list(self._file_schema[actual_name].keys())
1854
+ if (
1855
+ isinstance(join_source, exp.Subquery)
1856
+ and join_source.alias == table_name
1857
+ ):
1858
+ inner_select = join_source.this
1859
+ if isinstance(inner_select, exp.Select):
1860
+ return self._extract_subquery_columns(inner_select)
1838
1861
 
1839
1862
  return []
1840
1863
 
@@ -2949,6 +2949,161 @@ class TestCacheTableStatements:
2949
2949
  assert len(skipped) == 1
2950
2950
  assert "DELETE" in skipped[0].statement_type
2951
2951
 
2952
+ def test_cache_table_star_resolution_in_subsequent_query(self):
2953
+ """SELECT * FROM a cached table should resolve columns from the CACHE statement."""
2954
+ sql = """
2955
+ CACHE TABLE cached_orders AS SELECT customer_id, order_total FROM orders;
2956
+ SELECT * FROM cached_orders;
2957
+ """
2958
+ analyzer = LineageAnalyzer(sql, dialect="spark")
2959
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
2960
+
2961
+ assert len(results) == 2
2962
+ # Second query should have resolved the star
2963
+ star_result = results[1]
2964
+ output_names = sorted(item.output_name for item in star_result.lineage_items)
2965
+ output_names = sorted(item.output_name for item in star_result.lineage_items)
2966
+ assert output_names == ["customer_id", "order_total"]
2967
+
2968
+ def test_cache_table_qualified_star_resolution(self):
2969
+ """table.* on a cached table should resolve columns."""
2970
+ sql = """
2971
+ CACHE TABLE cached_orders AS SELECT customer_id, order_total FROM orders;
2972
+ SELECT cached_orders.* FROM cached_orders;
2973
+ """
2974
+ analyzer = LineageAnalyzer(sql, dialect="spark")
2975
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
2976
+
2977
+ assert len(results) == 2
2978
+ star_result = results[1]
2979
+ output_names = sorted(item.output_name for item in star_result.lineage_items)
2980
+ assert output_names == ["customer_id", "order_total"]
2981
+
2982
+ def test_cache_lazy_table_star_resolution(self):
2983
+ """CACHE LAZY TABLE should also register schema for star resolution."""
2984
+ sql = """
2985
+ CACHE LAZY TABLE cached_users AS SELECT id, name, email FROM users;
2986
+ SELECT * FROM cached_users;
2987
+ """
2988
+ analyzer = LineageAnalyzer(sql, dialect="spark")
2989
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
2990
+
2991
+ assert len(results) == 2
2992
+ star_result = results[1]
2993
+ output_names = sorted(item.output_name for item in star_result.lineage_items)
2994
+ assert output_names == ["email", "id", "name"]
2995
+
2996
+ def test_cache_table_schema_registered(self):
2997
+ """Cached table schema should be registered for downstream star resolution."""
2998
+ sql = """
2999
+ CACHE TABLE cached_orders AS SELECT customer_id, order_total FROM orders;
3000
+ SELECT * FROM cached_orders;
3001
+ """
3002
+ analyzer = LineageAnalyzer(sql, dialect="spark")
3003
+ schema = analyzer.extract_schema_only()
3004
+ assert "cached_orders" in schema
3005
+ assert "customer_id" in schema["cached_orders"]
3006
+ assert "order_total" in schema["cached_orders"]
3007
+
3008
+
3009
+ class TestSubqueryAliasStarResolution:
3010
+ """Tests for resolving qualified star (sub.*) on aliased subqueries."""
3011
+
3012
+ def test_subquery_alias_star_in_from(self):
3013
+ """sub.* on an aliased subquery in FROM should resolve columns."""
3014
+ sql = """
3015
+ SELECT sub.*
3016
+ FROM (SELECT id, SUM(amount) AS total FROM orders GROUP BY id) sub
3017
+ """
3018
+ analyzer = LineageAnalyzer(sql, dialect="spark")
3019
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
3020
+
3021
+ assert len(results) == 1
3022
+ output_names = sorted(item.output_name for item in results[0].lineage_items)
3023
+ assert output_names == ["id", "total"]
3024
+
3025
+ def test_subquery_alias_star_in_join(self):
3026
+ """sub.* on an aliased subquery in JOIN should resolve columns."""
3027
+ sql = """
3028
+ SELECT t.id, sub.*
3029
+ FROM db.schema.users t
3030
+ JOIN (SELECT user_id, SUM(amount) AS total FROM db.schema.orders GROUP BY user_id) sub
3031
+ ON t.id = sub.user_id
3032
+ """
3033
+ analyzer = LineageAnalyzer(
3034
+ sql,
3035
+ dialect="spark",
3036
+ schema={"db.schema.users": {"id": "INT"}},
3037
+ )
3038
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
3039
+
3040
+ assert len(results) == 1
3041
+ output_names = sorted(item.output_name for item in results[0].lineage_items)
3042
+ assert output_names == ["db.schema.users.id", "total", "user_id"]
3043
+
3044
+ def test_subquery_alias_star_with_generated_columns(self):
3045
+ """sub.* plus generated columns should all resolve."""
3046
+ sql = """
3047
+ SELECT
3048
+ sub.*,
3049
+ sub.total * 0.1 AS commission
3050
+ FROM (SELECT id, SUM(amount) AS total FROM orders GROUP BY id) sub
3051
+ """
3052
+ analyzer = LineageAnalyzer(sql, dialect="spark")
3053
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
3054
+
3055
+ assert len(results) == 1
3056
+ output_names = sorted(item.output_name for item in results[0].lineage_items)
3057
+ assert output_names == ["commission", "id", "total"]
3058
+
3059
+ def test_cache_table_with_subquery_alias_star(self):
3060
+ """CACHE TABLE using sub.* should register all columns for downstream use."""
3061
+ sql = """
3062
+ CACHE TABLE cached_result AS
3063
+ SELECT
3064
+ sub.*,
3065
+ d.dept_name
3066
+ FROM (
3067
+ SELECT id, SUM(amount) AS total
3068
+ FROM db.schema.orders
3069
+ GROUP BY id
3070
+ ) sub
3071
+ JOIN db.schema.departments d ON sub.id = d.id;
3072
+
3073
+ SELECT * FROM cached_result;
3074
+ """
3075
+ analyzer = LineageAnalyzer(sql, dialect="spark")
3076
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
3077
+
3078
+ assert len(results) == 2
3079
+ # Second query should resolve all columns from the cached table
3080
+ star_result = results[1]
3081
+ output_names = sorted(item.output_name for item in star_result.lineage_items)
3082
+ assert output_names == ["dept_name", "id", "total"]
3083
+
3084
+ def test_temp_view_with_subquery_alias_star(self):
3085
+ """CREATE TEMP VIEW using sub.* should register all columns for downstream use."""
3086
+ sql = """
3087
+ CREATE TEMPORARY VIEW enriched AS
3088
+ SELECT
3089
+ sub.*,
3090
+ sub.total * 0.1 AS commission
3091
+ FROM (
3092
+ SELECT id, SUM(amount) AS total
3093
+ FROM db.schema.orders
3094
+ GROUP BY id
3095
+ ) sub;
3096
+
3097
+ SELECT * FROM enriched;
3098
+ """
3099
+ analyzer = LineageAnalyzer(sql, dialect="spark")
3100
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
3101
+
3102
+ assert len(results) == 2
3103
+ star_result = results[1]
3104
+ output_names = sorted(item.output_name for item in star_result.lineage_items)
3105
+ assert output_names == ["commission", "id", "total"]
3106
+
2952
3107
 
2953
3108
  class TestNoStar:
2954
3109
  """Tests for the --no-star flag that fails on unresolvable SELECT *."""
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes