sql-glider 0.1.10.tar.gz → 0.1.11.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. {sql_glider-0.1.10 → sql_glider-0.1.11}/PKG-INFO +1 -1
  2. sql_glider-0.1.11/plans/2026-01-28-sparksql-table-extraction.md +58 -0
  3. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/_version.py +2 -2
  4. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/cli.py +14 -1
  5. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/builder.py +6 -1
  6. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/lineage/analyzer.py +62 -1
  7. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/utils/config.py +1 -0
  8. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/lineage/test_analyzer.py +77 -1
  9. {sql_glider-0.1.10 → sql_glider-0.1.11}/.github/workflows/ci.yml +0 -0
  10. {sql_glider-0.1.10 → sql_glider-0.1.11}/.github/workflows/publish.yml +0 -0
  11. {sql_glider-0.1.10 → sql_glider-0.1.11}/.gitignore +0 -0
  12. {sql_glider-0.1.10 → sql_glider-0.1.11}/.python-version +0 -0
  13. {sql_glider-0.1.10 → sql_glider-0.1.11}/ARCHITECTURE.md +0 -0
  14. {sql_glider-0.1.10 → sql_glider-0.1.11}/CLAUDE.md +0 -0
  15. {sql_glider-0.1.10 → sql_glider-0.1.11}/LICENSE +0 -0
  16. {sql_glider-0.1.10 → sql_glider-0.1.11}/README.md +0 -0
  17. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-05-column-level-lineage.md +0 -0
  18. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-05-reverse-lineage.md +0 -0
  19. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-06-config-file-support.md +0 -0
  20. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-06-graph-lineage.md +0 -0
  21. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-06-unify-single-multi-query.md +0 -0
  22. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-07-sample-data-model.md +0 -0
  23. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-07-sql-templating.md +0 -0
  24. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-08-tables-command.md +0 -0
  25. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-09-graph-query-paths.md +0 -0
  26. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-13-dissect-command.md +0 -0
  27. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-14-tables-pull-command.md +0 -0
  28. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
  29. {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2026-01-26-file-scoped-schema-context.md +0 -0
  30. {sql_glider-0.1.10 → sql_glider-0.1.11}/pyproject.toml +0 -0
  31. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/README.md +0 -0
  32. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/expire_dim_customer.sql +0 -0
  33. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/load_fact_orders.sql +0 -0
  34. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/load_fact_payments.sql +0 -0
  35. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/merge_dim_customer.sql +0 -0
  36. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/merge_dim_product.sql +0 -0
  37. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
  38. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/complex/conditional_merge.sql +0 -0
  39. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/complex/cte_insert.sql +0 -0
  40. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/complex/multi_table_transform.sql +0 -0
  41. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/dim_customer.sql +0 -0
  42. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/dim_product.sql +0 -0
  43. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/fact_orders.sql +0 -0
  44. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/fact_payments.sql +0 -0
  45. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_addresses.sql +0 -0
  46. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_customers.sql +0 -0
  47. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_order_items.sql +0 -0
  48. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_orders.sql +0 -0
  49. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_payments.sql +0 -0
  50. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_products.sql +0 -0
  51. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/stg_customers.sql +0 -0
  52. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/stg_orders.sql +0 -0
  53. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/stg_payments.sql +0 -0
  54. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/stg_products.sql +0 -0
  55. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
  56. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
  57. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
  58. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
  59. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/maintenance/update_product_status.sql +0 -0
  60. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
  61. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
  62. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
  63. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
  64. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/staging/load_stg_customers.sql +0 -0
  65. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/staging/load_stg_orders.sql +0 -0
  66. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/staging/load_stg_payments.sql +0 -0
  67. {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/staging/load_stg_products.sql +0 -0
  68. {sql_glider-0.1.10 → sql_glider-0.1.11}/sqlglider.toml.example +0 -0
  69. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/__init__.py +0 -0
  70. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/catalog/__init__.py +0 -0
  71. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/catalog/base.py +0 -0
  72. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/catalog/databricks.py +0 -0
  73. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/catalog/registry.py +0 -0
  74. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/dissection/__init__.py +0 -0
  75. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/dissection/analyzer.py +0 -0
  76. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/dissection/formatters.py +0 -0
  77. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/dissection/models.py +0 -0
  78. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/global_models.py +0 -0
  79. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/__init__.py +0 -0
  80. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/merge.py +0 -0
  81. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/models.py +0 -0
  82. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/query.py +0 -0
  83. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/serialization.py +0 -0
  84. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/lineage/__init__.py +0 -0
  85. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/lineage/formatters.py +0 -0
  86. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/templating/__init__.py +0 -0
  87. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/templating/base.py +0 -0
  88. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/templating/jinja.py +0 -0
  89. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/templating/registry.py +0 -0
  90. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/templating/variables.py +0 -0
  91. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/utils/__init__.py +0 -0
  92. {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/utils/file_utils.py +0 -0
  93. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/__init__.py +0 -0
  94. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
  95. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
  96. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/customers.sql +0 -0
  97. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/orders.sql +0 -0
  98. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/reports.sql +0 -0
  99. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
  100. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_cte.sql +0 -0
  101. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
  102. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_cte_view_star.sql +0 -0
  103. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
  104. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_multi.sql +0 -0
  105. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
  106. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_single_query.sql +0 -0
  107. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_subquery.sql +0 -0
  108. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_tables.sql +0 -0
  109. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_view.sql +0 -0
  110. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
  111. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/sample_manifest.csv +0 -0
  112. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/__init__.py +0 -0
  113. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/catalog/__init__.py +0 -0
  114. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/catalog/test_base.py +0 -0
  115. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/catalog/test_databricks.py +0 -0
  116. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/catalog/test_registry.py +0 -0
  117. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/dissection/__init__.py +0 -0
  118. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/dissection/test_analyzer.py +0 -0
  119. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/dissection/test_formatters.py +0 -0
  120. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/dissection/test_models.py +0 -0
  121. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/__init__.py +0 -0
  122. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/test_builder.py +0 -0
  123. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/test_merge.py +0 -0
  124. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/test_models.py +0 -0
  125. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/test_query.py +0 -0
  126. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/test_serialization.py +0 -0
  127. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/lineage/__init__.py +0 -0
  128. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/lineage/test_formatters.py +0 -0
  129. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/templating/__init__.py +0 -0
  130. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/templating/test_base.py +0 -0
  131. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/templating/test_jinja.py +0 -0
  132. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/templating/test_registry.py +0 -0
  133. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/templating/test_variables.py +0 -0
  134. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/test_cli.py +0 -0
  135. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/utils/__init__.py +0 -0
  136. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/utils/test_config.py +0 -0
  137. {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/utils/test_file_utils.py +0 -0
  138. {sql_glider-0.1.10 → sql_glider-0.1.11}/uv.lock +0 -0
{sql_glider-0.1.10 → sql_glider-0.1.11}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sql-glider
- Version: 0.1.10
+ Version: 0.1.11
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
sql_glider-0.1.11/plans/2026-01-28-sparksql-table-extraction.md (new file)

@@ -0,0 +1,58 @@
+ # SparkSQL Table Extraction for Unsupported Statement Types
+
+ **Status:** Planned
+ **Date:** 2026-01-28
+
+ ## Overview
+
+ The `analyze_tables()` method in `LineageAnalyzer` currently extracts tables from a subset of statement types (SELECT, INSERT, CREATE, DELETE, DROP, TRUNCATE, CACHE). Several SparkSQL-specific statement types reference tables but are not captured during table extraction. This plan adds table extraction support for these missing types.
+
+ Column lineage is **not affected** — these statements contain no SELECT and cannot produce column-level lineage. The goal is to ensure `sqlglider tables overview` reports all tables referenced in a SQL file.
+
+ ## Statements to Add
+
+ | Statement | SQLGlot Expression | Table Location | Proposed Usage |
+ |-----------|-------------------|----------------|----------------|
+ | `UNCACHE TABLE t` | `exp.Uncache` | `expr.this` | `INPUT` |
+ | `REFRESH TABLE t` | `exp.Refresh` | `expr.this` | `INPUT` |
+ | `LOAD DATA INPATH '...' INTO TABLE t` | `exp.LoadData` | `expr.this` | `OUTPUT` |
+ | `ALTER TABLE t ...` | `exp.Alter` | `expr.this` | `OUTPUT` |
+ | `ANALYZE TABLE t COMPUTE STATISTICS` | `exp.Analyze` | `expr.this` | `INPUT` |
+
+ ### Usage Rationale
+
+ - **UNCACHE / REFRESH / ANALYZE**: Read-oriented metadata operations on an existing table → `INPUT`
+ - **LOAD DATA**: Writes data into a table → `OUTPUT`
+ - **ALTER TABLE**: Modifies table structure → `OUTPUT`
+
+ ## Implementation Steps
+
+ - [ ] Add extraction logic to `_get_target_table_info()` in [analyzer.py](src/sqlglider/lineage/analyzer.py) for each new expression type
+ - [ ] Add each type to the `_get_statement_type()` type_map for readable skip messages
+ - [ ] Add entries to `_is_target_table()` where applicable (LOAD DATA, ALTER)
+ - [ ] Ensure `_get_target_and_select()` returns `None` gracefully for these types (they have no SELECT)
+ - [ ] Add unit tests in [test_analyzer.py](tests/sqlglider/lineage/test_analyzer.py):
+   - Table extraction returns correct table name and usage for each type
+   - Column lineage correctly skips these with appropriate message
+   - Parameterized test covering all five statement types
+ - [ ] Verify graph build handles these gracefully (skipped queries warning)
+ - [ ] Run full test suite and coverage check
+
+ ## Files to Modify
+
+ - `src/sqlglider/lineage/analyzer.py` — extraction logic
+ - `tests/sqlglider/lineage/test_analyzer.py` — unit tests
+
+ ## Testing Strategy
+
+ - Parameterized tests with SparkSQL syntax for each statement type
+ - Verify `analyze_tables()` returns correct table name, usage, and object type
+ - Verify `analyze_queries()` adds these to `skipped_queries` with clear reason
+ - Ensure no regressions in existing tests
+ - Coverage threshold (80%) maintained
+
+ ## Notes
+
+ - These are all parsed by sqlglot's Spark dialect parser, so no custom parsing is needed
+ - Other SparkSQL statements (SHOW, DESCRIBE, EXPLAIN) parse as `exp.Command` — those are intentionally excluded since they don't reference tables in a structured way
+ - INSERT OVERWRITE and multi-INSERT patterns may warrant separate investigation
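The plan's table maps each statement to a SQLGlot expression class and a table location. A minimal sketch of that mapping (an editorial illustration, not part of the package; it assumes the expression classes and `expr.this` locations listed above, which can shift between sqlglot releases):

```python
import sqlglot
from sqlglot import exp

# Statements from the plan's table, paired with the plan's proposed usage labels.
PROPOSED = {
    "UNCACHE TABLE t": "INPUT",
    "REFRESH TABLE t": "INPUT",
    "LOAD DATA INPATH '/tmp/data' INTO TABLE t": "OUTPUT",
    "ALTER TABLE t ADD COLUMNS (c INT)": "OUTPUT",
    "ANALYZE TABLE t COMPUTE STATISTICS": "INPUT",
}

for sql, usage in PROPOSED.items():
    parsed = sqlglot.parse_one(sql, dialect="spark")
    # The plan says the referenced table sits in `expr.this` for all five types;
    # depending on the sqlglot version this may be an exp.Table or a plain value.
    target = parsed.this
    name = target.sql(dialect="spark") if isinstance(target, exp.Expression) else target
    print(type(parsed).__name__, name, usage)
```

Against a recent sqlglot this should print one line per statement with the expression class name, the extracted table, and the plan's proposed usage.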
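A sketch of the parameterized test shape the Testing Strategy calls for. It is hypothetical: `analyze_tables()` exists per the plan, but the result attributes used below (`name`, `usage`) are assumptions modeled on the CACHE TABLE tests further down in this diff, not the package's confirmed API:

```python
import pytest

from sqlglider.lineage.analyzer import LineageAnalyzer


@pytest.mark.parametrize(
    "sql,table,usage",
    [
        ("UNCACHE TABLE t", "t", "INPUT"),
        ("REFRESH TABLE t", "t", "INPUT"),
        ("LOAD DATA INPATH '/tmp/data' INTO TABLE t", "t", "OUTPUT"),
        ("ALTER TABLE t ADD COLUMNS (c INT)", "t", "OUTPUT"),
        ("ANALYZE TABLE t COMPUTE STATISTICS", "t", "INPUT"),
    ],
)
def test_sparksql_statement_table_extraction(sql, table, usage):
    analyzer = LineageAnalyzer(sql, dialect="spark")
    # Assumed result shape: each entry exposes a table name and a usage enum,
    # mirroring the existing table-overview assertions in this test module.
    tables = {t.name: t for t in analyzer.analyze_tables()}
    assert table in tables
    assert tables[table].usage.value == usage
```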
{sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/_version.py

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
  commit_id: COMMIT_ID
  __commit_id__: COMMIT_ID

- __version__ = version = '0.1.10'
- __version_tuple__ = version_tuple = (0, 1, 10)
+ __version__ = version = '0.1.11'
+ __version_tuple__ = version_tuple = (0, 1, 11)

  __commit_id__ = commit_id = None
{sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/cli.py

@@ -166,6 +166,11 @@ def lineage(
          exists=True,
          help="Path to variables file (JSON or YAML)",
      ),
+     no_star: bool = typer.Option(
+         False,
+         "--no-star",
+         help="Fail if SELECT * cannot be resolved to actual columns",
+     ),
  ) -> None:
      """
      Analyze column or table lineage for a SQL file.
@@ -207,6 +212,7 @@ def lineage(
      level_str = level or config.level or "column"
      output_format = output_format or config.output_format or "text"
      templater = templater or config.templater  # None means no templating
+     no_star = no_star or config.no_star or False
      # Validate and convert level to enum
      try:
          analysis_level = AnalysisLevel(level_str)
@@ -261,7 +267,7 @@
      )

      # Create analyzer
-     analyzer = LineageAnalyzer(sql, dialect=dialect)
+     analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star)

      # Unified lineage analysis (handles both single and multi-query files)
      results = analyzer.analyze_queries(
@@ -990,6 +996,11 @@ def graph_build(
          exists=True,
          help="Path to variables file (JSON or YAML)",
      ),
+     no_star: bool = typer.Option(
+         False,
+         "--no-star",
+         help="Fail if SELECT * cannot be resolved to actual columns",
+     ),
  ) -> None:
      """
      Build a lineage graph from SQL files.
@@ -1024,6 +1035,7 @@
      config = load_config()
      dialect = dialect or config.dialect or "spark"
      templater = templater or config.templater  # None means no templating
+     no_star = no_star or config.no_star or False

      # Validate and convert node format to enum
      try:
@@ -1080,6 +1092,7 @@
          node_format=node_format_enum,
          dialect=dialect,
          sql_preprocessor=sql_preprocessor,
+         no_star=no_star,
      )

      # Process manifest if provided
{sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/builder.py

@@ -33,6 +33,7 @@ class GraphBuilder:
          node_format: NodeFormat = NodeFormat.QUALIFIED,
          dialect: str = "spark",
          sql_preprocessor: Optional[SqlPreprocessor] = None,
+         no_star: bool = False,
      ):
          """
          Initialize the graph builder.
@@ -43,10 +44,12 @@
              sql_preprocessor: Optional function to preprocess SQL before analysis.
                  Takes (sql: str, file_path: Path) and returns processed SQL.
                  Useful for templating (e.g., Jinja2 rendering).
+             no_star: If True, fail when SELECT * cannot be resolved to columns
          """
          self.node_format = node_format
          self.dialect = dialect
          self.sql_preprocessor = sql_preprocessor
+         self.no_star = no_star
          self.graph: rx.PyDiGraph = rx.PyDiGraph()
          self._node_index_map: Dict[str, int] = {}  # identifier -> rustworkx node index
          self._source_files: Set[str] = set()
@@ -82,7 +85,9 @@
          if self.sql_preprocessor:
              sql_content = self.sql_preprocessor(sql_content, file_path)

-         analyzer = LineageAnalyzer(sql_content, dialect=file_dialect)
+         analyzer = LineageAnalyzer(
+             sql_content, dialect=file_dialect, no_star=self.no_star
+         )
          results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)

          # Print warnings for any skipped queries within the file
{sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/lineage/analyzer.py

@@ -11,6 +11,10 @@ from sqlglot.lineage import Node, lineage
  from sqlglider.global_models import AnalysisLevel


+ class StarResolutionError(Exception):
+     """Raised when SELECT * cannot be resolved and no_star mode is enabled."""
+
+
  class TableUsage(str, Enum):
      """How a table is used in a query."""
@@ -85,19 +89,21 @@ WarningCallback = Callable[[str], None]
  class LineageAnalyzer:
      """Analyze column and table lineage for SQL queries."""

-     def __init__(self, sql: str, dialect: str = "spark"):
+     def __init__(self, sql: str, dialect: str = "spark", no_star: bool = False):
          """
          Initialize the lineage analyzer.

          Args:
              sql: SQL query string to analyze (can contain multiple statements)
              dialect: SQL dialect (default: spark)
+             no_star: If True, fail when SELECT * cannot be resolved to columns

          Raises:
              ParseError: If the SQL cannot be parsed
          """
          self.sql = sql
          self.dialect = dialect
+         self._no_star = no_star
          self._skipped_queries: List[SkippedQuery] = []
          # File-scoped schema context for cross-statement lineage
          # Maps table/view names to their column definitions
@@ -171,6 +177,12 @@ class LineageAnalyzer:
              columns.append(qualified_name)
              self._column_mapping[qualified_name] = star_col
          if not columns:
+             if self._no_star:
+                 raise StarResolutionError(
+                     f"SELECT * could not be resolved to columns "
+                     f"for target table '{target_table}'. "
+                     f"Provide schema context or avoid using SELECT *."
+                 )
              # Fallback: can't resolve *, use * as column name
              qualified_name = f"{target_table}.*"
              columns.append(qualified_name)
@@ -200,6 +212,12 @@
              columns.append(qualified_name)
              self._column_mapping[qualified_name] = col
          if not qualified_star_cols:
+             if self._no_star:
+                 raise StarResolutionError(
+                     f"SELECT {source_table}.* could not be resolved "
+                     f"to columns for target table '{target_table}'. "
+                     f"Provide schema context or avoid using SELECT *."
+                 )
              # Fallback: can't resolve t.*, use * as column name
              qualified_name = f"{target_table}.*"
              columns.append(qualified_name)
@@ -226,6 +244,23 @@
          # Get the first SELECT for table resolution (handles UNION case)
          first_select = self._get_first_select(select_node)
          for projection in projections:
+             # Handle SELECT * in DQL context
+             if isinstance(projection, exp.Star):
+                 if first_select:
+                     star_columns = self._resolve_star_columns(first_select)
+                     for star_col in star_columns:
+                         columns.append(star_col)
+                         self._column_mapping[star_col] = star_col
+                 if not columns:
+                     if self._no_star:
+                         raise StarResolutionError(
+                             "SELECT * could not be resolved to columns. "
+                             "Provide schema context or avoid using SELECT *."
+                         )
+                     columns.append("*")
+                     self._column_mapping["*"] = "*"
+                 continue
+
              # Get the underlying expression (unwrap alias if present)
              if isinstance(projection, exp.Alias):
                  source_expr = projection.this
@@ -236,6 +271,30 @@
              column_name = None
              lineage_name = None

+             # Handle table-qualified star in DQL context (e.g., t.*)
+             if isinstance(source_expr, exp.Column) and isinstance(
+                 source_expr.this, exp.Star
+             ):
+                 source_table = source_expr.table
+                 dql_star_cols: List[str] = []
+                 if source_table and first_select:
+                     dql_star_cols = self._resolve_qualified_star(
+                         source_table, first_select
+                     )
+                 for col in dql_star_cols:
+                     columns.append(col)
+                     self._column_mapping[col] = col
+                 if not dql_star_cols:
+                     if self._no_star:
+                         raise StarResolutionError(
+                             f"SELECT {source_table}.* could not be resolved "
+                             f"to columns. "
+                             f"Provide schema context or avoid using SELECT *."
+                         )
+                     columns.append("*")
+                     self._column_mapping["*"] = "*"
+                 continue
+
              # Try to extract fully qualified name
              if isinstance(source_expr, exp.Column):
                  # Get table and column parts
@@ -407,6 +466,8 @@
                          level=level,
                      )
                  )
+             except StarResolutionError:
+                 raise
              except ValueError as e:
                  # Unsupported statement type - track it and continue
                  stmt_type = self._get_statement_type(expr)
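Together with the CLI and config plumbing above, the strict mode is driven entirely by the `no_star` constructor argument, and `StarResolutionError` propagates instead of being recorded as a skipped query. A minimal usage sketch, grounded in the constructor signature and the tests below (the SQL string is illustrative):

```python
from sqlglider.global_models import AnalysisLevel
from sqlglider.lineage.analyzer import LineageAnalyzer, StarResolutionError

sql = "SELECT * FROM some_table"  # no schema context, so * cannot be expanded

# Default behavior: fall back to a "*" pseudo-column in the lineage output.
lenient = LineageAnalyzer(sql, dialect="spark")
print(lenient.analyze_queries(level=AnalysisLevel.COLUMN))

# Strict behavior: --no-star / no_star=True turns the fallback into an error.
strict = LineageAnalyzer(sql, dialect="spark", no_star=True)
try:
    strict.analyze_queries(level=AnalysisLevel.COLUMN)
except StarResolutionError as err:
    print(f"unresolvable star: {err}")
```

The same flag flows through `GraphBuilder(no_star=...)`, so `graph build` fails in the same way when any analyzed file contains an unresolvable star.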
{sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/utils/config.py

@@ -60,6 +60,7 @@ class ConfigSettings(BaseModel):
      catalog_type: Optional[str] = None
      ddl_folder: Optional[str] = None
      catalog: Optional[CatalogConfig] = None
+     no_star: Optional[bool] = None


  def find_config_file(start_path: Optional[Path] = None) -> Optional[Path]:
{sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/lineage/test_analyzer.py

@@ -3,7 +3,7 @@
  import pytest

  from sqlglider.global_models import AnalysisLevel
- from sqlglider.lineage.analyzer import LineageAnalyzer
+ from sqlglider.lineage.analyzer import LineageAnalyzer, StarResolutionError


  class TestCaseInsensitiveForwardLineage:
@@ -2897,6 +2897,27 @@ class TestCacheTableStatements:
          assert "orders" in tables_by_name
          assert tables_by_name["orders"].usage.value == "INPUT"

+     def test_cache_table_with_inline_subquery_alias(self):
+         """CACHE TABLE with an aliased inline subquery should trace through to sources."""
+         sql = """
+         CACHE TABLE cached_result AS
+         SELECT s.customer_id, s.order_total
+         FROM (
+             SELECT c.id as customer_id, o.total as order_total
+             FROM customers c
+             JOIN orders o ON c.id = o.customer_id
+         ) s
+         """
+         analyzer = LineageAnalyzer(sql, dialect="spark")
+         results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
+
+         assert len(results) == 1
+         items = {
+             item.output_name: item.source_name for item in results[0].lineage_items
+         }
+         assert items["cached_result.customer_id"] == "customers.id"
+         assert items["cached_result.order_total"] == "orders.total"
+
      def test_bare_cache_table_is_skipped(self):
          """CACHE TABLE t (without AS SELECT) should be skipped."""
          sql = "CACHE TABLE my_table"
@@ -2923,3 +2944,58 @@
          skipped = analyzer.skipped_queries
          assert len(skipped) == 1
          assert "DELETE" in skipped[0].statement_type
+
+
+ class TestNoStar:
+     """Tests for the --no-star flag that fails on unresolvable SELECT *."""
+
+     def test_bare_star_no_star_raises(self):
+         """SELECT * from unknown table should raise with no_star=True."""
+         sql = "SELECT * FROM some_table"
+         analyzer = LineageAnalyzer(sql, dialect="spark", no_star=True)
+         with pytest.raises(
+             StarResolutionError, match="SELECT \\* could not be resolved"
+         ):
+             analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
+
+     def test_qualified_star_no_star_raises(self):
+         """SELECT t.* from unknown table should raise with no_star=True."""
+         sql = "SELECT t.* FROM some_table t"
+         analyzer = LineageAnalyzer(sql, dialect="spark", no_star=True)
+         with pytest.raises(
+             StarResolutionError, match="SELECT t\\.\\* could not be resolved"
+         ):
+             analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
+
+     def test_bare_star_default_falls_back(self):
+         """SELECT * without no_star should fall back to table.*."""
+         sql = "SELECT * FROM some_table"
+         analyzer = LineageAnalyzer(sql, dialect="spark")
+         results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
+         assert len(results) == 1
+         output_names = [item.output_name for item in results[0].lineage_items]
+         assert any("*" in name for name in output_names)
+
+     def test_resolvable_star_with_no_star_succeeds(self):
+         """SELECT * from CTE should work with no_star=True since columns are known."""
+         sql = """
+         WITH cte AS (SELECT 1 AS id, 'alice' AS name)
+         SELECT * FROM cte
+         """
+         analyzer = LineageAnalyzer(sql, dialect="spark", no_star=True)
+         results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
+         assert len(results) == 1
+         output_names = [item.output_name for item in results[0].lineage_items]
+         assert not any("*" in name for name in output_names)
+
+     def test_resolvable_qualified_star_with_no_star_succeeds(self):
+         """SELECT t.* from CTE should work with no_star=True since columns are known."""
+         sql = """
+         WITH cte AS (SELECT 1 AS id, 'alice' AS name)
+         SELECT cte.* FROM cte
+         """
+         analyzer = LineageAnalyzer(sql, dialect="spark", no_star=True)
+         results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
+         assert len(results) == 1
+         output_names = [item.output_name for item in results[0].lineage_items]
+         assert not any("*" in name for name in output_names)