sql-glider 0.1.10__tar.gz → 0.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_glider-0.1.10 → sql_glider-0.1.11}/PKG-INFO +1 -1
- sql_glider-0.1.11/plans/2026-01-28-sparksql-table-extraction.md +58 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/_version.py +2 -2
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/cli.py +14 -1
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/builder.py +6 -1
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/lineage/analyzer.py +62 -1
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/utils/config.py +1 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/lineage/test_analyzer.py +77 -1
- {sql_glider-0.1.10 → sql_glider-0.1.11}/.github/workflows/ci.yml +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/.github/workflows/publish.yml +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/.gitignore +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/.python-version +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/ARCHITECTURE.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/CLAUDE.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/LICENSE +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/README.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-05-column-level-lineage.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-05-reverse-lineage.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-06-config-file-support.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-06-graph-lineage.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-06-unify-single-multi-query.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-07-sample-data-model.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-07-sql-templating.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-08-tables-command.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-09-graph-query-paths.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-13-dissect-command.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2025-12-14-tables-pull-command.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/plans/2026-01-26-file-scoped-schema-context.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/pyproject.toml +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/README.md +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/expire_dim_customer.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/load_fact_orders.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/load_fact_payments.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/merge_dim_customer.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/merge_dim_product.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/complex/conditional_merge.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/complex/cte_insert.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/complex/multi_table_transform.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/dim_customer.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/dim_product.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/fact_orders.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/fact_payments.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_addresses.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_customers.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_order_items.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_orders.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_payments.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/raw_products.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/stg_customers.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/stg_orders.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/stg_payments.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/ddl/stg_products.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/maintenance/update_product_status.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/staging/load_stg_customers.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/staging/load_stg_orders.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/staging/load_stg_payments.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/staging/load_stg_products.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/sqlglider.toml.example +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/catalog/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/catalog/base.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/catalog/databricks.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/catalog/registry.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/dissection/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/dissection/analyzer.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/dissection/formatters.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/dissection/models.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/global_models.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/merge.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/models.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/query.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/graph/serialization.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/lineage/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/lineage/formatters.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/templating/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/templating/base.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/templating/jinja.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/templating/registry.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/templating/variables.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/utils/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/src/sqlglider/utils/file_utils.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/customers.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/orders.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/reports.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_cte.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_cte_view_star.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_multi.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_single_query.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_subquery.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_tables.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_view.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/sample_manifest.csv +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/catalog/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/catalog/test_base.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/catalog/test_databricks.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/catalog/test_registry.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/dissection/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/dissection/test_analyzer.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/dissection/test_formatters.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/dissection/test_models.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/test_builder.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/test_merge.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/test_models.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/test_query.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/graph/test_serialization.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/lineage/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/lineage/test_formatters.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/templating/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/templating/test_base.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/templating/test_jinja.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/templating/test_registry.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/templating/test_variables.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/test_cli.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/utils/__init__.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/utils/test_config.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/tests/sqlglider/utils/test_file_utils.py +0 -0
- {sql_glider-0.1.10 → sql_glider-0.1.11}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-glider
|
|
3
|
-
Version: 0.1.10
|
|
3
|
+
Version: 0.1.11
|
|
4
4
|
Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
|
|
5
5
|
Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
|
|
6
6
|
Project-URL: Repository, https://github.com/rycowhi/sql-glider/
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# SparkSQL Table Extraction for Unsupported Statement Types
|
|
2
|
+
|
|
3
|
+
**Status:** Planned
|
|
4
|
+
**Date:** 2026-01-28
|
|
5
|
+
|
|
6
|
+
## Overview
|
|
7
|
+
|
|
8
|
+
The `analyze_tables()` method in `LineageAnalyzer` currently extracts tables from a subset of statement types (SELECT, INSERT, CREATE, DELETE, DROP, TRUNCATE, CACHE). Several SparkSQL-specific statement types reference tables but are not captured during table extraction. This plan adds table extraction support for these missing types.
|
|
9
|
+
|
|
10
|
+
Column lineage is **not affected** — these statements contain no SELECT and cannot produce column-level lineage. The goal is to ensure `sqlglider tables overview` reports all tables referenced in a SQL file.
|
|
11
|
+
|
|
12
|
+
## Statements to Add
|
|
13
|
+
|
|
14
|
+
| Statement | SQLGlot Expression | Table Location | Proposed Usage |
|
|
15
|
+
|-----------|-------------------|----------------|----------------|
|
|
16
|
+
| `UNCACHE TABLE t` | `exp.Uncache` | `expr.this` | `INPUT` |
|
|
17
|
+
| `REFRESH TABLE t` | `exp.Refresh` | `expr.this` | `INPUT` |
|
|
18
|
+
| `LOAD DATA INPATH '...' INTO TABLE t` | `exp.LoadData` | `expr.this` | `OUTPUT` |
|
|
19
|
+
| `ALTER TABLE t ...` | `exp.Alter` | `expr.this` | `OUTPUT` |
|
|
20
|
+
| `ANALYZE TABLE t COMPUTE STATISTICS` | `exp.Analyze` | `expr.this` | `INPUT` |
|
|
21
|
+
|
|
22
|
+
### Usage Rationale
|
|
23
|
+
|
|
24
|
+
- **UNCACHE / REFRESH / ANALYZE**: Read-oriented metadata operations on an existing table → `INPUT`
|
|
25
|
+
- **LOAD DATA**: Writes data into a table → `OUTPUT`
|
|
26
|
+
- **ALTER TABLE**: Modifies table structure → `OUTPUT`
|
|
27
|
+
|
|
28
|
+
## Implementation Steps
|
|
29
|
+
|
|
30
|
+
- [ ] Add extraction logic to `_get_target_table_info()` in [analyzer.py](src/sqlglider/lineage/analyzer.py) for each new expression type
|
|
31
|
+
- [ ] Add each type to the `_get_statement_type()` type_map for readable skip messages
|
|
32
|
+
- [ ] Add entries to `_is_target_table()` where applicable (LOAD DATA, ALTER)
|
|
33
|
+
- [ ] Ensure `_get_target_and_select()` returns `None` gracefully for these types (they have no SELECT)
|
|
34
|
+
- [ ] Add unit tests in [test_analyzer.py](tests/sqlglider/lineage/test_analyzer.py):
|
|
35
|
+
- Table extraction returns correct table name and usage for each type
|
|
36
|
+
- Column lineage correctly skips these with appropriate message
|
|
37
|
+
- Parameterized test covering all five statement types
|
|
38
|
+
- [ ] Verify graph build handles these gracefully (skipped queries warning)
|
|
39
|
+
- [ ] Run full test suite and coverage check
|
|
40
|
+
|
|
41
|
+
## Files to Modify
|
|
42
|
+
|
|
43
|
+
- `src/sqlglider/lineage/analyzer.py` — extraction logic
|
|
44
|
+
- `tests/sqlglider/lineage/test_analyzer.py` — unit tests
|
|
45
|
+
|
|
46
|
+
## Testing Strategy
|
|
47
|
+
|
|
48
|
+
- Parameterized tests with SparkSQL syntax for each statement type
|
|
49
|
+
- Verify `analyze_tables()` returns correct table name, usage, and object type
|
|
50
|
+
- Verify `analyze_queries()` adds these to `skipped_queries` with clear reason
|
|
51
|
+
- Ensure no regressions in existing tests
|
|
52
|
+
- Coverage threshold (80%) maintained
|
|
53
|
+
|
|
54
|
+
## Notes
|
|
55
|
+
|
|
56
|
+
- These are all parsed by sqlglot's Spark dialect parser, so no custom parsing is needed
|
|
57
|
+
- Some of these (SHOW, DESCRIBE, EXPLAIN) parse as `exp.Command` — those are intentionally excluded since they don't reference tables in a structured way
|
|
58
|
+
- INSERT OVERWRITE and multi-INSERT patterns may warrant separate investigation
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.10'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 10)
|
|
31
|
+
__version__ = version = '0.1.11'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 11)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -166,6 +166,11 @@ def lineage(
|
|
|
166
166
|
exists=True,
|
|
167
167
|
help="Path to variables file (JSON or YAML)",
|
|
168
168
|
),
|
|
169
|
+
no_star: bool = typer.Option(
|
|
170
|
+
False,
|
|
171
|
+
"--no-star",
|
|
172
|
+
help="Fail if SELECT * cannot be resolved to actual columns",
|
|
173
|
+
),
|
|
169
174
|
) -> None:
|
|
170
175
|
"""
|
|
171
176
|
Analyze column or table lineage for a SQL file.
|
|
@@ -207,6 +212,7 @@ def lineage(
|
|
|
207
212
|
level_str = level or config.level or "column"
|
|
208
213
|
output_format = output_format or config.output_format or "text"
|
|
209
214
|
templater = templater or config.templater # None means no templating
|
|
215
|
+
no_star = no_star or config.no_star or False
|
|
210
216
|
# Validate and convert level to enum
|
|
211
217
|
try:
|
|
212
218
|
analysis_level = AnalysisLevel(level_str)
|
|
@@ -261,7 +267,7 @@ def lineage(
|
|
|
261
267
|
)
|
|
262
268
|
|
|
263
269
|
# Create analyzer
|
|
264
|
-
analyzer = LineageAnalyzer(sql, dialect=dialect)
|
|
270
|
+
analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star)
|
|
265
271
|
|
|
266
272
|
# Unified lineage analysis (handles both single and multi-query files)
|
|
267
273
|
results = analyzer.analyze_queries(
|
|
@@ -990,6 +996,11 @@ def graph_build(
|
|
|
990
996
|
exists=True,
|
|
991
997
|
help="Path to variables file (JSON or YAML)",
|
|
992
998
|
),
|
|
999
|
+
no_star: bool = typer.Option(
|
|
1000
|
+
False,
|
|
1001
|
+
"--no-star",
|
|
1002
|
+
help="Fail if SELECT * cannot be resolved to actual columns",
|
|
1003
|
+
),
|
|
993
1004
|
) -> None:
|
|
994
1005
|
"""
|
|
995
1006
|
Build a lineage graph from SQL files.
|
|
@@ -1024,6 +1035,7 @@ def graph_build(
|
|
|
1024
1035
|
config = load_config()
|
|
1025
1036
|
dialect = dialect or config.dialect or "spark"
|
|
1026
1037
|
templater = templater or config.templater # None means no templating
|
|
1038
|
+
no_star = no_star or config.no_star or False
|
|
1027
1039
|
|
|
1028
1040
|
# Validate and convert node format to enum
|
|
1029
1041
|
try:
|
|
@@ -1080,6 +1092,7 @@ def graph_build(
|
|
|
1080
1092
|
node_format=node_format_enum,
|
|
1081
1093
|
dialect=dialect,
|
|
1082
1094
|
sql_preprocessor=sql_preprocessor,
|
|
1095
|
+
no_star=no_star,
|
|
1083
1096
|
)
|
|
1084
1097
|
|
|
1085
1098
|
# Process manifest if provided
|
|
@@ -33,6 +33,7 @@ class GraphBuilder:
|
|
|
33
33
|
node_format: NodeFormat = NodeFormat.QUALIFIED,
|
|
34
34
|
dialect: str = "spark",
|
|
35
35
|
sql_preprocessor: Optional[SqlPreprocessor] = None,
|
|
36
|
+
no_star: bool = False,
|
|
36
37
|
):
|
|
37
38
|
"""
|
|
38
39
|
Initialize the graph builder.
|
|
@@ -43,10 +44,12 @@ class GraphBuilder:
|
|
|
43
44
|
sql_preprocessor: Optional function to preprocess SQL before analysis.
|
|
44
45
|
Takes (sql: str, file_path: Path) and returns processed SQL.
|
|
45
46
|
Useful for templating (e.g., Jinja2 rendering).
|
|
47
|
+
no_star: If True, fail when SELECT * cannot be resolved to columns
|
|
46
48
|
"""
|
|
47
49
|
self.node_format = node_format
|
|
48
50
|
self.dialect = dialect
|
|
49
51
|
self.sql_preprocessor = sql_preprocessor
|
|
52
|
+
self.no_star = no_star
|
|
50
53
|
self.graph: rx.PyDiGraph = rx.PyDiGraph()
|
|
51
54
|
self._node_index_map: Dict[str, int] = {} # identifier -> rustworkx node index
|
|
52
55
|
self._source_files: Set[str] = set()
|
|
@@ -82,7 +85,9 @@ class GraphBuilder:
|
|
|
82
85
|
if self.sql_preprocessor:
|
|
83
86
|
sql_content = self.sql_preprocessor(sql_content, file_path)
|
|
84
87
|
|
|
85
|
-
analyzer = LineageAnalyzer(sql_content, dialect=file_dialect)
|
|
88
|
+
analyzer = LineageAnalyzer(
|
|
89
|
+
sql_content, dialect=file_dialect, no_star=self.no_star
|
|
90
|
+
)
|
|
86
91
|
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
87
92
|
|
|
88
93
|
# Print warnings for any skipped queries within the file
|
|
@@ -11,6 +11,10 @@ from sqlglot.lineage import Node, lineage
|
|
|
11
11
|
from sqlglider.global_models import AnalysisLevel
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
class StarResolutionError(Exception):
|
|
15
|
+
"""Raised when SELECT * cannot be resolved and no_star mode is enabled."""
|
|
16
|
+
|
|
17
|
+
|
|
14
18
|
class TableUsage(str, Enum):
|
|
15
19
|
"""How a table is used in a query."""
|
|
16
20
|
|
|
@@ -85,19 +89,21 @@ WarningCallback = Callable[[str], None]
|
|
|
85
89
|
class LineageAnalyzer:
|
|
86
90
|
"""Analyze column and table lineage for SQL queries."""
|
|
87
91
|
|
|
88
|
-
def __init__(self, sql: str, dialect: str = "spark"):
|
|
92
|
+
def __init__(self, sql: str, dialect: str = "spark", no_star: bool = False):
|
|
89
93
|
"""
|
|
90
94
|
Initialize the lineage analyzer.
|
|
91
95
|
|
|
92
96
|
Args:
|
|
93
97
|
sql: SQL query string to analyze (can contain multiple statements)
|
|
94
98
|
dialect: SQL dialect (default: spark)
|
|
99
|
+
no_star: If True, fail when SELECT * cannot be resolved to columns
|
|
95
100
|
|
|
96
101
|
Raises:
|
|
97
102
|
ParseError: If the SQL cannot be parsed
|
|
98
103
|
"""
|
|
99
104
|
self.sql = sql
|
|
100
105
|
self.dialect = dialect
|
|
106
|
+
self._no_star = no_star
|
|
101
107
|
self._skipped_queries: List[SkippedQuery] = []
|
|
102
108
|
# File-scoped schema context for cross-statement lineage
|
|
103
109
|
# Maps table/view names to their column definitions
|
|
@@ -171,6 +177,12 @@ class LineageAnalyzer:
|
|
|
171
177
|
columns.append(qualified_name)
|
|
172
178
|
self._column_mapping[qualified_name] = star_col
|
|
173
179
|
if not columns:
|
|
180
|
+
if self._no_star:
|
|
181
|
+
raise StarResolutionError(
|
|
182
|
+
f"SELECT * could not be resolved to columns "
|
|
183
|
+
f"for target table '{target_table}'. "
|
|
184
|
+
f"Provide schema context or avoid using SELECT *."
|
|
185
|
+
)
|
|
174
186
|
# Fallback: can't resolve *, use * as column name
|
|
175
187
|
qualified_name = f"{target_table}.*"
|
|
176
188
|
columns.append(qualified_name)
|
|
@@ -200,6 +212,12 @@ class LineageAnalyzer:
|
|
|
200
212
|
columns.append(qualified_name)
|
|
201
213
|
self._column_mapping[qualified_name] = col
|
|
202
214
|
if not qualified_star_cols:
|
|
215
|
+
if self._no_star:
|
|
216
|
+
raise StarResolutionError(
|
|
217
|
+
f"SELECT {source_table}.* could not be resolved "
|
|
218
|
+
f"to columns for target table '{target_table}'. "
|
|
219
|
+
f"Provide schema context or avoid using SELECT *."
|
|
220
|
+
)
|
|
203
221
|
# Fallback: can't resolve t.*, use * as column name
|
|
204
222
|
qualified_name = f"{target_table}.*"
|
|
205
223
|
columns.append(qualified_name)
|
|
@@ -226,6 +244,23 @@ class LineageAnalyzer:
|
|
|
226
244
|
# Get the first SELECT for table resolution (handles UNION case)
|
|
227
245
|
first_select = self._get_first_select(select_node)
|
|
228
246
|
for projection in projections:
|
|
247
|
+
# Handle SELECT * in DQL context
|
|
248
|
+
if isinstance(projection, exp.Star):
|
|
249
|
+
if first_select:
|
|
250
|
+
star_columns = self._resolve_star_columns(first_select)
|
|
251
|
+
for star_col in star_columns:
|
|
252
|
+
columns.append(star_col)
|
|
253
|
+
self._column_mapping[star_col] = star_col
|
|
254
|
+
if not columns:
|
|
255
|
+
if self._no_star:
|
|
256
|
+
raise StarResolutionError(
|
|
257
|
+
"SELECT * could not be resolved to columns. "
|
|
258
|
+
"Provide schema context or avoid using SELECT *."
|
|
259
|
+
)
|
|
260
|
+
columns.append("*")
|
|
261
|
+
self._column_mapping["*"] = "*"
|
|
262
|
+
continue
|
|
263
|
+
|
|
229
264
|
# Get the underlying expression (unwrap alias if present)
|
|
230
265
|
if isinstance(projection, exp.Alias):
|
|
231
266
|
source_expr = projection.this
|
|
@@ -236,6 +271,30 @@ class LineageAnalyzer:
|
|
|
236
271
|
column_name = None
|
|
237
272
|
lineage_name = None
|
|
238
273
|
|
|
274
|
+
# Handle table-qualified star in DQL context (e.g., t.*)
|
|
275
|
+
if isinstance(source_expr, exp.Column) and isinstance(
|
|
276
|
+
source_expr.this, exp.Star
|
|
277
|
+
):
|
|
278
|
+
source_table = source_expr.table
|
|
279
|
+
dql_star_cols: List[str] = []
|
|
280
|
+
if source_table and first_select:
|
|
281
|
+
dql_star_cols = self._resolve_qualified_star(
|
|
282
|
+
source_table, first_select
|
|
283
|
+
)
|
|
284
|
+
for col in dql_star_cols:
|
|
285
|
+
columns.append(col)
|
|
286
|
+
self._column_mapping[col] = col
|
|
287
|
+
if not dql_star_cols:
|
|
288
|
+
if self._no_star:
|
|
289
|
+
raise StarResolutionError(
|
|
290
|
+
f"SELECT {source_table}.* could not be resolved "
|
|
291
|
+
f"to columns. "
|
|
292
|
+
f"Provide schema context or avoid using SELECT *."
|
|
293
|
+
)
|
|
294
|
+
columns.append("*")
|
|
295
|
+
self._column_mapping["*"] = "*"
|
|
296
|
+
continue
|
|
297
|
+
|
|
239
298
|
# Try to extract fully qualified name
|
|
240
299
|
if isinstance(source_expr, exp.Column):
|
|
241
300
|
# Get table and column parts
|
|
@@ -407,6 +466,8 @@ class LineageAnalyzer:
|
|
|
407
466
|
level=level,
|
|
408
467
|
)
|
|
409
468
|
)
|
|
469
|
+
except StarResolutionError:
|
|
470
|
+
raise
|
|
410
471
|
except ValueError as e:
|
|
411
472
|
# Unsupported statement type - track it and continue
|
|
412
473
|
stmt_type = self._get_statement_type(expr)
|
|
@@ -60,6 +60,7 @@ class ConfigSettings(BaseModel):
|
|
|
60
60
|
catalog_type: Optional[str] = None
|
|
61
61
|
ddl_folder: Optional[str] = None
|
|
62
62
|
catalog: Optional[CatalogConfig] = None
|
|
63
|
+
no_star: Optional[bool] = None
|
|
63
64
|
|
|
64
65
|
|
|
65
66
|
def find_config_file(start_path: Optional[Path] = None) -> Optional[Path]:
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import pytest
|
|
4
4
|
|
|
5
5
|
from sqlglider.global_models import AnalysisLevel
|
|
6
|
-
from sqlglider.lineage.analyzer import LineageAnalyzer
|
|
6
|
+
from sqlglider.lineage.analyzer import LineageAnalyzer, StarResolutionError
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class TestCaseInsensitiveForwardLineage:
|
|
@@ -2897,6 +2897,27 @@ class TestCacheTableStatements:
|
|
|
2897
2897
|
assert "orders" in tables_by_name
|
|
2898
2898
|
assert tables_by_name["orders"].usage.value == "INPUT"
|
|
2899
2899
|
|
|
2900
|
+
def test_cache_table_with_inline_subquery_alias(self):
|
|
2901
|
+
"""CACHE TABLE with an aliased inline subquery should trace through to sources."""
|
|
2902
|
+
sql = """
|
|
2903
|
+
CACHE TABLE cached_result AS
|
|
2904
|
+
SELECT s.customer_id, s.order_total
|
|
2905
|
+
FROM (
|
|
2906
|
+
SELECT c.id as customer_id, o.total as order_total
|
|
2907
|
+
FROM customers c
|
|
2908
|
+
JOIN orders o ON c.id = o.customer_id
|
|
2909
|
+
) s
|
|
2910
|
+
"""
|
|
2911
|
+
analyzer = LineageAnalyzer(sql, dialect="spark")
|
|
2912
|
+
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
2913
|
+
|
|
2914
|
+
assert len(results) == 1
|
|
2915
|
+
items = {
|
|
2916
|
+
item.output_name: item.source_name for item in results[0].lineage_items
|
|
2917
|
+
}
|
|
2918
|
+
assert items["cached_result.customer_id"] == "customers.id"
|
|
2919
|
+
assert items["cached_result.order_total"] == "orders.total"
|
|
2920
|
+
|
|
2900
2921
|
def test_bare_cache_table_is_skipped(self):
|
|
2901
2922
|
"""CACHE TABLE t (without AS SELECT) should be skipped."""
|
|
2902
2923
|
sql = "CACHE TABLE my_table"
|
|
@@ -2923,3 +2944,58 @@ class TestCacheTableStatements:
|
|
|
2923
2944
|
skipped = analyzer.skipped_queries
|
|
2924
2945
|
assert len(skipped) == 1
|
|
2925
2946
|
assert "DELETE" in skipped[0].statement_type
|
|
2947
|
+
|
|
2948
|
+
|
|
2949
|
+
class TestNoStar:
|
|
2950
|
+
"""Tests for the --no-star flag that fails on unresolvable SELECT *."""
|
|
2951
|
+
|
|
2952
|
+
def test_bare_star_no_star_raises(self):
|
|
2953
|
+
"""SELECT * from unknown table should raise with no_star=True."""
|
|
2954
|
+
sql = "SELECT * FROM some_table"
|
|
2955
|
+
analyzer = LineageAnalyzer(sql, dialect="spark", no_star=True)
|
|
2956
|
+
with pytest.raises(
|
|
2957
|
+
StarResolutionError, match="SELECT \\* could not be resolved"
|
|
2958
|
+
):
|
|
2959
|
+
analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
2960
|
+
|
|
2961
|
+
def test_qualified_star_no_star_raises(self):
|
|
2962
|
+
"""SELECT t.* from unknown table should raise with no_star=True."""
|
|
2963
|
+
sql = "SELECT t.* FROM some_table t"
|
|
2964
|
+
analyzer = LineageAnalyzer(sql, dialect="spark", no_star=True)
|
|
2965
|
+
with pytest.raises(
|
|
2966
|
+
StarResolutionError, match="SELECT t\\.\\* could not be resolved"
|
|
2967
|
+
):
|
|
2968
|
+
analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
2969
|
+
|
|
2970
|
+
def test_bare_star_default_falls_back(self):
|
|
2971
|
+
"""SELECT * without no_star should fall back to table.*."""
|
|
2972
|
+
sql = "SELECT * FROM some_table"
|
|
2973
|
+
analyzer = LineageAnalyzer(sql, dialect="spark")
|
|
2974
|
+
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
2975
|
+
assert len(results) == 1
|
|
2976
|
+
output_names = [item.output_name for item in results[0].lineage_items]
|
|
2977
|
+
assert any("*" in name for name in output_names)
|
|
2978
|
+
|
|
2979
|
+
def test_resolvable_star_with_no_star_succeeds(self):
|
|
2980
|
+
"""SELECT * from CTE should work with no_star=True since columns are known."""
|
|
2981
|
+
sql = """
|
|
2982
|
+
WITH cte AS (SELECT 1 AS id, 'alice' AS name)
|
|
2983
|
+
SELECT * FROM cte
|
|
2984
|
+
"""
|
|
2985
|
+
analyzer = LineageAnalyzer(sql, dialect="spark", no_star=True)
|
|
2986
|
+
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
2987
|
+
assert len(results) == 1
|
|
2988
|
+
output_names = [item.output_name for item in results[0].lineage_items]
|
|
2989
|
+
assert not any("*" in name for name in output_names)
|
|
2990
|
+
|
|
2991
|
+
def test_resolvable_qualified_star_with_no_star_succeeds(self):
|
|
2992
|
+
"""SELECT t.* from CTE should work with no_star=True since columns are known."""
|
|
2993
|
+
sql = """
|
|
2994
|
+
WITH cte AS (SELECT 1 AS id, 'alice' AS name)
|
|
2995
|
+
SELECT cte.* FROM cte
|
|
2996
|
+
"""
|
|
2997
|
+
analyzer = LineageAnalyzer(sql, dialect="spark", no_star=True)
|
|
2998
|
+
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
2999
|
+
assert len(results) == 1
|
|
3000
|
+
output_names = [item.output_name for item in results[0].lineage_items]
|
|
3001
|
+
assert not any("*" in name for name in output_names)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/business/update_dim_customer_metrics.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/incremental/incr_fact_payments.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/incremental/incr_pres_sales_summary.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/maintenance/delete_expired_customers.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/maintenance/update_product_status.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_customer_360.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_customer_cohort.sql
RENAMED
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/sample_data_model/presentation/load_pres_sales_summary.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/analytics_pipeline.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/multi_file_queries/view_based_merge.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_cte_view_star.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_multi_query.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_single_query.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.10 → sql_glider-0.1.11}/tests/fixtures/original_queries/test_view_window_cte.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|