sql-glider 0.1.15__tar.gz → 0.1.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_glider-0.1.15 → sql_glider-0.1.18}/PKG-INFO +1 -1
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/_version.py +2 -2
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/lineage/analyzer.py +54 -9
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/schema/extractor.py +13 -2
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/utils/schema.py +2 -2
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/lineage/test_analyzer.py +115 -17
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/schema/test_extractor.py +52 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/.github/workflows/ci.yml +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/.github/workflows/publish.yml +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/.gitignore +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/.python-version +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/ARCHITECTURE.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/CLAUDE.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/LICENSE +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/README.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-05-column-level-lineage.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-05-reverse-lineage.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-06-config-file-support.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-06-graph-lineage.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-06-unify-single-multi-query.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-07-sample-data-model.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-07-sql-templating.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-08-tables-command.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-09-graph-query-paths.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-13-dissect-command.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2025-12-14-tables-pull-command.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2026-01-26-file-scoped-schema-context.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2026-01-28-sparksql-table-extraction.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2026-01-29-no-star-flag.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2026-01-29-resolve-schema.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2026-01-29-schema-pruning-optimization.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/plans/2026-01-29-tables-scrape-command.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/pyproject.toml +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/README.md +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/business/expire_dim_customer.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/business/load_fact_orders.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/business/load_fact_payments.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/business/merge_dim_customer.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/business/merge_dim_product.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/complex/conditional_merge.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/complex/cte_insert.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/complex/multi_table_transform.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/dim_customer.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/dim_product.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/fact_orders.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/fact_payments.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/raw_addresses.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/raw_customers.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/raw_order_items.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/raw_orders.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/raw_payments.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/raw_products.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/stg_customers.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/stg_orders.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/stg_payments.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/ddl/stg_products.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/maintenance/update_product_status.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/staging/load_stg_customers.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/staging/load_stg_orders.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/staging/load_stg_payments.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/staging/load_stg_products.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/sqlglider.toml.example +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/catalog/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/catalog/base.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/catalog/databricks.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/catalog/registry.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/cli.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/dissection/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/dissection/analyzer.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/dissection/formatters.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/dissection/models.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/global_models.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/graph/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/graph/builder.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/graph/formatters.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/graph/merge.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/graph/models.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/graph/query.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/graph/serialization.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/lineage/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/lineage/formatters.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/schema/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/templating/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/templating/base.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/templating/jinja.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/templating/registry.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/templating/variables.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/utils/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/utils/config.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/src/sqlglider/utils/file_utils.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/multi_file_queries/customers.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/multi_file_queries/orders.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/multi_file_queries/reports.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_cte.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_cte_view_star.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_multi.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_single_query.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_subquery.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_tables.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_view.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/sample_manifest.csv +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/catalog/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/catalog/test_base.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/catalog/test_databricks.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/catalog/test_registry.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/dissection/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/dissection/test_analyzer.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/dissection/test_formatters.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/dissection/test_models.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/graph/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/graph/test_builder.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/graph/test_formatters.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/graph/test_merge.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/graph/test_models.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/graph/test_query.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/graph/test_serialization.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/lineage/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/lineage/test_formatters.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/schema/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/templating/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/templating/test_base.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/templating/test_jinja.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/templating/test_registry.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/templating/test_variables.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/test_cli.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/utils/__init__.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/utils/test_config.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/utils/test_file_utils.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/tests/sqlglider/utils/test_schema.py +0 -0
- {sql_glider-0.1.15 → sql_glider-0.1.18}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-glider
|
|
3
|
-
Version: 0.1.15
|
|
3
|
+
Version: 0.1.18
|
|
4
4
|
Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
|
|
5
5
|
Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
|
|
6
6
|
Project-URL: Repository, https://github.com/rycowhi/sql-glider/
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.15'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 15)
|
|
31
|
+
__version__ = version = '0.1.18'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 18)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -11,6 +11,48 @@ from sqlglot.lineage import Node, lineage
|
|
|
11
11
|
from sqlglider.global_models import AnalysisLevel
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
def _flat_schema_to_nested(
|
|
15
|
+
schema: Dict[str, Dict[str, str]],
|
|
16
|
+
) -> Dict[str, object]:
|
|
17
|
+
"""Convert flat dot-notation schema keys to the nested dict structure sqlglot expects.
|
|
18
|
+
|
|
19
|
+
sqlglot's MappingSchema requires consistent nesting depth across all tables.
|
|
20
|
+
Flat keys like ``"db.table"`` are split on dots and nested accordingly.
|
|
21
|
+
Shorter keys are padded with empty-string prefixes to match the max depth.
|
|
22
|
+
|
|
23
|
+
Examples::
|
|
24
|
+
|
|
25
|
+
{"users": {"id": "UNKNOWN"}}
|
|
26
|
+
→ {"users": {"id": "UNKNOWN"}} (depth 1, no change)
|
|
27
|
+
|
|
28
|
+
{"db.users": {"id": "UNKNOWN"}, "my_view": {"x": "UNKNOWN"}}
|
|
29
|
+
→ {"db": {"users": {"id": "UNKNOWN"}}, "": {"my_view": {"x": "UNKNOWN"}}}
|
|
30
|
+
"""
|
|
31
|
+
if not schema:
|
|
32
|
+
return {}
|
|
33
|
+
|
|
34
|
+
# Split all keys into parts
|
|
35
|
+
entries = [(key.split("."), cols) for key, cols in schema.items()]
|
|
36
|
+
max_depth = max(len(parts) for parts, _ in entries)
|
|
37
|
+
|
|
38
|
+
# If all keys are single-part (unqualified), return as-is
|
|
39
|
+
if max_depth == 1:
|
|
40
|
+
return schema # type: ignore[return-value]
|
|
41
|
+
|
|
42
|
+
# Pad shorter keys with empty-string prefixes to match max depth
|
|
43
|
+
nested: Dict[str, object] = {}
|
|
44
|
+
for parts, cols in entries:
|
|
45
|
+
while len(parts) < max_depth:
|
|
46
|
+
parts.insert(0, "")
|
|
47
|
+
d: Dict[str, object] = nested
|
|
48
|
+
for part in parts[:-1]:
|
|
49
|
+
if part not in d:
|
|
50
|
+
d[part] = {}
|
|
51
|
+
d = d[part] # type: ignore[assignment]
|
|
52
|
+
d[parts[-1]] = cols
|
|
53
|
+
return nested
|
|
54
|
+
|
|
55
|
+
|
|
14
56
|
class StarResolutionError(Exception):
|
|
15
57
|
"""Raised when SELECT * cannot be resolved and no_star mode is enabled."""
|
|
16
58
|
|
|
@@ -860,8 +902,10 @@ class LineageAnalyzer:
|
|
|
860
902
|
current_query_sql = self.expr.sql(dialect=self.dialect)
|
|
861
903
|
|
|
862
904
|
# Prune schema to only tables referenced in this query to avoid
|
|
863
|
-
# sqlglot.lineage() performance degradation with large schema dicts
|
|
864
|
-
|
|
905
|
+
# sqlglot.lineage() performance degradation with large schema dicts.
|
|
906
|
+
# Then convert from flat dot-notation keys to the nested dict structure
|
|
907
|
+
# that sqlglot's MappingSchema expects.
|
|
908
|
+
lineage_schema: Optional[Dict[str, object]] = None
|
|
865
909
|
if self._file_schema:
|
|
866
910
|
referenced = {t.lower() for t in self._get_query_tables()}
|
|
867
911
|
pruned_schema = {
|
|
@@ -869,8 +913,8 @@ class LineageAnalyzer:
|
|
|
869
913
|
for table, cols in self._file_schema.items()
|
|
870
914
|
if table.lower() in referenced
|
|
871
915
|
}
|
|
872
|
-
if
|
|
873
|
-
|
|
916
|
+
if pruned_schema:
|
|
917
|
+
lineage_schema = _flat_schema_to_nested(pruned_schema)
|
|
874
918
|
|
|
875
919
|
for col in columns_to_analyze:
|
|
876
920
|
try:
|
|
@@ -883,7 +927,7 @@ class LineageAnalyzer:
|
|
|
883
927
|
lineage_col,
|
|
884
928
|
current_query_sql,
|
|
885
929
|
dialect=self.dialect,
|
|
886
|
-
schema=
|
|
930
|
+
schema=lineage_schema,
|
|
887
931
|
)
|
|
888
932
|
|
|
889
933
|
# Collect all source columns
|
|
@@ -1133,7 +1177,7 @@ class LineageAnalyzer:
|
|
|
1133
1177
|
if table.db:
|
|
1134
1178
|
parts.append(table.db)
|
|
1135
1179
|
parts.append(table.name)
|
|
1136
|
-
return ".".join(parts)
|
|
1180
|
+
return ".".join(parts).lower()
|
|
1137
1181
|
|
|
1138
1182
|
def _resolve_table_reference(self, ref: str, select_node: exp.Select) -> str:
|
|
1139
1183
|
"""
|
|
@@ -1478,7 +1522,7 @@ class LineageAnalyzer:
|
|
|
1478
1522
|
|
|
1479
1523
|
if columns:
|
|
1480
1524
|
# Store with UNKNOWN type - SQLGlot only needs column names for expansion
|
|
1481
|
-
self._file_schema[target_name] = {col: "UNKNOWN" for col in columns}
|
|
1525
|
+
self._file_schema[target_name] = {col.lower(): "UNKNOWN" for col in columns}
|
|
1482
1526
|
|
|
1483
1527
|
def _extract_schema_from_dql(self, expr: exp.Expression) -> None:
|
|
1484
1528
|
"""Infer table schemas from column references in DQL.
|
|
@@ -1590,8 +1634,9 @@ class LineageAnalyzer:
|
|
|
1590
1634
|
|
|
1591
1635
|
if actual_table not in self._file_schema:
|
|
1592
1636
|
self._file_schema[actual_table] = {}
|
|
1593
|
-
|
|
1594
|
-
|
|
1637
|
+
col_lower = col_name.lower()
|
|
1638
|
+
if col_lower not in self._file_schema[actual_table]:
|
|
1639
|
+
self._file_schema[actual_table][col_lower] = "UNKNOWN"
|
|
1595
1640
|
|
|
1596
1641
|
def _extract_columns_from_select(
|
|
1597
1642
|
self, select_node: Union[exp.Select, exp.Union, exp.Intersect, exp.Except]
|
|
@@ -41,7 +41,14 @@ def extract_schemas_from_files(
|
|
|
41
41
|
if console is None:
|
|
42
42
|
console = Console(stderr=True)
|
|
43
43
|
|
|
44
|
-
schema: SchemaDict =
|
|
44
|
+
schema: SchemaDict = (
|
|
45
|
+
{
|
|
46
|
+
k.lower(): {c.lower(): v for c, v in cols.items()}
|
|
47
|
+
for k, cols in initial_schema.items()
|
|
48
|
+
}
|
|
49
|
+
if initial_schema
|
|
50
|
+
else {}
|
|
51
|
+
)
|
|
45
52
|
total = len(file_paths)
|
|
46
53
|
|
|
47
54
|
with Progress(
|
|
@@ -65,7 +72,11 @@ def extract_schemas_from_files(
|
|
|
65
72
|
strict_schema=strict_schema,
|
|
66
73
|
)
|
|
67
74
|
file_schema = analyzer.extract_schema_only()
|
|
68
|
-
|
|
75
|
+
for table_name, columns in file_schema.items():
|
|
76
|
+
if table_name in schema:
|
|
77
|
+
schema[table_name].update(columns)
|
|
78
|
+
else:
|
|
79
|
+
schema[table_name] = columns
|
|
69
80
|
except SchemaResolutionError:
|
|
70
81
|
raise
|
|
71
82
|
except Exception:
|
|
@@ -46,7 +46,7 @@ def parse_ddl_to_schema(ddl: str, dialect: str = "spark") -> Dict[str, Dict[str,
|
|
|
46
46
|
table_name = _get_qualified_name(target)
|
|
47
47
|
|
|
48
48
|
if columns:
|
|
49
|
-
schema[table_name] = {col: "UNKNOWN" for col in columns}
|
|
49
|
+
schema[table_name] = {col.lower(): "UNKNOWN" for col in columns}
|
|
50
50
|
|
|
51
51
|
return schema
|
|
52
52
|
|
|
@@ -59,4 +59,4 @@ def _get_qualified_name(table: exp.Table) -> str:
|
|
|
59
59
|
if table.db:
|
|
60
60
|
parts.append(table.db)
|
|
61
61
|
parts.append(table.name)
|
|
62
|
-
return ".".join(parts)
|
|
62
|
+
return ".".join(parts).lower()
|
|
@@ -3,7 +3,11 @@
|
|
|
3
3
|
import pytest
|
|
4
4
|
|
|
5
5
|
from sqlglider.global_models import AnalysisLevel
|
|
6
|
-
from sqlglider.lineage.analyzer import
|
|
6
|
+
from sqlglider.lineage.analyzer import (
|
|
7
|
+
LineageAnalyzer,
|
|
8
|
+
StarResolutionError,
|
|
9
|
+
_flat_schema_to_nested,
|
|
10
|
+
)
|
|
7
11
|
|
|
8
12
|
|
|
9
13
|
class TestCaseInsensitiveForwardLineage:
|
|
@@ -115,39 +119,39 @@ class TestCaseInsensitiveForwardLineage:
|
|
|
115
119
|
# Lowercase
|
|
116
120
|
(
|
|
117
121
|
"target_table.customer_name",
|
|
118
|
-
"
|
|
122
|
+
"target_table.customer_name",
|
|
119
123
|
["customers.customer_name"],
|
|
120
124
|
),
|
|
121
125
|
(
|
|
122
126
|
"target_table.region",
|
|
123
|
-
"
|
|
127
|
+
"target_table.region",
|
|
124
128
|
["customers.region"],
|
|
125
129
|
),
|
|
126
130
|
(
|
|
127
131
|
"target_table.total_amount",
|
|
128
|
-
"
|
|
132
|
+
"target_table.total_amount",
|
|
129
133
|
["orders.order_amount"],
|
|
130
134
|
),
|
|
131
135
|
# Uppercase
|
|
132
136
|
(
|
|
133
|
-
"
|
|
134
|
-
"
|
|
137
|
+
"target_table.CUSTOMER_NAME",
|
|
138
|
+
"target_table.customer_name",
|
|
135
139
|
["customers.customer_name"],
|
|
136
140
|
),
|
|
137
141
|
(
|
|
138
|
-
"
|
|
139
|
-
"
|
|
142
|
+
"target_table.REGION",
|
|
143
|
+
"target_table.region",
|
|
140
144
|
["customers.region"],
|
|
141
145
|
),
|
|
142
146
|
# Mixed case
|
|
143
147
|
(
|
|
144
148
|
"TaRgEt_TaBlE.CuStOmEr_NaMe",
|
|
145
|
-
"
|
|
149
|
+
"target_table.customer_name",
|
|
146
150
|
["customers.customer_name"],
|
|
147
151
|
),
|
|
148
152
|
(
|
|
149
153
|
"target_TABLE.REGION",
|
|
150
|
-
"
|
|
154
|
+
"target_table.region",
|
|
151
155
|
["customers.region"],
|
|
152
156
|
),
|
|
153
157
|
],
|
|
@@ -342,39 +346,39 @@ class TestCaseInsensitiveReverseLineage:
|
|
|
342
346
|
(
|
|
343
347
|
"customers.customer_name",
|
|
344
348
|
"customers.customer_name",
|
|
345
|
-
["
|
|
349
|
+
["target_table.customer_name"],
|
|
346
350
|
),
|
|
347
351
|
(
|
|
348
352
|
"customers.region",
|
|
349
353
|
"customers.region",
|
|
350
|
-
["
|
|
354
|
+
["target_table.region"],
|
|
351
355
|
),
|
|
352
356
|
(
|
|
353
357
|
"orders.order_amount",
|
|
354
358
|
"orders.order_amount",
|
|
355
|
-
["
|
|
359
|
+
["target_table.segment", "target_table.total_amount"],
|
|
356
360
|
),
|
|
357
361
|
# Uppercase
|
|
358
362
|
(
|
|
359
363
|
"CUSTOMERS.CUSTOMER_NAME",
|
|
360
364
|
"customers.customer_name",
|
|
361
|
-
["
|
|
365
|
+
["target_table.customer_name"],
|
|
362
366
|
),
|
|
363
367
|
(
|
|
364
368
|
"CUSTOMERS.REGION",
|
|
365
369
|
"customers.region",
|
|
366
|
-
["
|
|
370
|
+
["target_table.region"],
|
|
367
371
|
),
|
|
368
372
|
# Mixed case
|
|
369
373
|
(
|
|
370
374
|
"CuStOmErS.CuStOmEr_NaMe",
|
|
371
375
|
"customers.customer_name",
|
|
372
|
-
["
|
|
376
|
+
["target_table.customer_name"],
|
|
373
377
|
),
|
|
374
378
|
(
|
|
375
379
|
"cUsToMeRs.ReGiOn",
|
|
376
380
|
"customers.region",
|
|
377
|
-
["
|
|
381
|
+
["target_table.region"],
|
|
378
382
|
),
|
|
379
383
|
],
|
|
380
384
|
)
|
|
@@ -3181,3 +3185,97 @@ class TestSchemaPruning:
|
|
|
3181
3185
|
output_names = {item.output_name for r in results for item in r.lineage_items}
|
|
3182
3186
|
assert "id" in output_names
|
|
3183
3187
|
assert "email" in output_names
|
|
3188
|
+
|
|
3189
|
+
|
|
3190
|
+
class TestFlatSchemaToNested:
|
|
3191
|
+
"""Tests for _flat_schema_to_nested conversion utility."""
|
|
3192
|
+
|
|
3193
|
+
def test_empty(self):
|
|
3194
|
+
assert _flat_schema_to_nested({}) == {}
|
|
3195
|
+
|
|
3196
|
+
def test_unqualified_passthrough(self):
|
|
3197
|
+
schema = {"users": {"id": "UNKNOWN"}}
|
|
3198
|
+
assert _flat_schema_to_nested(schema) == schema
|
|
3199
|
+
|
|
3200
|
+
def test_two_part_keys(self):
|
|
3201
|
+
schema = {"db.users": {"id": "UNKNOWN"}}
|
|
3202
|
+
result = _flat_schema_to_nested(schema)
|
|
3203
|
+
assert result == {"db": {"users": {"id": "UNKNOWN"}}}
|
|
3204
|
+
|
|
3205
|
+
def test_three_part_keys(self):
|
|
3206
|
+
schema = {"cat.db.users": {"id": "UNKNOWN"}}
|
|
3207
|
+
result = _flat_schema_to_nested(schema)
|
|
3208
|
+
assert result == {"cat": {"db": {"users": {"id": "UNKNOWN"}}}}
|
|
3209
|
+
|
|
3210
|
+
def test_mixed_depth_pads_shorter_keys(self):
|
|
3211
|
+
schema = {
|
|
3212
|
+
"my_view": {"x": "UNKNOWN"},
|
|
3213
|
+
"db.users": {"id": "UNKNOWN"},
|
|
3214
|
+
}
|
|
3215
|
+
result = _flat_schema_to_nested(schema)
|
|
3216
|
+
assert result == {
|
|
3217
|
+
"": {"my_view": {"x": "UNKNOWN"}},
|
|
3218
|
+
"db": {"users": {"id": "UNKNOWN"}},
|
|
3219
|
+
}
|
|
3220
|
+
|
|
3221
|
+
|
|
3222
|
+
class TestQualifiedSchemaKeys:
|
|
3223
|
+
"""Tests for schema with qualified (dotted) table names."""
|
|
3224
|
+
|
|
3225
|
+
def test_qualified_star_expansion(self):
|
|
3226
|
+
"""SELECT * resolves correctly with qualified schema keys."""
|
|
3227
|
+
sql = "SELECT * FROM mydb.users"
|
|
3228
|
+
schema = {"mydb.users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
|
|
3229
|
+
analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
|
|
3230
|
+
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
3231
|
+
items = {
|
|
3232
|
+
(item.source_name, item.output_name)
|
|
3233
|
+
for r in results
|
|
3234
|
+
for item in r.lineage_items
|
|
3235
|
+
}
|
|
3236
|
+
assert ("mydb.users.id", "id") in items
|
|
3237
|
+
assert ("mydb.users.name", "name") in items
|
|
3238
|
+
|
|
3239
|
+
def test_qualified_explicit_columns(self):
|
|
3240
|
+
"""Explicit columns trace sources correctly with qualified schema keys."""
|
|
3241
|
+
sql = "SELECT id, name FROM mydb.users"
|
|
3242
|
+
schema = {"mydb.users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
|
|
3243
|
+
analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
|
|
3244
|
+
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
3245
|
+
items = {
|
|
3246
|
+
(item.source_name, item.output_name)
|
|
3247
|
+
for r in results
|
|
3248
|
+
for item in r.lineage_items
|
|
3249
|
+
}
|
|
3250
|
+
assert ("mydb.users.id", "mydb.users.id") in items
|
|
3251
|
+
assert ("mydb.users.name", "mydb.users.name") in items
|
|
3252
|
+
|
|
3253
|
+
def test_three_part_qualified(self):
|
|
3254
|
+
"""3-part qualified names (catalog.db.table) work correctly."""
|
|
3255
|
+
sql = "SELECT id FROM catalog.mydb.users"
|
|
3256
|
+
schema = {"catalog.mydb.users": {"id": "UNKNOWN"}}
|
|
3257
|
+
analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
|
|
3258
|
+
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
3259
|
+
items = [
|
|
3260
|
+
(item.source_name, item.output_name)
|
|
3261
|
+
for r in results
|
|
3262
|
+
for item in r.lineage_items
|
|
3263
|
+
]
|
|
3264
|
+
assert len(items) == 1
|
|
3265
|
+
assert items[0] == ("catalog.mydb.users.id", "catalog.mydb.users.id")
|
|
3266
|
+
|
|
3267
|
+
def test_mixed_qualified_and_unqualified(self):
|
|
3268
|
+
"""Mix of qualified and unqualified table names in schema."""
|
|
3269
|
+
sql = "SELECT * FROM my_view"
|
|
3270
|
+
schema = {
|
|
3271
|
+
"my_view": {"id": "UNKNOWN"},
|
|
3272
|
+
"mydb.users": {"id": "UNKNOWN", "name": "UNKNOWN"},
|
|
3273
|
+
}
|
|
3274
|
+
analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
|
|
3275
|
+
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
|
|
3276
|
+
items = {
|
|
3277
|
+
(item.source_name, item.output_name)
|
|
3278
|
+
for r in results
|
|
3279
|
+
for item in r.lineage_items
|
|
3280
|
+
}
|
|
3281
|
+
assert ("my_view.id", "id") in items
|
|
@@ -81,6 +81,58 @@ class TestExtractSchemasFromFiles:
|
|
|
81
81
|
assert "id" in schema["customers"]
|
|
82
82
|
assert "order_id" in schema["orders"]
|
|
83
83
|
|
|
84
|
+
def test_merges_columns_for_same_table(self, tmp_path, console):
|
|
85
|
+
"""Test that columns are merged when the same table appears in multiple files."""
|
|
86
|
+
file1 = tmp_path / "a.sql"
|
|
87
|
+
file1.write_text("SELECT c.id, c.name FROM customers c;")
|
|
88
|
+
|
|
89
|
+
file2 = tmp_path / "b.sql"
|
|
90
|
+
file2.write_text("SELECT c.id, c.age FROM customers c;")
|
|
91
|
+
|
|
92
|
+
schema = extract_schemas_from_files(
|
|
93
|
+
[file1, file2], dialect="spark", console=console
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
assert "customers" in schema
|
|
97
|
+
assert "id" in schema["customers"]
|
|
98
|
+
assert "name" in schema["customers"]
|
|
99
|
+
assert "age" in schema["customers"]
|
|
100
|
+
|
|
101
|
+
def test_merges_columns_case_insensitive(self, tmp_path, console):
|
|
102
|
+
"""Test that tables with different casing are merged into one entry."""
|
|
103
|
+
file1 = tmp_path / "a.sql"
|
|
104
|
+
file1.write_text("SELECT c.id, c.name FROM Customers c;")
|
|
105
|
+
|
|
106
|
+
file2 = tmp_path / "b.sql"
|
|
107
|
+
file2.write_text("SELECT c.id, c.AGE FROM customers c;")
|
|
108
|
+
|
|
109
|
+
schema = extract_schemas_from_files(
|
|
110
|
+
[file1, file2], dialect="spark", console=console
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
assert "customers" in schema
|
|
114
|
+
assert len([k for k in schema if k.lower() == "customers"]) == 1
|
|
115
|
+
assert "id" in schema["customers"]
|
|
116
|
+
assert "name" in schema["customers"]
|
|
117
|
+
assert "age" in schema["customers"]
|
|
118
|
+
|
|
119
|
+
def test_initial_schema_normalized(self, tmp_path, console):
|
|
120
|
+
"""Test that initial schema keys are normalized to lowercase."""
|
|
121
|
+
sql_file = tmp_path / "query.sql"
|
|
122
|
+
sql_file.write_text("SELECT o.id FROM orders o;")
|
|
123
|
+
|
|
124
|
+
initial = {"Existing_Table": {"Col1": "UNKNOWN"}}
|
|
125
|
+
schema = extract_schemas_from_files(
|
|
126
|
+
[sql_file],
|
|
127
|
+
dialect="spark",
|
|
128
|
+
initial_schema=initial,
|
|
129
|
+
console=console,
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
assert "existing_table" in schema
|
|
133
|
+
assert "col1" in schema["existing_table"]
|
|
134
|
+
assert "orders" in schema
|
|
135
|
+
|
|
84
136
|
def test_initial_schema_preserved(self, tmp_path, console):
|
|
85
137
|
"""Test that initial schema is included in result."""
|
|
86
138
|
sql_file = tmp_path / "query.sql"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/business/update_dim_customer_metrics.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/incremental/incr_fact_payments.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/incremental/incr_pres_sales_summary.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/maintenance/delete_expired_customers.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/maintenance/update_product_status.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/presentation/load_pres_customer_360.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/presentation/load_pres_customer_cohort.sql
RENAMED
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/sample_data_model/presentation/load_pres_sales_summary.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/multi_file_queries/analytics_pipeline.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/multi_file_queries/view_based_merge.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_cte_view_star.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_multi_query.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_single_query.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.15 → sql_glider-0.1.18}/tests/fixtures/original_queries/test_view_window_cte.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|