sql-glider 0.1.14__tar.gz → 0.1.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_glider-0.1.14 → sql_glider-0.1.16}/PKG-INFO +1 -1
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/_version.py +2 -2
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/cli.py +32 -1
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/builder.py +16 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/formatters.py +92 -1
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/lineage/analyzer.py +49 -5
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/schema/extractor.py +3 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_formatters.py +62 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/lineage/test_analyzer.py +99 -1
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/test_cli.py +138 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/.github/workflows/ci.yml +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/.github/workflows/publish.yml +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/.gitignore +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/.python-version +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/ARCHITECTURE.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/CLAUDE.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/LICENSE +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/README.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-05-column-level-lineage.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-05-reverse-lineage.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-06-config-file-support.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-06-graph-lineage.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-06-unify-single-multi-query.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-07-sample-data-model.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-07-sql-templating.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-08-tables-command.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-09-graph-query-paths.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-13-dissect-command.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-14-tables-pull-command.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-26-file-scoped-schema-context.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-28-sparksql-table-extraction.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-29-no-star-flag.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-29-resolve-schema.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-29-schema-pruning-optimization.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-29-tables-scrape-command.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/pyproject.toml +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/README.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/expire_dim_customer.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/load_fact_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/load_fact_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/merge_dim_customer.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/merge_dim_product.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/complex/conditional_merge.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/complex/cte_insert.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/complex/multi_table_transform.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/dim_customer.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/dim_product.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/fact_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/fact_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_addresses.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_customers.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_order_items.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_products.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/stg_customers.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/stg_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/stg_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/stg_products.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/maintenance/update_product_status.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/staging/load_stg_customers.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/staging/load_stg_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/staging/load_stg_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/staging/load_stg_products.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/sqlglider.toml.example +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/catalog/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/catalog/base.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/catalog/databricks.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/catalog/registry.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/dissection/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/dissection/analyzer.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/dissection/formatters.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/dissection/models.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/global_models.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/merge.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/models.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/query.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/serialization.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/lineage/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/lineage/formatters.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/schema/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/templating/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/templating/base.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/templating/jinja.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/templating/registry.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/templating/variables.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/utils/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/utils/config.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/utils/file_utils.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/utils/schema.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/customers.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/reports.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_cte.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_cte_view_star.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_multi.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_single_query.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_subquery.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_tables.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_view.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/sample_manifest.csv +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/catalog/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/catalog/test_base.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/catalog/test_databricks.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/catalog/test_registry.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/dissection/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/dissection/test_analyzer.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/dissection/test_formatters.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/dissection/test_models.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_builder.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_merge.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_models.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_query.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_serialization.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/lineage/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/lineage/test_formatters.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/schema/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/schema/test_extractor.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/templating/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/templating/test_base.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/templating/test_jinja.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/templating/test_registry.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/templating/test_variables.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/utils/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/utils/test_config.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/utils/test_file_utils.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/utils/test_schema.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.16}/uv.lock +0 -0
{sql_glider-0.1.14 → sql_glider-0.1.16}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sql-glider
-Version: 0.1.14
+Version: 0.1.16
 Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
 Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
 Project-URL: Repository, https://github.com/rycowhi/sql-glider/
{sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/_version.py
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.14'
-__version_tuple__ = version_tuple = (0, 1, 14)
+__version__ = version = '0.1.16'
+__version_tuple__ = version_tuple = (0, 1, 16)
 
 __commit_id__ = commit_id = None
{sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/cli.py
@@ -171,6 +171,12 @@ def lineage(
         "--no-star",
         help="Fail if SELECT * cannot be resolved to actual columns",
     ),
+    provide_schema: Optional[Path] = typer.Option(
+        None,
+        "--provide-schema",
+        exists=True,
+        help="Path to a schema file (JSON, CSV, or text) for star resolution",
+    ),
 ) -> None:
     """
     Analyze column or table lineage for a SQL file.
@@ -266,8 +272,15 @@ def lineage(
         source_path=source_path,
     )
 
+    # Load provided schema if specified
+    schema = None
+    if provide_schema:
+        from sqlglider.graph.formatters import load_schema_file
+
+        schema = load_schema_file(provide_schema)
+
     # Create analyzer
-    analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star)
+    analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star, schema=schema)
 
     # Unified lineage analysis (handles both single and multi-query files)
     results = analyzer.analyze_queries(
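Taken together, the two hunks above let a schema file stand in for DDL when resolving `SELECT *` in the `lineage` command. The sketch below mirrors that wiring outside the CLI; the file contents and table names are illustrative, and the attribute names on the result objects are taken from the test changes further down in this diff.

```python
# Minimal sketch of what `lineage <file> --provide-schema schema.json` now does
# internally, based on the hunks above. File contents are illustrative.
from pathlib import Path

from sqlglider.global_models import AnalysisLevel
from sqlglider.graph.formatters import load_schema_file
from sqlglider.lineage.analyzer import LineageAnalyzer

schema_path = Path("schema.json")
schema_path.write_text('{"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}')

schema = load_schema_file(schema_path)  # {"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
analyzer = LineageAnalyzer("SELECT * FROM users", dialect="spark", schema=schema)
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)

# With the provided schema, the star expands to `id` and `name` instead of
# being left unresolved (or failing under --no-star).
for result in results:
    for item in result.lineage_items:
        print(item.source_name, "->", item.output_name)
```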
@@ -1292,6 +1305,13 @@ def graph_build(
         "--dump-schema-format",
         help="Format for dumped schema: 'text' (default), 'json', or 'csv'",
     ),
+    provide_schema: Optional[Path] = typer.Option(
+        None,
+        "--provide-schema",
+        exists=True,
+        help="Path to a schema file (JSON, CSV, or text) to use for star resolution. "
+        "Can be combined with --resolve-schema to merge file-extracted schema on top.",
+    ),
     strict_schema: bool = typer.Option(
         False,
         "--strict-schema",
@@ -1434,6 +1454,17 @@ def graph_build(
         strict_schema=strict_schema,
     )
 
+    # Load provided schema file if specified
+    if provide_schema:
+        from sqlglider.graph.formatters import load_schema_file
+
+        loaded_schema = load_schema_file(provide_schema)
+        builder.set_schema(loaded_schema)
+        console.print(
+            f"[green]Loaded schema from {provide_schema} "
+            f"({len(loaded_schema)} table(s))[/green]"
+        )
+
     # Collect file paths for schema extraction
     manifest_files, path_files = _collect_sql_files(
         paths, manifest, recursive, glob_pattern
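For an end-to-end view of the same flag on `graph build`, the snippet below drives the command through typer's test runner, the way the new CLI tests at the bottom of this diff do. The import of `app` from `sqlglider.cli` is an assumption (the tests reference `app` and `runner` without showing their imports), and the paths are placeholders.

```python
# Driving `graph build --provide-schema` the same way the new CLI tests do.
# Assumption: `app` is the Typer application exposed by sqlglider.cli.
from typer.testing import CliRunner

from sqlglider.cli import app

runner = CliRunner()
result = runner.invoke(
    app,
    ["graph", "build", "queries/", "-o", "graph.json", "--provide-schema", "schema.json"],
)
print(result.exit_code)  # 0 on success; the command also reports how many tables were loaded
```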
{sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/builder.py
@@ -303,6 +303,21 @@ class GraphBuilder:
             self.add_file(file_path, dialect)
         return self
 
+    def set_schema(self, schema: Dict[str, Dict[str, str]]) -> "GraphBuilder":
+        """Pre-seed the resolved schema from an external source.
+
+        This allows skipping the schema extraction pass when the schema
+        is already known (e.g., loaded from a file).
+
+        Args:
+            schema: Schema dictionary mapping table names to column dicts.
+
+        Returns:
+            self for method chaining
+        """
+        self._resolved_schema = schema
+        return self
+
     def extract_schemas(
         self,
         file_paths: List[Path],
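A rough usage sketch of the new method follows. The no-argument `GraphBuilder()` construction is an assumption (its real signature is not shown in this diff); `set_schema` returning `self` and `add_file(file_path, dialect)` come from the hunk above.

```python
# Pre-seeding a GraphBuilder with an externally loaded schema before adding files.
from pathlib import Path

from sqlglider.graph.builder import GraphBuilder
from sqlglider.graph.formatters import load_schema_file

builder = GraphBuilder()  # assumption: defaults are enough for this sketch
builder.set_schema(load_schema_file(Path("schema.csv"))).add_file(
    Path("load_stg_orders.sql"), "spark"
)
```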
@@ -325,6 +340,7 @@ class GraphBuilder:
             file_paths,
             dialect=file_dialect,
             sql_preprocessor=self.sql_preprocessor,
+            initial_schema=self._resolved_schema if self._resolved_schema else None,
             strict_schema=self.strict_schema,
             catalog_type=self.catalog_type,
             catalog_config=self.catalog_config,
{sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/formatters.py
@@ -1,8 +1,9 @@
-"""Output formatters for resolved schema data."""
+"""Output formatters and parsers for resolved schema data."""
 
 import csv
 import json
 from io import StringIO
+from pathlib import Path
 from typing import Dict
 
 SchemaDict = Dict[str, Dict[str, str]]
@@ -96,3 +97,93 @@ def format_schema(schema: SchemaDict, output_format: str = "text") -> str:
             f"Invalid schema format '{output_format}'. Use 'text', 'json', or 'csv'."
         )
     return formatter(schema)
+
+
+def parse_schema_json(content: str) -> SchemaDict:
+    """Parse schema from JSON format.
+
+    Args:
+        content: JSON string with table -> {column -> type} structure.
+
+    Returns:
+        Parsed schema dictionary.
+    """
+    return json.loads(content)  # type: ignore[no-any-return]
+
+
+def parse_schema_csv(content: str) -> SchemaDict:
+    """Parse schema from CSV format.
+
+    Expects columns: table, column, type.
+
+    Args:
+        content: CSV string with header row.
+
+    Returns:
+        Parsed schema dictionary.
+    """
+    schema: SchemaDict = {}
+    reader = csv.DictReader(StringIO(content))
+    for row in reader:
+        table = row["table"]
+        column = row["column"]
+        col_type = row.get("type", "UNKNOWN")
+        if table not in schema:
+            schema[table] = {}
+        schema[table][column] = col_type
+    return schema
+
+
+def parse_schema_text(content: str) -> SchemaDict:
+    """Parse schema from indented text format.
+
+    Expected format:
+        table_name
+          column1
+          column2
+
+        other_table
+          col_a
+
+    Args:
+        content: Text-formatted schema string.
+
+    Returns:
+        Parsed schema dictionary.
+    """
+    schema: SchemaDict = {}
+    current_table: str | None = None
+    for line in content.splitlines():
+        if not line or not line.strip():
+            continue
+        if line.startswith(" "):
+            if current_table is not None:
+                schema[current_table][line.strip()] = "UNKNOWN"
+        else:
+            current_table = line.strip()
+            schema[current_table] = {}
+    return schema
+
+
+def load_schema_file(path: Path) -> SchemaDict:
+    """Load a schema file, auto-detecting format from extension.
+
+    `.json` → JSON, `.csv` → CSV, otherwise text.
+
+    Args:
+        path: Path to schema file.
+
+    Returns:
+        Parsed schema dictionary.
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+    """
+    content = path.read_text(encoding="utf-8")
+    suffix = path.suffix.lower()
+    if suffix == ".json":
+        return parse_schema_json(content)
+    elif suffix == ".csv":
+        return parse_schema_csv(content)
+    else:
+        return parse_schema_text(content)
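The three on-disk representations accepted by `load_schema_file` can be seen side by side in the small, self-contained check below. Table and column names are made up; it exercises only the parsers added in the hunk above.

```python
# Parsing one schema from each of the three supported text representations.
from sqlglider.graph.formatters import (
    parse_schema_csv,
    parse_schema_json,
    parse_schema_text,
)

expected = {"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}

json_text = '{"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}'
csv_text = "table,column,type\nusers,id,UNKNOWN\nusers,name,UNKNOWN\n"
plain_text = "users\n  id\n  name\n"  # indented text format carries no types

assert parse_schema_json(json_text) == expected
assert parse_schema_csv(csv_text) == expected
assert parse_schema_text(plain_text) == expected  # columns default to UNKNOWN
```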
{sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/lineage/analyzer.py
@@ -11,6 +11,48 @@ from sqlglot.lineage import Node, lineage
 from sqlglider.global_models import AnalysisLevel
 
 
+def _flat_schema_to_nested(
+    schema: Dict[str, Dict[str, str]],
+) -> Dict[str, object]:
+    """Convert flat dot-notation schema keys to the nested dict structure sqlglot expects.
+
+    sqlglot's MappingSchema requires consistent nesting depth across all tables.
+    Flat keys like ``"db.table"`` are split on dots and nested accordingly.
+    Shorter keys are padded with empty-string prefixes to match the max depth.
+
+    Examples::
+
+        {"users": {"id": "UNKNOWN"}}
+        → {"users": {"id": "UNKNOWN"}} (depth 1, no change)
+
+        {"db.users": {"id": "UNKNOWN"}, "my_view": {"x": "UNKNOWN"}}
+        → {"db": {"users": {"id": "UNKNOWN"}}, "": {"my_view": {"x": "UNKNOWN"}}}
+    """
+    if not schema:
+        return {}
+
+    # Split all keys into parts
+    entries = [(key.split("."), cols) for key, cols in schema.items()]
+    max_depth = max(len(parts) for parts, _ in entries)
+
+    # If all keys are single-part (unqualified), return as-is
+    if max_depth == 1:
+        return schema  # type: ignore[return-value]
+
+    # Pad shorter keys with empty-string prefixes to match max depth
+    nested: Dict[str, object] = {}
+    for parts, cols in entries:
+        while len(parts) < max_depth:
+            parts.insert(0, "")
+        d: Dict[str, object] = nested
+        for part in parts[:-1]:
+            if part not in d:
+                d[part] = {}
+            d = d[part]  # type: ignore[assignment]
+        d[parts[-1]] = cols
+    return nested
+
+
 class StarResolutionError(Exception):
     """Raised when SELECT * cannot be resolved and no_star mode is enabled."""
 
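The docstring examples above translate directly into a runnable check. The point of the padding is that sqlglot wants one consistent nesting depth, so an unqualified view ends up under an empty-string "database" key alongside the qualified tables.

```python
# The padding behaviour of _flat_schema_to_nested, restated as executable asserts.
from sqlglider.lineage.analyzer import _flat_schema_to_nested

flat = {
    "my_view": {"x": "UNKNOWN"},    # unqualified, depth 1
    "db.users": {"id": "UNKNOWN"},  # db-qualified, depth 2
}

assert _flat_schema_to_nested(flat) == {
    "": {"my_view": {"x": "UNKNOWN"}},   # padded with an empty-string prefix
    "db": {"users": {"id": "UNKNOWN"}},
}

# Depth-1-only schemas pass through unchanged.
assert _flat_schema_to_nested({"users": {"id": "UNKNOWN"}}) == {"users": {"id": "UNKNOWN"}}
```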
@@ -860,8 +902,10 @@
         current_query_sql = self.expr.sql(dialect=self.dialect)
 
         # Prune schema to only tables referenced in this query to avoid
-        # sqlglot.lineage() performance degradation with large schema dicts
-
+        # sqlglot.lineage() performance degradation with large schema dicts.
+        # Then convert from flat dot-notation keys to the nested dict structure
+        # that sqlglot's MappingSchema expects.
+        lineage_schema: Optional[Dict[str, object]] = None
         if self._file_schema:
             referenced = {t.lower() for t in self._get_query_tables()}
             pruned_schema = {
|
|
|
869
913
|
for table, cols in self._file_schema.items()
|
|
870
914
|
if table.lower() in referenced
|
|
871
915
|
}
|
|
872
|
-
if
|
|
873
|
-
|
|
916
|
+
if pruned_schema:
|
|
917
|
+
lineage_schema = _flat_schema_to_nested(pruned_schema)
|
|
874
918
|
|
|
875
919
|
for col in columns_to_analyze:
|
|
876
920
|
try:
|
|
@@ -883,7 +927,7 @@
                     lineage_col,
                     current_query_sql,
                     dialect=self.dialect,
-                    schema=
+                    schema=lineage_schema,
                 )
 
                 # Collect all source columns
{sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/schema/extractor.py
@@ -153,6 +153,7 @@ def extract_and_resolve_schema(
     file_paths: List[Path],
     dialect: str = "spark",
     sql_preprocessor: Optional[SqlPreprocessor] = None,
+    initial_schema: Optional[SchemaDict] = None,
     strict_schema: bool = False,
     catalog_type: Optional[str] = None,
     catalog_config: Optional[Dict[str, object]] = None,
@@ -167,6 +168,7 @@ def extract_and_resolve_schema(
         file_paths: SQL files to extract schema from.
         dialect: SQL dialect.
         sql_preprocessor: Optional SQL preprocessor.
+        initial_schema: Optional starting schema to build upon.
         strict_schema: If True, fail on ambiguous column attribution.
         catalog_type: Optional catalog provider name.
         catalog_config: Optional provider-specific configuration dict.
@@ -183,6 +185,7 @@
         file_paths,
         dialect=dialect,
         sql_preprocessor=sql_preprocessor,
+        initial_schema=initial_schema,
         strict_schema=strict_schema,
         console=console,
     )
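A hypothetical call showing where `initial_schema` slots in: the parameter names come from the signature hunk above, but the return value and any other parameters the real function may require are assumptions.

```python
# Seeding file-based schema extraction with a pre-loaded schema (illustrative;
# the return type is assumed to be the merged table -> {column -> type} dict).
from pathlib import Path

from sqlglider.graph.formatters import load_schema_file
from sqlglider.schema.extractor import extract_and_resolve_schema

seed = load_schema_file(Path("schema.json"))
merged = extract_and_resolve_schema(
    [Path("load_stg_orders.sql")],
    dialect="spark",
    initial_schema=seed,  # start from the provided schema, then layer file-extracted DDL on top
)
```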
{sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_formatters.py
@@ -7,6 +7,10 @@ from sqlglider.graph.formatters import (
     format_schema_csv,
     format_schema_json,
     format_schema_text,
+    load_schema_file,
+    parse_schema_csv,
+    parse_schema_json,
+    parse_schema_text,
 )
 
 
@@ -84,3 +88,61 @@
     def test_invalid_format(self, sample_schema):
         with pytest.raises(ValueError, match="Invalid schema format"):
             format_schema(sample_schema, "xml")
+
+
+class TestParseSchemaJson:
+    def test_round_trip(self, sample_schema):
+        content = format_schema_json(sample_schema)
+        parsed = parse_schema_json(content)
+        assert parsed == sample_schema
+
+    def test_empty(self):
+        assert parse_schema_json("{}") == {}
+
+
+class TestParseSchemaCsv:
+    def test_round_trip(self, sample_schema):
+        content = format_schema_csv(sample_schema)
+        parsed = parse_schema_csv(content)
+        assert parsed == sample_schema
+
+    def test_empty(self):
+        parsed = parse_schema_csv("table,column,type\n")
+        assert parsed == {}
+
+
+class TestParseSchemaText:
+    def test_round_trip(self, sample_schema):
+        content = format_schema_text(sample_schema)
+        parsed = parse_schema_text(content)
+        assert parsed == sample_schema
+
+    def test_empty(self):
+        assert parse_schema_text("") == {}
+
+    def test_single_table(self):
+        content = "users\n id\n name\n"
+        parsed = parse_schema_text(content)
+        assert parsed == {"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
+
+
+class TestLoadSchemaFile:
+    def test_json_extension(self, tmp_path, sample_schema):
+        f = tmp_path / "schema.json"
+        f.write_text(format_schema_json(sample_schema))
+        assert load_schema_file(f) == sample_schema
+
+    def test_csv_extension(self, tmp_path, sample_schema):
+        f = tmp_path / "schema.csv"
+        f.write_text(format_schema_csv(sample_schema))
+        assert load_schema_file(f) == sample_schema
+
+    def test_txt_extension(self, tmp_path, sample_schema):
+        f = tmp_path / "schema.txt"
+        f.write_text(format_schema_text(sample_schema))
+        assert load_schema_file(f) == sample_schema
+
+    def test_no_extension_treated_as_text(self, tmp_path, sample_schema):
+        f = tmp_path / "schema"
+        f.write_text(format_schema_text(sample_schema))
+        assert load_schema_file(f) == sample_schema
{sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/lineage/test_analyzer.py
@@ -3,7 +3,11 @@
 import pytest
 
 from sqlglider.global_models import AnalysisLevel
-from sqlglider.lineage.analyzer import
+from sqlglider.lineage.analyzer import (
+    LineageAnalyzer,
+    StarResolutionError,
+    _flat_schema_to_nested,
+)
 
 
 class TestCaseInsensitiveForwardLineage:
@@ -3181,3 +3185,97 @@ class TestSchemaPruning:
         output_names = {item.output_name for r in results for item in r.lineage_items}
         assert "id" in output_names
         assert "email" in output_names
+
+
+class TestFlatSchemaToNested:
+    """Tests for _flat_schema_to_nested conversion utility."""
+
+    def test_empty(self):
+        assert _flat_schema_to_nested({}) == {}
+
+    def test_unqualified_passthrough(self):
+        schema = {"users": {"id": "UNKNOWN"}}
+        assert _flat_schema_to_nested(schema) == schema
+
+    def test_two_part_keys(self):
+        schema = {"db.users": {"id": "UNKNOWN"}}
+        result = _flat_schema_to_nested(schema)
+        assert result == {"db": {"users": {"id": "UNKNOWN"}}}
+
+    def test_three_part_keys(self):
+        schema = {"cat.db.users": {"id": "UNKNOWN"}}
+        result = _flat_schema_to_nested(schema)
+        assert result == {"cat": {"db": {"users": {"id": "UNKNOWN"}}}}
+
+    def test_mixed_depth_pads_shorter_keys(self):
+        schema = {
+            "my_view": {"x": "UNKNOWN"},
+            "db.users": {"id": "UNKNOWN"},
+        }
+        result = _flat_schema_to_nested(schema)
+        assert result == {
+            "": {"my_view": {"x": "UNKNOWN"}},
+            "db": {"users": {"id": "UNKNOWN"}},
+        }
+
+
+class TestQualifiedSchemaKeys:
+    """Tests for schema with qualified (dotted) table names."""
+
+    def test_qualified_star_expansion(self):
+        """SELECT * resolves correctly with qualified schema keys."""
+        sql = "SELECT * FROM mydb.users"
+        schema = {"mydb.users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
+        analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
+        results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
+        items = {
+            (item.source_name, item.output_name)
+            for r in results
+            for item in r.lineage_items
+        }
+        assert ("mydb.users.id", "id") in items
+        assert ("mydb.users.name", "name") in items
+
+    def test_qualified_explicit_columns(self):
+        """Explicit columns trace sources correctly with qualified schema keys."""
+        sql = "SELECT id, name FROM mydb.users"
+        schema = {"mydb.users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
+        analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
+        results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
+        items = {
+            (item.source_name, item.output_name)
+            for r in results
+            for item in r.lineage_items
+        }
+        assert ("mydb.users.id", "mydb.users.id") in items
+        assert ("mydb.users.name", "mydb.users.name") in items
+
+    def test_three_part_qualified(self):
+        """3-part qualified names (catalog.db.table) work correctly."""
+        sql = "SELECT id FROM catalog.mydb.users"
+        schema = {"catalog.mydb.users": {"id": "UNKNOWN"}}
+        analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
+        results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
+        items = [
+            (item.source_name, item.output_name)
+            for r in results
+            for item in r.lineage_items
+        ]
+        assert len(items) == 1
+        assert items[0] == ("catalog.mydb.users.id", "catalog.mydb.users.id")
+
+    def test_mixed_qualified_and_unqualified(self):
+        """Mix of qualified and unqualified table names in schema."""
+        sql = "SELECT * FROM my_view"
+        schema = {
+            "my_view": {"id": "UNKNOWN"},
+            "mydb.users": {"id": "UNKNOWN", "name": "UNKNOWN"},
+        }
+        analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
+        results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
+        items = {
+            (item.source_name, item.output_name)
+            for r in results
+            for item in r.lineage_items
+        }
+        assert ("my_view.id", "id") in items
{sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/test_cli.py
@@ -1884,3 +1884,141 @@ class TestTablesScrapeCommand:
 
         data = json.loads(result.stdout)
         assert "customers" in data
+
+
+class TestProvideSchema:
+    """Tests for --provide-schema on lineage and graph build commands."""
+
+    @pytest.fixture
+    def star_query_file(self, tmp_path):
+        """SQL file with SELECT * that needs schema to resolve."""
+        sql_file = tmp_path / "star.sql"
+        sql_file.write_text("SELECT * FROM users")
+        return sql_file
+
+    @pytest.fixture
+    def schema_json_file(self, tmp_path):
+        schema = tmp_path / "schema.json"
+        schema.write_text('{"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}')
+        return schema
+
+    def test_lineage_with_provide_schema(self, star_query_file, schema_json_file):
+        """Test that --provide-schema resolves SELECT * in lineage."""
+        result = runner.invoke(
+            app,
+            [
+                "lineage",
+                str(star_query_file),
+                "--provide-schema",
+                str(schema_json_file),
+                "--output-format",
+                "json",
+            ],
+        )
+
+        assert result.exit_code == 0
+        import json
+
+        data = json.loads(result.stdout)
+        columns = [item["output_name"] for item in data["queries"][0]["lineage"]]
+        assert "id" in columns
+        assert "name" in columns
+
+    def test_graph_build_with_provide_schema(
+        self, star_query_file, schema_json_file, tmp_path
+    ):
+        """Test that --provide-schema works with graph build."""
+        output = tmp_path / "graph.json"
+        result = runner.invoke(
+            app,
+            [
+                "graph",
+                "build",
+                str(star_query_file),
+                "-o",
+                str(output),
+                "--provide-schema",
+                str(schema_json_file),
+            ],
+        )
+
+        assert result.exit_code == 0
+        assert output.exists()
+        import json
+
+        graph = json.loads(output.read_text())
+        assert graph["metadata"]["total_nodes"] > 0
+
+
+class TestProvideSchemaRoundTrip:
+    """Integration: tables scrape -> schema file -> graph build --provide-schema."""
+
+    @pytest.fixture
+    def sql_dir(self, tmp_path):
+        d = tmp_path / "sql"
+        d.mkdir()
+        (d / "a.sql").write_text(
+            "CREATE TABLE output_table AS SELECT c.id, c.name FROM customers c;"
+        )
+        (d / "b.sql").write_text("SELECT * FROM output_table")
+        return d
+
+    @pytest.mark.parametrize(
+        "fmt,ext", [("json", ".json"), ("csv", ".csv"), ("text", ".txt")]
+    )
+    def test_round_trip(self, sql_dir, tmp_path, fmt, ext):
+        """Scrape schema, save to file, then use --provide-schema to build graph."""
+        schema_file = tmp_path / f"schema{ext}"
+        graph_provided = tmp_path / "graph_provided.json"
+        graph_resolved = tmp_path / "graph_resolved.json"
+
+        # Step 1: Scrape schema
+        scrape_result = runner.invoke(
+            app,
+            ["tables", "scrape", str(sql_dir), "-f", fmt, "-o", str(schema_file)],
+        )
+        assert scrape_result.exit_code == 0
+        assert schema_file.exists()
+
+        # Step 2: Build graph with --provide-schema
+        result_provided = runner.invoke(
+            app,
+            [
+                "graph",
+                "build",
+                str(sql_dir),
+                "-o",
+                str(graph_provided),
+                "--provide-schema",
+                str(schema_file),
+            ],
+        )
+        assert result_provided.exit_code == 0
+
+        # Step 3: Build graph with --resolve-schema
+        result_resolved = runner.invoke(
+            app,
+            [
+                "graph",
+                "build",
+                str(sql_dir),
+                "-o",
+                str(graph_resolved),
+                "--resolve-schema",
+            ],
+        )
+        assert result_resolved.exit_code == 0
+
+        # Step 4: Compare graphs (nodes and edges should match)
+        import json
+
+        g1 = json.loads(graph_provided.read_text())
+        g2 = json.loads(graph_resolved.read_text())
+
+        nodes1 = sorted([n["identifier"] for n in g1["nodes"]])
+        nodes2 = sorted([n["identifier"] for n in g2["nodes"]])
+        assert nodes1 == nodes2
+
+        edges1 = sorted([(e["source_node"], e["target_node"]) for e in g1["edges"]])
+        edges2 = sorted([(e["source_node"], e["target_node"]) for e in g2["edges"]])
+        assert edges1 == edges2