sql-glider 0.1.14__tar.gz → 0.1.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_glider-0.1.14 → sql_glider-0.1.15}/PKG-INFO +1 -1
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/_version.py +2 -2
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/cli.py +32 -1
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/builder.py +16 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/formatters.py +92 -1
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/schema/extractor.py +3 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_formatters.py +62 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/test_cli.py +138 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/.github/workflows/ci.yml +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/.github/workflows/publish.yml +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/.gitignore +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/.python-version +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/ARCHITECTURE.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/CLAUDE.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/LICENSE +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/README.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-05-column-level-lineage.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-05-reverse-lineage.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-06-config-file-support.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-06-graph-lineage.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-06-unify-single-multi-query.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-07-sample-data-model.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-07-sql-templating.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-08-tables-command.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-09-graph-query-paths.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-13-dissect-command.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-14-tables-pull-command.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-26-file-scoped-schema-context.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-28-sparksql-table-extraction.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-29-no-star-flag.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-29-resolve-schema.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-29-schema-pruning-optimization.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-29-tables-scrape-command.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/pyproject.toml +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/README.md +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/expire_dim_customer.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/load_fact_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/load_fact_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/merge_dim_customer.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/merge_dim_product.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/complex/conditional_merge.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/complex/cte_insert.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/complex/multi_table_transform.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/dim_customer.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/dim_product.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/fact_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/fact_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_addresses.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_customers.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_order_items.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_products.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/stg_customers.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/stg_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/stg_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/stg_products.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/maintenance/update_product_status.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/staging/load_stg_customers.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/staging/load_stg_orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/staging/load_stg_payments.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/staging/load_stg_products.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/sqlglider.toml.example +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/catalog/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/catalog/base.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/catalog/databricks.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/catalog/registry.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/dissection/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/dissection/analyzer.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/dissection/formatters.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/dissection/models.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/global_models.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/merge.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/models.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/query.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/serialization.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/lineage/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/lineage/analyzer.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/lineage/formatters.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/schema/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/templating/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/templating/base.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/templating/jinja.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/templating/registry.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/templating/variables.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/utils/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/utils/config.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/utils/file_utils.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/utils/schema.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/customers.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/orders.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/reports.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_cte.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_cte_view_star.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_multi.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_single_query.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_subquery.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_tables.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_view.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/sample_manifest.csv +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/catalog/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/catalog/test_base.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/catalog/test_databricks.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/catalog/test_registry.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/dissection/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/dissection/test_analyzer.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/dissection/test_formatters.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/dissection/test_models.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_builder.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_merge.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_models.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_query.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_serialization.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/lineage/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/lineage/test_analyzer.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/lineage/test_formatters.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/schema/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/schema/test_extractor.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/templating/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/templating/test_base.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/templating/test_jinja.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/templating/test_registry.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/templating/test_variables.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/utils/__init__.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/utils/test_config.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/utils/test_file_utils.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/utils/test_schema.py +0 -0
- {sql_glider-0.1.14 → sql_glider-0.1.15}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-glider
|
|
3
|
-
Version: 0.1.14
|
|
3
|
+
Version: 0.1.15
|
|
4
4
|
Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
|
|
5
5
|
Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
|
|
6
6
|
Project-URL: Repository, https://github.com/rycowhi/sql-glider/
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.14'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 14)
|
|
31
|
+
__version__ = version = '0.1.15'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 15)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -171,6 +171,12 @@ def lineage(
|
|
|
171
171
|
"--no-star",
|
|
172
172
|
help="Fail if SELECT * cannot be resolved to actual columns",
|
|
173
173
|
),
|
|
174
|
+
provide_schema: Optional[Path] = typer.Option(
|
|
175
|
+
None,
|
|
176
|
+
"--provide-schema",
|
|
177
|
+
exists=True,
|
|
178
|
+
help="Path to a schema file (JSON, CSV, or text) for star resolution",
|
|
179
|
+
),
|
|
174
180
|
) -> None:
|
|
175
181
|
"""
|
|
176
182
|
Analyze column or table lineage for a SQL file.
|
|
@@ -266,8 +272,15 @@ def lineage(
|
|
|
266
272
|
source_path=source_path,
|
|
267
273
|
)
|
|
268
274
|
|
|
275
|
+
# Load provided schema if specified
|
|
276
|
+
schema = None
|
|
277
|
+
if provide_schema:
|
|
278
|
+
from sqlglider.graph.formatters import load_schema_file
|
|
279
|
+
|
|
280
|
+
schema = load_schema_file(provide_schema)
|
|
281
|
+
|
|
269
282
|
# Create analyzer
|
|
270
|
-
analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star)
|
|
283
|
+
analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star, schema=schema)
|
|
271
284
|
|
|
272
285
|
# Unified lineage analysis (handles both single and multi-query files)
|
|
273
286
|
results = analyzer.analyze_queries(
|
|
@@ -1292,6 +1305,13 @@ def graph_build(
|
|
|
1292
1305
|
"--dump-schema-format",
|
|
1293
1306
|
help="Format for dumped schema: 'text' (default), 'json', or 'csv'",
|
|
1294
1307
|
),
|
|
1308
|
+
provide_schema: Optional[Path] = typer.Option(
|
|
1309
|
+
None,
|
|
1310
|
+
"--provide-schema",
|
|
1311
|
+
exists=True,
|
|
1312
|
+
help="Path to a schema file (JSON, CSV, or text) to use for star resolution. "
|
|
1313
|
+
"Can be combined with --resolve-schema to merge file-extracted schema on top.",
|
|
1314
|
+
),
|
|
1295
1315
|
strict_schema: bool = typer.Option(
|
|
1296
1316
|
False,
|
|
1297
1317
|
"--strict-schema",
|
|
@@ -1434,6 +1454,17 @@ def graph_build(
|
|
|
1434
1454
|
strict_schema=strict_schema,
|
|
1435
1455
|
)
|
|
1436
1456
|
|
|
1457
|
+
# Load provided schema file if specified
|
|
1458
|
+
if provide_schema:
|
|
1459
|
+
from sqlglider.graph.formatters import load_schema_file
|
|
1460
|
+
|
|
1461
|
+
loaded_schema = load_schema_file(provide_schema)
|
|
1462
|
+
builder.set_schema(loaded_schema)
|
|
1463
|
+
console.print(
|
|
1464
|
+
f"[green]Loaded schema from {provide_schema} "
|
|
1465
|
+
f"({len(loaded_schema)} table(s))[/green]"
|
|
1466
|
+
)
|
|
1467
|
+
|
|
1437
1468
|
# Collect file paths for schema extraction
|
|
1438
1469
|
manifest_files, path_files = _collect_sql_files(
|
|
1439
1470
|
paths, manifest, recursive, glob_pattern
|
|
@@ -303,6 +303,21 @@ class GraphBuilder:
|
|
|
303
303
|
self.add_file(file_path, dialect)
|
|
304
304
|
return self
|
|
305
305
|
|
|
306
|
+
def set_schema(self, schema: Dict[str, Dict[str, str]]) -> "GraphBuilder":
|
|
307
|
+
"""Pre-seed the resolved schema from an external source.
|
|
308
|
+
|
|
309
|
+
This allows skipping the schema extraction pass when the schema
|
|
310
|
+
is already known (e.g., loaded from a file).
|
|
311
|
+
|
|
312
|
+
Args:
|
|
313
|
+
schema: Schema dictionary mapping table names to column dicts.
|
|
314
|
+
|
|
315
|
+
Returns:
|
|
316
|
+
self for method chaining
|
|
317
|
+
"""
|
|
318
|
+
self._resolved_schema = schema
|
|
319
|
+
return self
|
|
320
|
+
|
|
306
321
|
def extract_schemas(
|
|
307
322
|
self,
|
|
308
323
|
file_paths: List[Path],
|
|
@@ -325,6 +340,7 @@ class GraphBuilder:
|
|
|
325
340
|
file_paths,
|
|
326
341
|
dialect=file_dialect,
|
|
327
342
|
sql_preprocessor=self.sql_preprocessor,
|
|
343
|
+
initial_schema=self._resolved_schema if self._resolved_schema else None,
|
|
328
344
|
strict_schema=self.strict_schema,
|
|
329
345
|
catalog_type=self.catalog_type,
|
|
330
346
|
catalog_config=self.catalog_config,
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
"""Output formatters for resolved schema data."""
|
|
1
|
+
"""Output formatters and parsers for resolved schema data."""
|
|
2
2
|
|
|
3
3
|
import csv
|
|
4
4
|
import json
|
|
5
5
|
from io import StringIO
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
from typing import Dict
|
|
7
8
|
|
|
8
9
|
SchemaDict = Dict[str, Dict[str, str]]
|
|
@@ -96,3 +97,93 @@ def format_schema(schema: SchemaDict, output_format: str = "text") -> str:
|
|
|
96
97
|
f"Invalid schema format '{output_format}'. Use 'text', 'json', or 'csv'."
|
|
97
98
|
)
|
|
98
99
|
return formatter(schema)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def parse_schema_json(content: str) -> SchemaDict:
|
|
103
|
+
"""Parse schema from JSON format.
|
|
104
|
+
|
|
105
|
+
Args:
|
|
106
|
+
content: JSON string with table -> {column -> type} structure.
|
|
107
|
+
|
|
108
|
+
Returns:
|
|
109
|
+
Parsed schema dictionary.
|
|
110
|
+
"""
|
|
111
|
+
return json.loads(content) # type: ignore[no-any-return]
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def parse_schema_csv(content: str) -> SchemaDict:
|
|
115
|
+
"""Parse schema from CSV format.
|
|
116
|
+
|
|
117
|
+
Expects columns: table, column, type.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
content: CSV string with header row.
|
|
121
|
+
|
|
122
|
+
Returns:
|
|
123
|
+
Parsed schema dictionary.
|
|
124
|
+
"""
|
|
125
|
+
schema: SchemaDict = {}
|
|
126
|
+
reader = csv.DictReader(StringIO(content))
|
|
127
|
+
for row in reader:
|
|
128
|
+
table = row["table"]
|
|
129
|
+
column = row["column"]
|
|
130
|
+
col_type = row.get("type", "UNKNOWN")
|
|
131
|
+
if table not in schema:
|
|
132
|
+
schema[table] = {}
|
|
133
|
+
schema[table][column] = col_type
|
|
134
|
+
return schema
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def parse_schema_text(content: str) -> SchemaDict:
|
|
138
|
+
"""Parse schema from indented text format.
|
|
139
|
+
|
|
140
|
+
Expected format:
|
|
141
|
+
table_name
|
|
142
|
+
column1
|
|
143
|
+
column2
|
|
144
|
+
|
|
145
|
+
other_table
|
|
146
|
+
col_a
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
content: Text-formatted schema string.
|
|
150
|
+
|
|
151
|
+
Returns:
|
|
152
|
+
Parsed schema dictionary.
|
|
153
|
+
"""
|
|
154
|
+
schema: SchemaDict = {}
|
|
155
|
+
current_table: str | None = None
|
|
156
|
+
for line in content.splitlines():
|
|
157
|
+
if not line or not line.strip():
|
|
158
|
+
continue
|
|
159
|
+
if line.startswith(" "):
|
|
160
|
+
if current_table is not None:
|
|
161
|
+
schema[current_table][line.strip()] = "UNKNOWN"
|
|
162
|
+
else:
|
|
163
|
+
current_table = line.strip()
|
|
164
|
+
schema[current_table] = {}
|
|
165
|
+
return schema
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def load_schema_file(path: Path) -> SchemaDict:
|
|
169
|
+
"""Load a schema file, auto-detecting format from extension.
|
|
170
|
+
|
|
171
|
+
`.json` → JSON, `.csv` → CSV, otherwise text.
|
|
172
|
+
|
|
173
|
+
Args:
|
|
174
|
+
path: Path to schema file.
|
|
175
|
+
|
|
176
|
+
Returns:
|
|
177
|
+
Parsed schema dictionary.
|
|
178
|
+
|
|
179
|
+
Raises:
|
|
180
|
+
FileNotFoundError: If the file does not exist.
|
|
181
|
+
"""
|
|
182
|
+
content = path.read_text(encoding="utf-8")
|
|
183
|
+
suffix = path.suffix.lower()
|
|
184
|
+
if suffix == ".json":
|
|
185
|
+
return parse_schema_json(content)
|
|
186
|
+
elif suffix == ".csv":
|
|
187
|
+
return parse_schema_csv(content)
|
|
188
|
+
else:
|
|
189
|
+
return parse_schema_text(content)
|
|
@@ -153,6 +153,7 @@ def extract_and_resolve_schema(
|
|
|
153
153
|
file_paths: List[Path],
|
|
154
154
|
dialect: str = "spark",
|
|
155
155
|
sql_preprocessor: Optional[SqlPreprocessor] = None,
|
|
156
|
+
initial_schema: Optional[SchemaDict] = None,
|
|
156
157
|
strict_schema: bool = False,
|
|
157
158
|
catalog_type: Optional[str] = None,
|
|
158
159
|
catalog_config: Optional[Dict[str, object]] = None,
|
|
@@ -167,6 +168,7 @@ def extract_and_resolve_schema(
|
|
|
167
168
|
file_paths: SQL files to extract schema from.
|
|
168
169
|
dialect: SQL dialect.
|
|
169
170
|
sql_preprocessor: Optional SQL preprocessor.
|
|
171
|
+
initial_schema: Optional starting schema to build upon.
|
|
170
172
|
strict_schema: If True, fail on ambiguous column attribution.
|
|
171
173
|
catalog_type: Optional catalog provider name.
|
|
172
174
|
catalog_config: Optional provider-specific configuration dict.
|
|
@@ -183,6 +185,7 @@ def extract_and_resolve_schema(
|
|
|
183
185
|
file_paths,
|
|
184
186
|
dialect=dialect,
|
|
185
187
|
sql_preprocessor=sql_preprocessor,
|
|
188
|
+
initial_schema=initial_schema,
|
|
186
189
|
strict_schema=strict_schema,
|
|
187
190
|
console=console,
|
|
188
191
|
)
|
|
@@ -7,6 +7,10 @@ from sqlglider.graph.formatters import (
|
|
|
7
7
|
format_schema_csv,
|
|
8
8
|
format_schema_json,
|
|
9
9
|
format_schema_text,
|
|
10
|
+
load_schema_file,
|
|
11
|
+
parse_schema_csv,
|
|
12
|
+
parse_schema_json,
|
|
13
|
+
parse_schema_text,
|
|
10
14
|
)
|
|
11
15
|
|
|
12
16
|
|
|
@@ -84,3 +88,61 @@ class TestFormatSchema:
|
|
|
84
88
|
def test_invalid_format(self, sample_schema):
|
|
85
89
|
with pytest.raises(ValueError, match="Invalid schema format"):
|
|
86
90
|
format_schema(sample_schema, "xml")
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class TestParseSchemaJson:
|
|
94
|
+
def test_round_trip(self, sample_schema):
|
|
95
|
+
content = format_schema_json(sample_schema)
|
|
96
|
+
parsed = parse_schema_json(content)
|
|
97
|
+
assert parsed == sample_schema
|
|
98
|
+
|
|
99
|
+
def test_empty(self):
|
|
100
|
+
assert parse_schema_json("{}") == {}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class TestParseSchemaCsv:
|
|
104
|
+
def test_round_trip(self, sample_schema):
|
|
105
|
+
content = format_schema_csv(sample_schema)
|
|
106
|
+
parsed = parse_schema_csv(content)
|
|
107
|
+
assert parsed == sample_schema
|
|
108
|
+
|
|
109
|
+
def test_empty(self):
|
|
110
|
+
parsed = parse_schema_csv("table,column,type\n")
|
|
111
|
+
assert parsed == {}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class TestParseSchemaText:
|
|
115
|
+
def test_round_trip(self, sample_schema):
|
|
116
|
+
content = format_schema_text(sample_schema)
|
|
117
|
+
parsed = parse_schema_text(content)
|
|
118
|
+
assert parsed == sample_schema
|
|
119
|
+
|
|
120
|
+
def test_empty(self):
|
|
121
|
+
assert parse_schema_text("") == {}
|
|
122
|
+
|
|
123
|
+
def test_single_table(self):
|
|
124
|
+
content = "users\n id\n name\n"
|
|
125
|
+
parsed = parse_schema_text(content)
|
|
126
|
+
assert parsed == {"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class TestLoadSchemaFile:
|
|
130
|
+
def test_json_extension(self, tmp_path, sample_schema):
|
|
131
|
+
f = tmp_path / "schema.json"
|
|
132
|
+
f.write_text(format_schema_json(sample_schema))
|
|
133
|
+
assert load_schema_file(f) == sample_schema
|
|
134
|
+
|
|
135
|
+
def test_csv_extension(self, tmp_path, sample_schema):
|
|
136
|
+
f = tmp_path / "schema.csv"
|
|
137
|
+
f.write_text(format_schema_csv(sample_schema))
|
|
138
|
+
assert load_schema_file(f) == sample_schema
|
|
139
|
+
|
|
140
|
+
def test_txt_extension(self, tmp_path, sample_schema):
|
|
141
|
+
f = tmp_path / "schema.txt"
|
|
142
|
+
f.write_text(format_schema_text(sample_schema))
|
|
143
|
+
assert load_schema_file(f) == sample_schema
|
|
144
|
+
|
|
145
|
+
def test_no_extension_treated_as_text(self, tmp_path, sample_schema):
|
|
146
|
+
f = tmp_path / "schema"
|
|
147
|
+
f.write_text(format_schema_text(sample_schema))
|
|
148
|
+
assert load_schema_file(f) == sample_schema
|
|
@@ -1884,3 +1884,141 @@ class TestTablesScrapeCommand:
|
|
|
1884
1884
|
|
|
1885
1885
|
data = json.loads(result.stdout)
|
|
1886
1886
|
assert "customers" in data
|
|
1887
|
+
|
|
1888
|
+
|
|
1889
|
+
class TestProvideSchema:
|
|
1890
|
+
"""Tests for --provide-schema on lineage and graph build commands."""
|
|
1891
|
+
|
|
1892
|
+
@pytest.fixture
|
|
1893
|
+
def star_query_file(self, tmp_path):
|
|
1894
|
+
"""SQL file with SELECT * that needs schema to resolve."""
|
|
1895
|
+
sql_file = tmp_path / "star.sql"
|
|
1896
|
+
sql_file.write_text("SELECT * FROM users")
|
|
1897
|
+
return sql_file
|
|
1898
|
+
|
|
1899
|
+
@pytest.fixture
|
|
1900
|
+
def schema_json_file(self, tmp_path):
|
|
1901
|
+
schema = tmp_path / "schema.json"
|
|
1902
|
+
schema.write_text('{"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}')
|
|
1903
|
+
return schema
|
|
1904
|
+
|
|
1905
|
+
def test_lineage_with_provide_schema(self, star_query_file, schema_json_file):
|
|
1906
|
+
"""Test that --provide-schema resolves SELECT * in lineage."""
|
|
1907
|
+
result = runner.invoke(
|
|
1908
|
+
app,
|
|
1909
|
+
[
|
|
1910
|
+
"lineage",
|
|
1911
|
+
str(star_query_file),
|
|
1912
|
+
"--provide-schema",
|
|
1913
|
+
str(schema_json_file),
|
|
1914
|
+
"--output-format",
|
|
1915
|
+
"json",
|
|
1916
|
+
],
|
|
1917
|
+
)
|
|
1918
|
+
|
|
1919
|
+
assert result.exit_code == 0
|
|
1920
|
+
import json
|
|
1921
|
+
|
|
1922
|
+
data = json.loads(result.stdout)
|
|
1923
|
+
columns = [item["output_name"] for item in data["queries"][0]["lineage"]]
|
|
1924
|
+
assert "id" in columns
|
|
1925
|
+
assert "name" in columns
|
|
1926
|
+
|
|
1927
|
+
def test_graph_build_with_provide_schema(
|
|
1928
|
+
self, star_query_file, schema_json_file, tmp_path
|
|
1929
|
+
):
|
|
1930
|
+
"""Test that --provide-schema works with graph build."""
|
|
1931
|
+
output = tmp_path / "graph.json"
|
|
1932
|
+
result = runner.invoke(
|
|
1933
|
+
app,
|
|
1934
|
+
[
|
|
1935
|
+
"graph",
|
|
1936
|
+
"build",
|
|
1937
|
+
str(star_query_file),
|
|
1938
|
+
"-o",
|
|
1939
|
+
str(output),
|
|
1940
|
+
"--provide-schema",
|
|
1941
|
+
str(schema_json_file),
|
|
1942
|
+
],
|
|
1943
|
+
)
|
|
1944
|
+
|
|
1945
|
+
assert result.exit_code == 0
|
|
1946
|
+
assert output.exists()
|
|
1947
|
+
import json
|
|
1948
|
+
|
|
1949
|
+
graph = json.loads(output.read_text())
|
|
1950
|
+
assert graph["metadata"]["total_nodes"] > 0
|
|
1951
|
+
|
|
1952
|
+
|
|
1953
|
+
class TestProvideSchemaRoundTrip:
|
|
1954
|
+
"""Integration: tables scrape -> schema file -> graph build --provide-schema."""
|
|
1955
|
+
|
|
1956
|
+
@pytest.fixture
|
|
1957
|
+
def sql_dir(self, tmp_path):
|
|
1958
|
+
d = tmp_path / "sql"
|
|
1959
|
+
d.mkdir()
|
|
1960
|
+
(d / "a.sql").write_text(
|
|
1961
|
+
"CREATE TABLE output_table AS SELECT c.id, c.name FROM customers c;"
|
|
1962
|
+
)
|
|
1963
|
+
(d / "b.sql").write_text("SELECT * FROM output_table")
|
|
1964
|
+
return d
|
|
1965
|
+
|
|
1966
|
+
@pytest.mark.parametrize(
|
|
1967
|
+
"fmt,ext", [("json", ".json"), ("csv", ".csv"), ("text", ".txt")]
|
|
1968
|
+
)
|
|
1969
|
+
def test_round_trip(self, sql_dir, tmp_path, fmt, ext):
|
|
1970
|
+
"""Scrape schema, save to file, then use --provide-schema to build graph."""
|
|
1971
|
+
schema_file = tmp_path / f"schema{ext}"
|
|
1972
|
+
graph_provided = tmp_path / "graph_provided.json"
|
|
1973
|
+
graph_resolved = tmp_path / "graph_resolved.json"
|
|
1974
|
+
|
|
1975
|
+
# Step 1: Scrape schema
|
|
1976
|
+
scrape_result = runner.invoke(
|
|
1977
|
+
app,
|
|
1978
|
+
["tables", "scrape", str(sql_dir), "-f", fmt, "-o", str(schema_file)],
|
|
1979
|
+
)
|
|
1980
|
+
assert scrape_result.exit_code == 0
|
|
1981
|
+
assert schema_file.exists()
|
|
1982
|
+
|
|
1983
|
+
# Step 2: Build graph with --provide-schema
|
|
1984
|
+
result_provided = runner.invoke(
|
|
1985
|
+
app,
|
|
1986
|
+
[
|
|
1987
|
+
"graph",
|
|
1988
|
+
"build",
|
|
1989
|
+
str(sql_dir),
|
|
1990
|
+
"-o",
|
|
1991
|
+
str(graph_provided),
|
|
1992
|
+
"--provide-schema",
|
|
1993
|
+
str(schema_file),
|
|
1994
|
+
],
|
|
1995
|
+
)
|
|
1996
|
+
assert result_provided.exit_code == 0
|
|
1997
|
+
|
|
1998
|
+
# Step 3: Build graph with --resolve-schema
|
|
1999
|
+
result_resolved = runner.invoke(
|
|
2000
|
+
app,
|
|
2001
|
+
[
|
|
2002
|
+
"graph",
|
|
2003
|
+
"build",
|
|
2004
|
+
str(sql_dir),
|
|
2005
|
+
"-o",
|
|
2006
|
+
str(graph_resolved),
|
|
2007
|
+
"--resolve-schema",
|
|
2008
|
+
],
|
|
2009
|
+
)
|
|
2010
|
+
assert result_resolved.exit_code == 0
|
|
2011
|
+
|
|
2012
|
+
# Step 4: Compare graphs (nodes and edges should match)
|
|
2013
|
+
import json
|
|
2014
|
+
|
|
2015
|
+
g1 = json.loads(graph_provided.read_text())
|
|
2016
|
+
g2 = json.loads(graph_resolved.read_text())
|
|
2017
|
+
|
|
2018
|
+
nodes1 = sorted([n["identifier"] for n in g1["nodes"]])
|
|
2019
|
+
nodes2 = sorted([n["identifier"] for n in g2["nodes"]])
|
|
2020
|
+
assert nodes1 == nodes2
|
|
2021
|
+
|
|
2022
|
+
edges1 = sorted([(e["source_node"], e["target_node"]) for e in g1["edges"]])
|
|
2023
|
+
edges2 = sorted([(e["source_node"], e["target_node"]) for e in g2["edges"]])
|
|
2024
|
+
assert edges1 == edges2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/update_dim_customer_metrics.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/incremental/incr_fact_payments.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/incremental/incr_pres_sales_summary.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/maintenance/delete_expired_customers.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/maintenance/update_product_status.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_customer_360.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_customer_cohort.sql
RENAMED
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_sales_summary.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/analytics_pipeline.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/view_based_merge.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_cte_view_star.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_multi_query.sql
RENAMED
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_single_query.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_view_window_cte.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|