sql-glider 0.1.14__tar.gz → 0.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. {sql_glider-0.1.14 → sql_glider-0.1.16}/PKG-INFO +1 -1
  2. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/_version.py +2 -2
  3. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/cli.py +32 -1
  4. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/builder.py +16 -0
  5. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/formatters.py +92 -1
  6. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/lineage/analyzer.py +49 -5
  7. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/schema/extractor.py +3 -0
  8. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_formatters.py +62 -0
  9. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/lineage/test_analyzer.py +99 -1
  10. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/test_cli.py +138 -0
  11. {sql_glider-0.1.14 → sql_glider-0.1.16}/.github/workflows/ci.yml +0 -0
  12. {sql_glider-0.1.14 → sql_glider-0.1.16}/.github/workflows/publish.yml +0 -0
  13. {sql_glider-0.1.14 → sql_glider-0.1.16}/.gitignore +0 -0
  14. {sql_glider-0.1.14 → sql_glider-0.1.16}/.python-version +0 -0
  15. {sql_glider-0.1.14 → sql_glider-0.1.16}/ARCHITECTURE.md +0 -0
  16. {sql_glider-0.1.14 → sql_glider-0.1.16}/CLAUDE.md +0 -0
  17. {sql_glider-0.1.14 → sql_glider-0.1.16}/LICENSE +0 -0
  18. {sql_glider-0.1.14 → sql_glider-0.1.16}/README.md +0 -0
  19. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-05-column-level-lineage.md +0 -0
  20. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-05-reverse-lineage.md +0 -0
  21. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-06-config-file-support.md +0 -0
  22. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-06-graph-lineage.md +0 -0
  23. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-06-unify-single-multi-query.md +0 -0
  24. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-07-sample-data-model.md +0 -0
  25. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-07-sql-templating.md +0 -0
  26. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-08-tables-command.md +0 -0
  27. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-09-graph-query-paths.md +0 -0
  28. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-13-dissect-command.md +0 -0
  29. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2025-12-14-tables-pull-command.md +0 -0
  30. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
  31. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-26-file-scoped-schema-context.md +0 -0
  32. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-28-sparksql-table-extraction.md +0 -0
  33. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-29-no-star-flag.md +0 -0
  34. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-29-resolve-schema.md +0 -0
  35. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-29-schema-pruning-optimization.md +0 -0
  36. {sql_glider-0.1.14 → sql_glider-0.1.16}/plans/2026-01-29-tables-scrape-command.md +0 -0
  37. {sql_glider-0.1.14 → sql_glider-0.1.16}/pyproject.toml +0 -0
  38. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/README.md +0 -0
  39. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/expire_dim_customer.sql +0 -0
  40. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/load_fact_orders.sql +0 -0
  41. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/load_fact_payments.sql +0 -0
  42. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/merge_dim_customer.sql +0 -0
  43. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/merge_dim_product.sql +0 -0
  44. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
  45. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/complex/conditional_merge.sql +0 -0
  46. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/complex/cte_insert.sql +0 -0
  47. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/complex/multi_table_transform.sql +0 -0
  48. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/dim_customer.sql +0 -0
  49. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/dim_product.sql +0 -0
  50. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/fact_orders.sql +0 -0
  51. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/fact_payments.sql +0 -0
  52. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_addresses.sql +0 -0
  53. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_customers.sql +0 -0
  54. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_order_items.sql +0 -0
  55. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_orders.sql +0 -0
  56. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_payments.sql +0 -0
  57. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/raw_products.sql +0 -0
  58. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/stg_customers.sql +0 -0
  59. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/stg_orders.sql +0 -0
  60. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/stg_payments.sql +0 -0
  61. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/ddl/stg_products.sql +0 -0
  62. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
  63. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
  64. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
  65. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
  66. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/maintenance/update_product_status.sql +0 -0
  67. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
  68. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
  69. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
  70. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
  71. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/staging/load_stg_customers.sql +0 -0
  72. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/staging/load_stg_orders.sql +0 -0
  73. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/staging/load_stg_payments.sql +0 -0
  74. {sql_glider-0.1.14 → sql_glider-0.1.16}/sample_data_model/staging/load_stg_products.sql +0 -0
  75. {sql_glider-0.1.14 → sql_glider-0.1.16}/sqlglider.toml.example +0 -0
  76. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/__init__.py +0 -0
  77. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/catalog/__init__.py +0 -0
  78. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/catalog/base.py +0 -0
  79. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/catalog/databricks.py +0 -0
  80. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/catalog/registry.py +0 -0
  81. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/dissection/__init__.py +0 -0
  82. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/dissection/analyzer.py +0 -0
  83. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/dissection/formatters.py +0 -0
  84. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/dissection/models.py +0 -0
  85. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/global_models.py +0 -0
  86. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/__init__.py +0 -0
  87. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/merge.py +0 -0
  88. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/models.py +0 -0
  89. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/query.py +0 -0
  90. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/graph/serialization.py +0 -0
  91. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/lineage/__init__.py +0 -0
  92. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/lineage/formatters.py +0 -0
  93. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/schema/__init__.py +0 -0
  94. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/templating/__init__.py +0 -0
  95. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/templating/base.py +0 -0
  96. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/templating/jinja.py +0 -0
  97. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/templating/registry.py +0 -0
  98. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/templating/variables.py +0 -0
  99. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/utils/__init__.py +0 -0
  100. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/utils/config.py +0 -0
  101. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/utils/file_utils.py +0 -0
  102. {sql_glider-0.1.14 → sql_glider-0.1.16}/src/sqlglider/utils/schema.py +0 -0
  103. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/__init__.py +0 -0
  104. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
  105. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
  106. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/customers.sql +0 -0
  107. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/orders.sql +0 -0
  108. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/reports.sql +0 -0
  109. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
  110. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_cte.sql +0 -0
  111. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
  112. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_cte_view_star.sql +0 -0
  113. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
  114. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_multi.sql +0 -0
  115. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
  116. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_single_query.sql +0 -0
  117. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_subquery.sql +0 -0
  118. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_tables.sql +0 -0
  119. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_view.sql +0 -0
  120. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
  121. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/fixtures/sample_manifest.csv +0 -0
  122. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/__init__.py +0 -0
  123. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/catalog/__init__.py +0 -0
  124. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/catalog/test_base.py +0 -0
  125. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/catalog/test_databricks.py +0 -0
  126. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/catalog/test_registry.py +0 -0
  127. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/dissection/__init__.py +0 -0
  128. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/dissection/test_analyzer.py +0 -0
  129. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/dissection/test_formatters.py +0 -0
  130. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/dissection/test_models.py +0 -0
  131. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/__init__.py +0 -0
  132. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_builder.py +0 -0
  133. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_merge.py +0 -0
  134. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_models.py +0 -0
  135. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_query.py +0 -0
  136. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/graph/test_serialization.py +0 -0
  137. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/lineage/__init__.py +0 -0
  138. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/lineage/test_formatters.py +0 -0
  139. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/schema/__init__.py +0 -0
  140. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/schema/test_extractor.py +0 -0
  141. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/templating/__init__.py +0 -0
  142. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/templating/test_base.py +0 -0
  143. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/templating/test_jinja.py +0 -0
  144. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/templating/test_registry.py +0 -0
  145. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/templating/test_variables.py +0 -0
  146. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/utils/__init__.py +0 -0
  147. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/utils/test_config.py +0 -0
  148. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/utils/test_file_utils.py +0 -0
  149. {sql_glider-0.1.14 → sql_glider-0.1.16}/tests/sqlglider/utils/test_schema.py +0 -0
  150. {sql_glider-0.1.14 → sql_glider-0.1.16}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.14
3
+ Version: 0.1.16
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.14'
32
- __version_tuple__ = version_tuple = (0, 1, 14)
31
+ __version__ = version = '0.1.16'
32
+ __version_tuple__ = version_tuple = (0, 1, 16)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -171,6 +171,12 @@ def lineage(
171
171
  "--no-star",
172
172
  help="Fail if SELECT * cannot be resolved to actual columns",
173
173
  ),
174
+ provide_schema: Optional[Path] = typer.Option(
175
+ None,
176
+ "--provide-schema",
177
+ exists=True,
178
+ help="Path to a schema file (JSON, CSV, or text) for star resolution",
179
+ ),
174
180
  ) -> None:
175
181
  """
176
182
  Analyze column or table lineage for a SQL file.
@@ -266,8 +272,15 @@ def lineage(
266
272
  source_path=source_path,
267
273
  )
268
274
 
275
+ # Load provided schema if specified
276
+ schema = None
277
+ if provide_schema:
278
+ from sqlglider.graph.formatters import load_schema_file
279
+
280
+ schema = load_schema_file(provide_schema)
281
+
269
282
  # Create analyzer
270
- analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star)
283
+ analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star, schema=schema)
271
284
 
272
285
  # Unified lineage analysis (handles both single and multi-query files)
273
286
  results = analyzer.analyze_queries(
@@ -1292,6 +1305,13 @@ def graph_build(
1292
1305
  "--dump-schema-format",
1293
1306
  help="Format for dumped schema: 'text' (default), 'json', or 'csv'",
1294
1307
  ),
1308
+ provide_schema: Optional[Path] = typer.Option(
1309
+ None,
1310
+ "--provide-schema",
1311
+ exists=True,
1312
+ help="Path to a schema file (JSON, CSV, or text) to use for star resolution. "
1313
+ "Can be combined with --resolve-schema to merge file-extracted schema on top.",
1314
+ ),
1295
1315
  strict_schema: bool = typer.Option(
1296
1316
  False,
1297
1317
  "--strict-schema",
@@ -1434,6 +1454,17 @@ def graph_build(
1434
1454
  strict_schema=strict_schema,
1435
1455
  )
1436
1456
 
1457
+ # Load provided schema file if specified
1458
+ if provide_schema:
1459
+ from sqlglider.graph.formatters import load_schema_file
1460
+
1461
+ loaded_schema = load_schema_file(provide_schema)
1462
+ builder.set_schema(loaded_schema)
1463
+ console.print(
1464
+ f"[green]Loaded schema from {provide_schema} "
1465
+ f"({len(loaded_schema)} table(s))[/green]"
1466
+ )
1467
+
1437
1468
  # Collect file paths for schema extraction
1438
1469
  manifest_files, path_files = _collect_sql_files(
1439
1470
  paths, manifest, recursive, glob_pattern
@@ -303,6 +303,21 @@ class GraphBuilder:
303
303
  self.add_file(file_path, dialect)
304
304
  return self
305
305
 
306
+ def set_schema(self, schema: Dict[str, Dict[str, str]]) -> "GraphBuilder":
307
+ """Pre-seed the resolved schema from an external source.
308
+
309
+ This allows skipping the schema extraction pass when the schema
310
+ is already known (e.g., loaded from a file).
311
+
312
+ Args:
313
+ schema: Schema dictionary mapping table names to column dicts.
314
+
315
+ Returns:
316
+ self for method chaining
317
+ """
318
+ self._resolved_schema = schema
319
+ return self
320
+
306
321
  def extract_schemas(
307
322
  self,
308
323
  file_paths: List[Path],
@@ -325,6 +340,7 @@ class GraphBuilder:
325
340
  file_paths,
326
341
  dialect=file_dialect,
327
342
  sql_preprocessor=self.sql_preprocessor,
343
+ initial_schema=self._resolved_schema if self._resolved_schema else None,
328
344
  strict_schema=self.strict_schema,
329
345
  catalog_type=self.catalog_type,
330
346
  catalog_config=self.catalog_config,
@@ -1,8 +1,9 @@
1
- """Output formatters for resolved schema data."""
1
+ """Output formatters and parsers for resolved schema data."""
2
2
 
3
3
  import csv
4
4
  import json
5
5
  from io import StringIO
6
+ from pathlib import Path
6
7
  from typing import Dict
7
8
 
8
9
  SchemaDict = Dict[str, Dict[str, str]]
@@ -96,3 +97,93 @@ def format_schema(schema: SchemaDict, output_format: str = "text") -> str:
96
97
  f"Invalid schema format '{output_format}'. Use 'text', 'json', or 'csv'."
97
98
  )
98
99
  return formatter(schema)
100
+
101
+
102
+ def parse_schema_json(content: str) -> SchemaDict:
103
+ """Parse schema from JSON format.
104
+
105
+ Args:
106
+ content: JSON string with table -> {column -> type} structure.
107
+
108
+ Returns:
109
+ Parsed schema dictionary.
110
+ """
111
+ return json.loads(content) # type: ignore[no-any-return]
112
+
113
+
114
+ def parse_schema_csv(content: str) -> SchemaDict:
115
+ """Parse schema from CSV format.
116
+
117
+ Expects columns: table, column, type.
118
+
119
+ Args:
120
+ content: CSV string with header row.
121
+
122
+ Returns:
123
+ Parsed schema dictionary.
124
+ """
125
+ schema: SchemaDict = {}
126
+ reader = csv.DictReader(StringIO(content))
127
+ for row in reader:
128
+ table = row["table"]
129
+ column = row["column"]
130
+ col_type = row.get("type", "UNKNOWN")
131
+ if table not in schema:
132
+ schema[table] = {}
133
+ schema[table][column] = col_type
134
+ return schema
135
+
136
+
137
+ def parse_schema_text(content: str) -> SchemaDict:
138
+ """Parse schema from indented text format.
139
+
140
+ Expected format:
141
+ table_name
142
+ column1
143
+ column2
144
+
145
+ other_table
146
+ col_a
147
+
148
+ Args:
149
+ content: Text-formatted schema string.
150
+
151
+ Returns:
152
+ Parsed schema dictionary.
153
+ """
154
+ schema: SchemaDict = {}
155
+ current_table: str | None = None
156
+ for line in content.splitlines():
157
+ if not line or not line.strip():
158
+ continue
159
+ if line.startswith(" "):
160
+ if current_table is not None:
161
+ schema[current_table][line.strip()] = "UNKNOWN"
162
+ else:
163
+ current_table = line.strip()
164
+ schema[current_table] = {}
165
+ return schema
166
+
167
+
168
+ def load_schema_file(path: Path) -> SchemaDict:
169
+ """Load a schema file, auto-detecting format from extension.
170
+
171
+ `.json` → JSON, `.csv` → CSV, otherwise text.
172
+
173
+ Args:
174
+ path: Path to schema file.
175
+
176
+ Returns:
177
+ Parsed schema dictionary.
178
+
179
+ Raises:
180
+ FileNotFoundError: If the file does not exist.
181
+ """
182
+ content = path.read_text(encoding="utf-8")
183
+ suffix = path.suffix.lower()
184
+ if suffix == ".json":
185
+ return parse_schema_json(content)
186
+ elif suffix == ".csv":
187
+ return parse_schema_csv(content)
188
+ else:
189
+ return parse_schema_text(content)
@@ -11,6 +11,48 @@ from sqlglot.lineage import Node, lineage
11
11
  from sqlglider.global_models import AnalysisLevel
12
12
 
13
13
 
14
+ def _flat_schema_to_nested(
15
+ schema: Dict[str, Dict[str, str]],
16
+ ) -> Dict[str, object]:
17
+ """Convert flat dot-notation schema keys to the nested dict structure sqlglot expects.
18
+
19
+ sqlglot's MappingSchema requires consistent nesting depth across all tables.
20
+ Flat keys like ``"db.table"`` are split on dots and nested accordingly.
21
+ Shorter keys are padded with empty-string prefixes to match the max depth.
22
+
23
+ Examples::
24
+
25
+ {"users": {"id": "UNKNOWN"}}
26
+ → {"users": {"id": "UNKNOWN"}} (depth 1, no change)
27
+
28
+ {"db.users": {"id": "UNKNOWN"}, "my_view": {"x": "UNKNOWN"}}
29
+ → {"db": {"users": {"id": "UNKNOWN"}}, "": {"my_view": {"x": "UNKNOWN"}}}
30
+ """
31
+ if not schema:
32
+ return {}
33
+
34
+ # Split all keys into parts
35
+ entries = [(key.split("."), cols) for key, cols in schema.items()]
36
+ max_depth = max(len(parts) for parts, _ in entries)
37
+
38
+ # If all keys are single-part (unqualified), return as-is
39
+ if max_depth == 1:
40
+ return schema # type: ignore[return-value]
41
+
42
+ # Pad shorter keys with empty-string prefixes to match max depth
43
+ nested: Dict[str, object] = {}
44
+ for parts, cols in entries:
45
+ while len(parts) < max_depth:
46
+ parts.insert(0, "")
47
+ d: Dict[str, object] = nested
48
+ for part in parts[:-1]:
49
+ if part not in d:
50
+ d[part] = {}
51
+ d = d[part] # type: ignore[assignment]
52
+ d[parts[-1]] = cols
53
+ return nested
54
+
55
+
14
56
  class StarResolutionError(Exception):
15
57
  """Raised when SELECT * cannot be resolved and no_star mode is enabled."""
16
58
 
@@ -860,8 +902,10 @@ class LineageAnalyzer:
860
902
  current_query_sql = self.expr.sql(dialect=self.dialect)
861
903
 
862
904
  # Prune schema to only tables referenced in this query to avoid
863
- # sqlglot.lineage() performance degradation with large schema dicts
864
- pruned_schema: Optional[Dict[str, Dict[str, str]]] = None
905
+ # sqlglot.lineage() performance degradation with large schema dicts.
906
+ # Then convert from flat dot-notation keys to the nested dict structure
907
+ # that sqlglot's MappingSchema expects.
908
+ lineage_schema: Optional[Dict[str, object]] = None
865
909
  if self._file_schema:
866
910
  referenced = {t.lower() for t in self._get_query_tables()}
867
911
  pruned_schema = {
@@ -869,8 +913,8 @@ class LineageAnalyzer:
869
913
  for table, cols in self._file_schema.items()
870
914
  if table.lower() in referenced
871
915
  }
872
- if not pruned_schema:
873
- pruned_schema = None
916
+ if pruned_schema:
917
+ lineage_schema = _flat_schema_to_nested(pruned_schema)
874
918
 
875
919
  for col in columns_to_analyze:
876
920
  try:
@@ -883,7 +927,7 @@ class LineageAnalyzer:
883
927
  lineage_col,
884
928
  current_query_sql,
885
929
  dialect=self.dialect,
886
- schema=pruned_schema,
930
+ schema=lineage_schema,
887
931
  )
888
932
 
889
933
  # Collect all source columns
@@ -153,6 +153,7 @@ def extract_and_resolve_schema(
153
153
  file_paths: List[Path],
154
154
  dialect: str = "spark",
155
155
  sql_preprocessor: Optional[SqlPreprocessor] = None,
156
+ initial_schema: Optional[SchemaDict] = None,
156
157
  strict_schema: bool = False,
157
158
  catalog_type: Optional[str] = None,
158
159
  catalog_config: Optional[Dict[str, object]] = None,
@@ -167,6 +168,7 @@ def extract_and_resolve_schema(
167
168
  file_paths: SQL files to extract schema from.
168
169
  dialect: SQL dialect.
169
170
  sql_preprocessor: Optional SQL preprocessor.
171
+ initial_schema: Optional starting schema to build upon.
170
172
  strict_schema: If True, fail on ambiguous column attribution.
171
173
  catalog_type: Optional catalog provider name.
172
174
  catalog_config: Optional provider-specific configuration dict.
@@ -183,6 +185,7 @@ def extract_and_resolve_schema(
183
185
  file_paths,
184
186
  dialect=dialect,
185
187
  sql_preprocessor=sql_preprocessor,
188
+ initial_schema=initial_schema,
186
189
  strict_schema=strict_schema,
187
190
  console=console,
188
191
  )
@@ -7,6 +7,10 @@ from sqlglider.graph.formatters import (
7
7
  format_schema_csv,
8
8
  format_schema_json,
9
9
  format_schema_text,
10
+ load_schema_file,
11
+ parse_schema_csv,
12
+ parse_schema_json,
13
+ parse_schema_text,
10
14
  )
11
15
 
12
16
 
@@ -84,3 +88,61 @@ class TestFormatSchema:
84
88
  def test_invalid_format(self, sample_schema):
85
89
  with pytest.raises(ValueError, match="Invalid schema format"):
86
90
  format_schema(sample_schema, "xml")
91
+
92
+
93
+ class TestParseSchemaJson:
94
+ def test_round_trip(self, sample_schema):
95
+ content = format_schema_json(sample_schema)
96
+ parsed = parse_schema_json(content)
97
+ assert parsed == sample_schema
98
+
99
+ def test_empty(self):
100
+ assert parse_schema_json("{}") == {}
101
+
102
+
103
+ class TestParseSchemaCsv:
104
+ def test_round_trip(self, sample_schema):
105
+ content = format_schema_csv(sample_schema)
106
+ parsed = parse_schema_csv(content)
107
+ assert parsed == sample_schema
108
+
109
+ def test_empty(self):
110
+ parsed = parse_schema_csv("table,column,type\n")
111
+ assert parsed == {}
112
+
113
+
114
+ class TestParseSchemaText:
115
+ def test_round_trip(self, sample_schema):
116
+ content = format_schema_text(sample_schema)
117
+ parsed = parse_schema_text(content)
118
+ assert parsed == sample_schema
119
+
120
+ def test_empty(self):
121
+ assert parse_schema_text("") == {}
122
+
123
+ def test_single_table(self):
124
+ content = "users\n id\n name\n"
125
+ parsed = parse_schema_text(content)
126
+ assert parsed == {"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
127
+
128
+
129
+ class TestLoadSchemaFile:
130
+ def test_json_extension(self, tmp_path, sample_schema):
131
+ f = tmp_path / "schema.json"
132
+ f.write_text(format_schema_json(sample_schema))
133
+ assert load_schema_file(f) == sample_schema
134
+
135
+ def test_csv_extension(self, tmp_path, sample_schema):
136
+ f = tmp_path / "schema.csv"
137
+ f.write_text(format_schema_csv(sample_schema))
138
+ assert load_schema_file(f) == sample_schema
139
+
140
+ def test_txt_extension(self, tmp_path, sample_schema):
141
+ f = tmp_path / "schema.txt"
142
+ f.write_text(format_schema_text(sample_schema))
143
+ assert load_schema_file(f) == sample_schema
144
+
145
+ def test_no_extension_treated_as_text(self, tmp_path, sample_schema):
146
+ f = tmp_path / "schema"
147
+ f.write_text(format_schema_text(sample_schema))
148
+ assert load_schema_file(f) == sample_schema
@@ -3,7 +3,11 @@
3
3
  import pytest
4
4
 
5
5
  from sqlglider.global_models import AnalysisLevel
6
- from sqlglider.lineage.analyzer import LineageAnalyzer, StarResolutionError
6
+ from sqlglider.lineage.analyzer import (
7
+ LineageAnalyzer,
8
+ StarResolutionError,
9
+ _flat_schema_to_nested,
10
+ )
7
11
 
8
12
 
9
13
  class TestCaseInsensitiveForwardLineage:
@@ -3181,3 +3185,97 @@ class TestSchemaPruning:
3181
3185
  output_names = {item.output_name for r in results for item in r.lineage_items}
3182
3186
  assert "id" in output_names
3183
3187
  assert "email" in output_names
3188
+
3189
+
3190
+ class TestFlatSchemaToNested:
3191
+ """Tests for _flat_schema_to_nested conversion utility."""
3192
+
3193
+ def test_empty(self):
3194
+ assert _flat_schema_to_nested({}) == {}
3195
+
3196
+ def test_unqualified_passthrough(self):
3197
+ schema = {"users": {"id": "UNKNOWN"}}
3198
+ assert _flat_schema_to_nested(schema) == schema
3199
+
3200
+ def test_two_part_keys(self):
3201
+ schema = {"db.users": {"id": "UNKNOWN"}}
3202
+ result = _flat_schema_to_nested(schema)
3203
+ assert result == {"db": {"users": {"id": "UNKNOWN"}}}
3204
+
3205
+ def test_three_part_keys(self):
3206
+ schema = {"cat.db.users": {"id": "UNKNOWN"}}
3207
+ result = _flat_schema_to_nested(schema)
3208
+ assert result == {"cat": {"db": {"users": {"id": "UNKNOWN"}}}}
3209
+
3210
+ def test_mixed_depth_pads_shorter_keys(self):
3211
+ schema = {
3212
+ "my_view": {"x": "UNKNOWN"},
3213
+ "db.users": {"id": "UNKNOWN"},
3214
+ }
3215
+ result = _flat_schema_to_nested(schema)
3216
+ assert result == {
3217
+ "": {"my_view": {"x": "UNKNOWN"}},
3218
+ "db": {"users": {"id": "UNKNOWN"}},
3219
+ }
3220
+
3221
+
3222
+ class TestQualifiedSchemaKeys:
3223
+ """Tests for schema with qualified (dotted) table names."""
3224
+
3225
+ def test_qualified_star_expansion(self):
3226
+ """SELECT * resolves correctly with qualified schema keys."""
3227
+ sql = "SELECT * FROM mydb.users"
3228
+ schema = {"mydb.users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
3229
+ analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
3230
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
3231
+ items = {
3232
+ (item.source_name, item.output_name)
3233
+ for r in results
3234
+ for item in r.lineage_items
3235
+ }
3236
+ assert ("mydb.users.id", "id") in items
3237
+ assert ("mydb.users.name", "name") in items
3238
+
3239
+ def test_qualified_explicit_columns(self):
3240
+ """Explicit columns trace sources correctly with qualified schema keys."""
3241
+ sql = "SELECT id, name FROM mydb.users"
3242
+ schema = {"mydb.users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
3243
+ analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
3244
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
3245
+ items = {
3246
+ (item.source_name, item.output_name)
3247
+ for r in results
3248
+ for item in r.lineage_items
3249
+ }
3250
+ assert ("mydb.users.id", "mydb.users.id") in items
3251
+ assert ("mydb.users.name", "mydb.users.name") in items
3252
+
3253
+ def test_three_part_qualified(self):
3254
+ """3-part qualified names (catalog.db.table) work correctly."""
3255
+ sql = "SELECT id FROM catalog.mydb.users"
3256
+ schema = {"catalog.mydb.users": {"id": "UNKNOWN"}}
3257
+ analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
3258
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
3259
+ items = [
3260
+ (item.source_name, item.output_name)
3261
+ for r in results
3262
+ for item in r.lineage_items
3263
+ ]
3264
+ assert len(items) == 1
3265
+ assert items[0] == ("catalog.mydb.users.id", "catalog.mydb.users.id")
3266
+
3267
+ def test_mixed_qualified_and_unqualified(self):
3268
+ """Mix of qualified and unqualified table names in schema."""
3269
+ sql = "SELECT * FROM my_view"
3270
+ schema = {
3271
+ "my_view": {"id": "UNKNOWN"},
3272
+ "mydb.users": {"id": "UNKNOWN", "name": "UNKNOWN"},
3273
+ }
3274
+ analyzer = LineageAnalyzer(sql, dialect="spark", schema=schema)
3275
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
3276
+ items = {
3277
+ (item.source_name, item.output_name)
3278
+ for r in results
3279
+ for item in r.lineage_items
3280
+ }
3281
+ assert ("my_view.id", "id") in items
@@ -1884,3 +1884,141 @@ class TestTablesScrapeCommand:
1884
1884
 
1885
1885
  data = json.loads(result.stdout)
1886
1886
  assert "customers" in data
1887
+
1888
+
1889
+ class TestProvideSchema:
1890
+ """Tests for --provide-schema on lineage and graph build commands."""
1891
+
1892
+ @pytest.fixture
1893
+ def star_query_file(self, tmp_path):
1894
+ """SQL file with SELECT * that needs schema to resolve."""
1895
+ sql_file = tmp_path / "star.sql"
1896
+ sql_file.write_text("SELECT * FROM users")
1897
+ return sql_file
1898
+
1899
+ @pytest.fixture
1900
+ def schema_json_file(self, tmp_path):
1901
+ schema = tmp_path / "schema.json"
1902
+ schema.write_text('{"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}')
1903
+ return schema
1904
+
1905
+ def test_lineage_with_provide_schema(self, star_query_file, schema_json_file):
1906
+ """Test that --provide-schema resolves SELECT * in lineage."""
1907
+ result = runner.invoke(
1908
+ app,
1909
+ [
1910
+ "lineage",
1911
+ str(star_query_file),
1912
+ "--provide-schema",
1913
+ str(schema_json_file),
1914
+ "--output-format",
1915
+ "json",
1916
+ ],
1917
+ )
1918
+
1919
+ assert result.exit_code == 0
1920
+ import json
1921
+
1922
+ data = json.loads(result.stdout)
1923
+ columns = [item["output_name"] for item in data["queries"][0]["lineage"]]
1924
+ assert "id" in columns
1925
+ assert "name" in columns
1926
+
1927
+ def test_graph_build_with_provide_schema(
1928
+ self, star_query_file, schema_json_file, tmp_path
1929
+ ):
1930
+ """Test that --provide-schema works with graph build."""
1931
+ output = tmp_path / "graph.json"
1932
+ result = runner.invoke(
1933
+ app,
1934
+ [
1935
+ "graph",
1936
+ "build",
1937
+ str(star_query_file),
1938
+ "-o",
1939
+ str(output),
1940
+ "--provide-schema",
1941
+ str(schema_json_file),
1942
+ ],
1943
+ )
1944
+
1945
+ assert result.exit_code == 0
1946
+ assert output.exists()
1947
+ import json
1948
+
1949
+ graph = json.loads(output.read_text())
1950
+ assert graph["metadata"]["total_nodes"] > 0
1951
+
1952
+
1953
+ class TestProvideSchemaRoundTrip:
1954
+ """Integration: tables scrape -> schema file -> graph build --provide-schema."""
1955
+
1956
+ @pytest.fixture
1957
+ def sql_dir(self, tmp_path):
1958
+ d = tmp_path / "sql"
1959
+ d.mkdir()
1960
+ (d / "a.sql").write_text(
1961
+ "CREATE TABLE output_table AS SELECT c.id, c.name FROM customers c;"
1962
+ )
1963
+ (d / "b.sql").write_text("SELECT * FROM output_table")
1964
+ return d
1965
+
1966
+ @pytest.mark.parametrize(
1967
+ "fmt,ext", [("json", ".json"), ("csv", ".csv"), ("text", ".txt")]
1968
+ )
1969
+ def test_round_trip(self, sql_dir, tmp_path, fmt, ext):
1970
+ """Scrape schema, save to file, then use --provide-schema to build graph."""
1971
+ schema_file = tmp_path / f"schema{ext}"
1972
+ graph_provided = tmp_path / "graph_provided.json"
1973
+ graph_resolved = tmp_path / "graph_resolved.json"
1974
+
1975
+ # Step 1: Scrape schema
1976
+ scrape_result = runner.invoke(
1977
+ app,
1978
+ ["tables", "scrape", str(sql_dir), "-f", fmt, "-o", str(schema_file)],
1979
+ )
1980
+ assert scrape_result.exit_code == 0
1981
+ assert schema_file.exists()
1982
+
1983
+ # Step 2: Build graph with --provide-schema
1984
+ result_provided = runner.invoke(
1985
+ app,
1986
+ [
1987
+ "graph",
1988
+ "build",
1989
+ str(sql_dir),
1990
+ "-o",
1991
+ str(graph_provided),
1992
+ "--provide-schema",
1993
+ str(schema_file),
1994
+ ],
1995
+ )
1996
+ assert result_provided.exit_code == 0
1997
+
1998
+ # Step 3: Build graph with --resolve-schema
1999
+ result_resolved = runner.invoke(
2000
+ app,
2001
+ [
2002
+ "graph",
2003
+ "build",
2004
+ str(sql_dir),
2005
+ "-o",
2006
+ str(graph_resolved),
2007
+ "--resolve-schema",
2008
+ ],
2009
+ )
2010
+ assert result_resolved.exit_code == 0
2011
+
2012
+ # Step 4: Compare graphs (nodes and edges should match)
2013
+ import json
2014
+
2015
+ g1 = json.loads(graph_provided.read_text())
2016
+ g2 = json.loads(graph_resolved.read_text())
2017
+
2018
+ nodes1 = sorted([n["identifier"] for n in g1["nodes"]])
2019
+ nodes2 = sorted([n["identifier"] for n in g2["nodes"]])
2020
+ assert nodes1 == nodes2
2021
+
2022
+ edges1 = sorted([(e["source_node"], e["target_node"]) for e in g1["edges"]])
2023
+ edges2 = sorted([(e["source_node"], e["target_node"]) for e in g2["edges"]])
2024
+ assert edges1 == edges2
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes