sql-glider 0.1.14__tar.gz → 0.1.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. {sql_glider-0.1.14 → sql_glider-0.1.15}/PKG-INFO +1 -1
  2. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/_version.py +2 -2
  3. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/cli.py +32 -1
  4. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/builder.py +16 -0
  5. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/formatters.py +92 -1
  6. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/schema/extractor.py +3 -0
  7. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_formatters.py +62 -0
  8. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/test_cli.py +138 -0
  9. {sql_glider-0.1.14 → sql_glider-0.1.15}/.github/workflows/ci.yml +0 -0
  10. {sql_glider-0.1.14 → sql_glider-0.1.15}/.github/workflows/publish.yml +0 -0
  11. {sql_glider-0.1.14 → sql_glider-0.1.15}/.gitignore +0 -0
  12. {sql_glider-0.1.14 → sql_glider-0.1.15}/.python-version +0 -0
  13. {sql_glider-0.1.14 → sql_glider-0.1.15}/ARCHITECTURE.md +0 -0
  14. {sql_glider-0.1.14 → sql_glider-0.1.15}/CLAUDE.md +0 -0
  15. {sql_glider-0.1.14 → sql_glider-0.1.15}/LICENSE +0 -0
  16. {sql_glider-0.1.14 → sql_glider-0.1.15}/README.md +0 -0
  17. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-05-column-level-lineage.md +0 -0
  18. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-05-reverse-lineage.md +0 -0
  19. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-06-config-file-support.md +0 -0
  20. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-06-graph-lineage.md +0 -0
  21. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-06-unify-single-multi-query.md +0 -0
  22. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-07-sample-data-model.md +0 -0
  23. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-07-sql-templating.md +0 -0
  24. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-08-tables-command.md +0 -0
  25. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-09-graph-query-paths.md +0 -0
  26. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-13-dissect-command.md +0 -0
  27. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2025-12-14-tables-pull-command.md +0 -0
  28. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
  29. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-26-file-scoped-schema-context.md +0 -0
  30. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-28-sparksql-table-extraction.md +0 -0
  31. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-29-no-star-flag.md +0 -0
  32. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-29-resolve-schema.md +0 -0
  33. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-29-schema-pruning-optimization.md +0 -0
  34. {sql_glider-0.1.14 → sql_glider-0.1.15}/plans/2026-01-29-tables-scrape-command.md +0 -0
  35. {sql_glider-0.1.14 → sql_glider-0.1.15}/pyproject.toml +0 -0
  36. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/README.md +0 -0
  37. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/expire_dim_customer.sql +0 -0
  38. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/load_fact_orders.sql +0 -0
  39. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/load_fact_payments.sql +0 -0
  40. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/merge_dim_customer.sql +0 -0
  41. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/merge_dim_product.sql +0 -0
  42. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
  43. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/complex/conditional_merge.sql +0 -0
  44. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/complex/cte_insert.sql +0 -0
  45. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/complex/multi_table_transform.sql +0 -0
  46. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/dim_customer.sql +0 -0
  47. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/dim_product.sql +0 -0
  48. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/fact_orders.sql +0 -0
  49. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/fact_payments.sql +0 -0
  50. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_addresses.sql +0 -0
  51. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_customers.sql +0 -0
  52. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_order_items.sql +0 -0
  53. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_orders.sql +0 -0
  54. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_payments.sql +0 -0
  55. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/raw_products.sql +0 -0
  56. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/stg_customers.sql +0 -0
  57. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/stg_orders.sql +0 -0
  58. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/stg_payments.sql +0 -0
  59. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/ddl/stg_products.sql +0 -0
  60. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
  61. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
  62. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
  63. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
  64. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/maintenance/update_product_status.sql +0 -0
  65. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
  66. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
  67. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
  68. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
  69. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/staging/load_stg_customers.sql +0 -0
  70. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/staging/load_stg_orders.sql +0 -0
  71. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/staging/load_stg_payments.sql +0 -0
  72. {sql_glider-0.1.14 → sql_glider-0.1.15}/sample_data_model/staging/load_stg_products.sql +0 -0
  73. {sql_glider-0.1.14 → sql_glider-0.1.15}/sqlglider.toml.example +0 -0
  74. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/__init__.py +0 -0
  75. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/catalog/__init__.py +0 -0
  76. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/catalog/base.py +0 -0
  77. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/catalog/databricks.py +0 -0
  78. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/catalog/registry.py +0 -0
  79. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/dissection/__init__.py +0 -0
  80. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/dissection/analyzer.py +0 -0
  81. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/dissection/formatters.py +0 -0
  82. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/dissection/models.py +0 -0
  83. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/global_models.py +0 -0
  84. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/__init__.py +0 -0
  85. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/merge.py +0 -0
  86. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/models.py +0 -0
  87. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/query.py +0 -0
  88. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/graph/serialization.py +0 -0
  89. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/lineage/__init__.py +0 -0
  90. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/lineage/analyzer.py +0 -0
  91. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/lineage/formatters.py +0 -0
  92. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/schema/__init__.py +0 -0
  93. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/templating/__init__.py +0 -0
  94. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/templating/base.py +0 -0
  95. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/templating/jinja.py +0 -0
  96. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/templating/registry.py +0 -0
  97. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/templating/variables.py +0 -0
  98. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/utils/__init__.py +0 -0
  99. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/utils/config.py +0 -0
  100. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/utils/file_utils.py +0 -0
  101. {sql_glider-0.1.14 → sql_glider-0.1.15}/src/sqlglider/utils/schema.py +0 -0
  102. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/__init__.py +0 -0
  103. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
  104. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
  105. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/customers.sql +0 -0
  106. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/orders.sql +0 -0
  107. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/reports.sql +0 -0
  108. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
  109. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_cte.sql +0 -0
  110. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
  111. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_cte_view_star.sql +0 -0
  112. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
  113. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_multi.sql +0 -0
  114. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
  115. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_single_query.sql +0 -0
  116. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_subquery.sql +0 -0
  117. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_tables.sql +0 -0
  118. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_view.sql +0 -0
  119. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
  120. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/fixtures/sample_manifest.csv +0 -0
  121. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/__init__.py +0 -0
  122. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/catalog/__init__.py +0 -0
  123. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/catalog/test_base.py +0 -0
  124. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/catalog/test_databricks.py +0 -0
  125. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/catalog/test_registry.py +0 -0
  126. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/dissection/__init__.py +0 -0
  127. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/dissection/test_analyzer.py +0 -0
  128. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/dissection/test_formatters.py +0 -0
  129. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/dissection/test_models.py +0 -0
  130. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/__init__.py +0 -0
  131. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_builder.py +0 -0
  132. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_merge.py +0 -0
  133. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_models.py +0 -0
  134. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_query.py +0 -0
  135. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/graph/test_serialization.py +0 -0
  136. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/lineage/__init__.py +0 -0
  137. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/lineage/test_analyzer.py +0 -0
  138. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/lineage/test_formatters.py +0 -0
  139. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/schema/__init__.py +0 -0
  140. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/schema/test_extractor.py +0 -0
  141. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/templating/__init__.py +0 -0
  142. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/templating/test_base.py +0 -0
  143. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/templating/test_jinja.py +0 -0
  144. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/templating/test_registry.py +0 -0
  145. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/templating/test_variables.py +0 -0
  146. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/utils/__init__.py +0 -0
  147. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/utils/test_config.py +0 -0
  148. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/utils/test_file_utils.py +0 -0
  149. {sql_glider-0.1.14 → sql_glider-0.1.15}/tests/sqlglider/utils/test_schema.py +0 -0
  150. {sql_glider-0.1.14 → sql_glider-0.1.15}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.14
3
+ Version: 0.1.15
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.14'
32
- __version_tuple__ = version_tuple = (0, 1, 14)
31
+ __version__ = version = '0.1.15'
32
+ __version_tuple__ = version_tuple = (0, 1, 15)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -171,6 +171,12 @@ def lineage(
171
171
  "--no-star",
172
172
  help="Fail if SELECT * cannot be resolved to actual columns",
173
173
  ),
174
+ provide_schema: Optional[Path] = typer.Option(
175
+ None,
176
+ "--provide-schema",
177
+ exists=True,
178
+ help="Path to a schema file (JSON, CSV, or text) for star resolution",
179
+ ),
174
180
  ) -> None:
175
181
  """
176
182
  Analyze column or table lineage for a SQL file.
@@ -266,8 +272,15 @@ def lineage(
266
272
  source_path=source_path,
267
273
  )
268
274
 
275
+ # Load provided schema if specified
276
+ schema = None
277
+ if provide_schema:
278
+ from sqlglider.graph.formatters import load_schema_file
279
+
280
+ schema = load_schema_file(provide_schema)
281
+
269
282
  # Create analyzer
270
- analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star)
283
+ analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star, schema=schema)
271
284
 
272
285
  # Unified lineage analysis (handles both single and multi-query files)
273
286
  results = analyzer.analyze_queries(
@@ -1292,6 +1305,13 @@ def graph_build(
1292
1305
  "--dump-schema-format",
1293
1306
  help="Format for dumped schema: 'text' (default), 'json', or 'csv'",
1294
1307
  ),
1308
+ provide_schema: Optional[Path] = typer.Option(
1309
+ None,
1310
+ "--provide-schema",
1311
+ exists=True,
1312
+ help="Path to a schema file (JSON, CSV, or text) to use for star resolution. "
1313
+ "Can be combined with --resolve-schema to merge file-extracted schema on top.",
1314
+ ),
1295
1315
  strict_schema: bool = typer.Option(
1296
1316
  False,
1297
1317
  "--strict-schema",
@@ -1434,6 +1454,17 @@ def graph_build(
1434
1454
  strict_schema=strict_schema,
1435
1455
  )
1436
1456
 
1457
+ # Load provided schema file if specified
1458
+ if provide_schema:
1459
+ from sqlglider.graph.formatters import load_schema_file
1460
+
1461
+ loaded_schema = load_schema_file(provide_schema)
1462
+ builder.set_schema(loaded_schema)
1463
+ console.print(
1464
+ f"[green]Loaded schema from {provide_schema} "
1465
+ f"({len(loaded_schema)} table(s))[/green]"
1466
+ )
1467
+
1437
1468
  # Collect file paths for schema extraction
1438
1469
  manifest_files, path_files = _collect_sql_files(
1439
1470
  paths, manifest, recursive, glob_pattern
@@ -303,6 +303,21 @@ class GraphBuilder:
303
303
  self.add_file(file_path, dialect)
304
304
  return self
305
305
 
306
+ def set_schema(self, schema: Dict[str, Dict[str, str]]) -> "GraphBuilder":
307
+ """Pre-seed the resolved schema from an external source.
308
+
309
+ This allows skipping the schema extraction pass when the schema
310
+ is already known (e.g., loaded from a file).
311
+
312
+ Args:
313
+ schema: Schema dictionary mapping table names to column dicts.
314
+
315
+ Returns:
316
+ self for method chaining
317
+ """
318
+ self._resolved_schema = schema
319
+ return self
320
+
306
321
  def extract_schemas(
307
322
  self,
308
323
  file_paths: List[Path],
@@ -325,6 +340,7 @@ class GraphBuilder:
325
340
  file_paths,
326
341
  dialect=file_dialect,
327
342
  sql_preprocessor=self.sql_preprocessor,
343
+ initial_schema=self._resolved_schema if self._resolved_schema else None,
328
344
  strict_schema=self.strict_schema,
329
345
  catalog_type=self.catalog_type,
330
346
  catalog_config=self.catalog_config,
@@ -1,8 +1,9 @@
1
- """Output formatters for resolved schema data."""
1
+ """Output formatters and parsers for resolved schema data."""
2
2
 
3
3
  import csv
4
4
  import json
5
5
  from io import StringIO
6
+ from pathlib import Path
6
7
  from typing import Dict
7
8
 
8
9
  SchemaDict = Dict[str, Dict[str, str]]
@@ -96,3 +97,93 @@ def format_schema(schema: SchemaDict, output_format: str = "text") -> str:
96
97
  f"Invalid schema format '{output_format}'. Use 'text', 'json', or 'csv'."
97
98
  )
98
99
  return formatter(schema)
100
+
101
+
102
+ def parse_schema_json(content: str) -> SchemaDict:
103
+ """Parse schema from JSON format.
104
+
105
+ Args:
106
+ content: JSON string with table -> {column -> type} structure.
107
+
108
+ Returns:
109
+ Parsed schema dictionary.
110
+ """
111
+ return json.loads(content) # type: ignore[no-any-return]
112
+
113
+
114
+ def parse_schema_csv(content: str) -> SchemaDict:
115
+ """Parse schema from CSV format.
116
+
117
+ Expects columns: table, column, type.
118
+
119
+ Args:
120
+ content: CSV string with header row.
121
+
122
+ Returns:
123
+ Parsed schema dictionary.
124
+ """
125
+ schema: SchemaDict = {}
126
+ reader = csv.DictReader(StringIO(content))
127
+ for row in reader:
128
+ table = row["table"]
129
+ column = row["column"]
130
+ col_type = row.get("type", "UNKNOWN")
131
+ if table not in schema:
132
+ schema[table] = {}
133
+ schema[table][column] = col_type
134
+ return schema
135
+
136
+
137
+ def parse_schema_text(content: str) -> SchemaDict:
138
+ """Parse schema from indented text format.
139
+
140
+ Expected format:
141
+ table_name
142
+ column1
143
+ column2
144
+
145
+ other_table
146
+ col_a
147
+
148
+ Args:
149
+ content: Text-formatted schema string.
150
+
151
+ Returns:
152
+ Parsed schema dictionary.
153
+ """
154
+ schema: SchemaDict = {}
155
+ current_table: str | None = None
156
+ for line in content.splitlines():
157
+ if not line or not line.strip():
158
+ continue
159
+ if line.startswith(" "):
160
+ if current_table is not None:
161
+ schema[current_table][line.strip()] = "UNKNOWN"
162
+ else:
163
+ current_table = line.strip()
164
+ schema[current_table] = {}
165
+ return schema
166
+
167
+
168
+ def load_schema_file(path: Path) -> SchemaDict:
169
+ """Load a schema file, auto-detecting format from extension.
170
+
171
+ `.json` → JSON, `.csv` → CSV, otherwise text.
172
+
173
+ Args:
174
+ path: Path to schema file.
175
+
176
+ Returns:
177
+ Parsed schema dictionary.
178
+
179
+ Raises:
180
+ FileNotFoundError: If the file does not exist.
181
+ """
182
+ content = path.read_text(encoding="utf-8")
183
+ suffix = path.suffix.lower()
184
+ if suffix == ".json":
185
+ return parse_schema_json(content)
186
+ elif suffix == ".csv":
187
+ return parse_schema_csv(content)
188
+ else:
189
+ return parse_schema_text(content)
@@ -153,6 +153,7 @@ def extract_and_resolve_schema(
153
153
  file_paths: List[Path],
154
154
  dialect: str = "spark",
155
155
  sql_preprocessor: Optional[SqlPreprocessor] = None,
156
+ initial_schema: Optional[SchemaDict] = None,
156
157
  strict_schema: bool = False,
157
158
  catalog_type: Optional[str] = None,
158
159
  catalog_config: Optional[Dict[str, object]] = None,
@@ -167,6 +168,7 @@ def extract_and_resolve_schema(
167
168
  file_paths: SQL files to extract schema from.
168
169
  dialect: SQL dialect.
169
170
  sql_preprocessor: Optional SQL preprocessor.
171
+ initial_schema: Optional starting schema to build upon.
170
172
  strict_schema: If True, fail on ambiguous column attribution.
171
173
  catalog_type: Optional catalog provider name.
172
174
  catalog_config: Optional provider-specific configuration dict.
@@ -183,6 +185,7 @@ def extract_and_resolve_schema(
183
185
  file_paths,
184
186
  dialect=dialect,
185
187
  sql_preprocessor=sql_preprocessor,
188
+ initial_schema=initial_schema,
186
189
  strict_schema=strict_schema,
187
190
  console=console,
188
191
  )
@@ -7,6 +7,10 @@ from sqlglider.graph.formatters import (
7
7
  format_schema_csv,
8
8
  format_schema_json,
9
9
  format_schema_text,
10
+ load_schema_file,
11
+ parse_schema_csv,
12
+ parse_schema_json,
13
+ parse_schema_text,
10
14
  )
11
15
 
12
16
 
@@ -84,3 +88,61 @@ class TestFormatSchema:
84
88
  def test_invalid_format(self, sample_schema):
85
89
  with pytest.raises(ValueError, match="Invalid schema format"):
86
90
  format_schema(sample_schema, "xml")
91
+
92
+
93
+ class TestParseSchemaJson:
94
+ def test_round_trip(self, sample_schema):
95
+ content = format_schema_json(sample_schema)
96
+ parsed = parse_schema_json(content)
97
+ assert parsed == sample_schema
98
+
99
+ def test_empty(self):
100
+ assert parse_schema_json("{}") == {}
101
+
102
+
103
+ class TestParseSchemaCsv:
104
+ def test_round_trip(self, sample_schema):
105
+ content = format_schema_csv(sample_schema)
106
+ parsed = parse_schema_csv(content)
107
+ assert parsed == sample_schema
108
+
109
+ def test_empty(self):
110
+ parsed = parse_schema_csv("table,column,type\n")
111
+ assert parsed == {}
112
+
113
+
114
+ class TestParseSchemaText:
115
+ def test_round_trip(self, sample_schema):
116
+ content = format_schema_text(sample_schema)
117
+ parsed = parse_schema_text(content)
118
+ assert parsed == sample_schema
119
+
120
+ def test_empty(self):
121
+ assert parse_schema_text("") == {}
122
+
123
+ def test_single_table(self):
124
+ content = "users\n id\n name\n"
125
+ parsed = parse_schema_text(content)
126
+ assert parsed == {"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}
127
+
128
+
129
+ class TestLoadSchemaFile:
130
+ def test_json_extension(self, tmp_path, sample_schema):
131
+ f = tmp_path / "schema.json"
132
+ f.write_text(format_schema_json(sample_schema))
133
+ assert load_schema_file(f) == sample_schema
134
+
135
+ def test_csv_extension(self, tmp_path, sample_schema):
136
+ f = tmp_path / "schema.csv"
137
+ f.write_text(format_schema_csv(sample_schema))
138
+ assert load_schema_file(f) == sample_schema
139
+
140
+ def test_txt_extension(self, tmp_path, sample_schema):
141
+ f = tmp_path / "schema.txt"
142
+ f.write_text(format_schema_text(sample_schema))
143
+ assert load_schema_file(f) == sample_schema
144
+
145
+ def test_no_extension_treated_as_text(self, tmp_path, sample_schema):
146
+ f = tmp_path / "schema"
147
+ f.write_text(format_schema_text(sample_schema))
148
+ assert load_schema_file(f) == sample_schema
@@ -1884,3 +1884,141 @@ class TestTablesScrapeCommand:
1884
1884
 
1885
1885
  data = json.loads(result.stdout)
1886
1886
  assert "customers" in data
1887
+
1888
+
1889
+ class TestProvideSchema:
1890
+ """Tests for --provide-schema on lineage and graph build commands."""
1891
+
1892
+ @pytest.fixture
1893
+ def star_query_file(self, tmp_path):
1894
+ """SQL file with SELECT * that needs schema to resolve."""
1895
+ sql_file = tmp_path / "star.sql"
1896
+ sql_file.write_text("SELECT * FROM users")
1897
+ return sql_file
1898
+
1899
+ @pytest.fixture
1900
+ def schema_json_file(self, tmp_path):
1901
+ schema = tmp_path / "schema.json"
1902
+ schema.write_text('{"users": {"id": "UNKNOWN", "name": "UNKNOWN"}}')
1903
+ return schema
1904
+
1905
+ def test_lineage_with_provide_schema(self, star_query_file, schema_json_file):
1906
+ """Test that --provide-schema resolves SELECT * in lineage."""
1907
+ result = runner.invoke(
1908
+ app,
1909
+ [
1910
+ "lineage",
1911
+ str(star_query_file),
1912
+ "--provide-schema",
1913
+ str(schema_json_file),
1914
+ "--output-format",
1915
+ "json",
1916
+ ],
1917
+ )
1918
+
1919
+ assert result.exit_code == 0
1920
+ import json
1921
+
1922
+ data = json.loads(result.stdout)
1923
+ columns = [item["output_name"] for item in data["queries"][0]["lineage"]]
1924
+ assert "id" in columns
1925
+ assert "name" in columns
1926
+
1927
+ def test_graph_build_with_provide_schema(
1928
+ self, star_query_file, schema_json_file, tmp_path
1929
+ ):
1930
+ """Test that --provide-schema works with graph build."""
1931
+ output = tmp_path / "graph.json"
1932
+ result = runner.invoke(
1933
+ app,
1934
+ [
1935
+ "graph",
1936
+ "build",
1937
+ str(star_query_file),
1938
+ "-o",
1939
+ str(output),
1940
+ "--provide-schema",
1941
+ str(schema_json_file),
1942
+ ],
1943
+ )
1944
+
1945
+ assert result.exit_code == 0
1946
+ assert output.exists()
1947
+ import json
1948
+
1949
+ graph = json.loads(output.read_text())
1950
+ assert graph["metadata"]["total_nodes"] > 0
1951
+
1952
+
1953
+ class TestProvideSchemaRoundTrip:
1954
+ """Integration: tables scrape -> schema file -> graph build --provide-schema."""
1955
+
1956
+ @pytest.fixture
1957
+ def sql_dir(self, tmp_path):
1958
+ d = tmp_path / "sql"
1959
+ d.mkdir()
1960
+ (d / "a.sql").write_text(
1961
+ "CREATE TABLE output_table AS SELECT c.id, c.name FROM customers c;"
1962
+ )
1963
+ (d / "b.sql").write_text("SELECT * FROM output_table")
1964
+ return d
1965
+
1966
+ @pytest.mark.parametrize(
1967
+ "fmt,ext", [("json", ".json"), ("csv", ".csv"), ("text", ".txt")]
1968
+ )
1969
+ def test_round_trip(self, sql_dir, tmp_path, fmt, ext):
1970
+ """Scrape schema, save to file, then use --provide-schema to build graph."""
1971
+ schema_file = tmp_path / f"schema{ext}"
1972
+ graph_provided = tmp_path / "graph_provided.json"
1973
+ graph_resolved = tmp_path / "graph_resolved.json"
1974
+
1975
+ # Step 1: Scrape schema
1976
+ scrape_result = runner.invoke(
1977
+ app,
1978
+ ["tables", "scrape", str(sql_dir), "-f", fmt, "-o", str(schema_file)],
1979
+ )
1980
+ assert scrape_result.exit_code == 0
1981
+ assert schema_file.exists()
1982
+
1983
+ # Step 2: Build graph with --provide-schema
1984
+ result_provided = runner.invoke(
1985
+ app,
1986
+ [
1987
+ "graph",
1988
+ "build",
1989
+ str(sql_dir),
1990
+ "-o",
1991
+ str(graph_provided),
1992
+ "--provide-schema",
1993
+ str(schema_file),
1994
+ ],
1995
+ )
1996
+ assert result_provided.exit_code == 0
1997
+
1998
+ # Step 3: Build graph with --resolve-schema
1999
+ result_resolved = runner.invoke(
2000
+ app,
2001
+ [
2002
+ "graph",
2003
+ "build",
2004
+ str(sql_dir),
2005
+ "-o",
2006
+ str(graph_resolved),
2007
+ "--resolve-schema",
2008
+ ],
2009
+ )
2010
+ assert result_resolved.exit_code == 0
2011
+
2012
+ # Step 4: Compare graphs (nodes and edges should match)
2013
+ import json
2014
+
2015
+ g1 = json.loads(graph_provided.read_text())
2016
+ g2 = json.loads(graph_resolved.read_text())
2017
+
2018
+ nodes1 = sorted([n["identifier"] for n in g1["nodes"]])
2019
+ nodes2 = sorted([n["identifier"] for n in g2["nodes"]])
2020
+ assert nodes1 == nodes2
2021
+
2022
+ edges1 = sorted([(e["source_node"], e["target_node"]) for e in g1["edges"]])
2023
+ edges2 = sorted([(e["source_node"], e["target_node"]) for e in g2["edges"]])
2024
+ assert edges1 == edges2
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes