sql-glider 0.1.10.tar.gz → 0.1.12.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. {sql_glider-0.1.10 → sql_glider-0.1.12}/PKG-INFO +1 -1
  2. sql_glider-0.1.12/plans/2026-01-28-sparksql-table-extraction.md +58 -0
  3. sql_glider-0.1.12/plans/2026-01-29-no-star-flag.md +47 -0
  4. sql_glider-0.1.12/plans/2026-01-29-resolve-schema.md +49 -0
  5. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/_version.py +2 -2
  6. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/cli.py +101 -2
  7. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/graph/builder.py +206 -20
  8. sql_glider-0.1.12/src/sqlglider/graph/formatters.py +98 -0
  9. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/lineage/analyzer.py +217 -3
  10. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/utils/config.py +5 -0
  11. sql_glider-0.1.12/src/sqlglider/utils/schema.py +62 -0
  12. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/graph/test_builder.py +211 -0
  13. sql_glider-0.1.12/tests/sqlglider/graph/test_formatters.py +86 -0
  14. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/lineage/test_analyzer.py +211 -1
  15. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/test_cli.py +172 -0
  16. sql_glider-0.1.12/tests/sqlglider/utils/test_schema.py +55 -0
  17. {sql_glider-0.1.10 → sql_glider-0.1.12}/.github/workflows/ci.yml +0 -0
  18. {sql_glider-0.1.10 → sql_glider-0.1.12}/.github/workflows/publish.yml +0 -0
  19. {sql_glider-0.1.10 → sql_glider-0.1.12}/.gitignore +0 -0
  20. {sql_glider-0.1.10 → sql_glider-0.1.12}/.python-version +0 -0
  21. {sql_glider-0.1.10 → sql_glider-0.1.12}/ARCHITECTURE.md +0 -0
  22. {sql_glider-0.1.10 → sql_glider-0.1.12}/CLAUDE.md +0 -0
  23. {sql_glider-0.1.10 → sql_glider-0.1.12}/LICENSE +0 -0
  24. {sql_glider-0.1.10 → sql_glider-0.1.12}/README.md +0 -0
  25. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-05-column-level-lineage.md +0 -0
  26. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-05-reverse-lineage.md +0 -0
  27. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-06-config-file-support.md +0 -0
  28. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-06-graph-lineage.md +0 -0
  29. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-06-unify-single-multi-query.md +0 -0
  30. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-07-sample-data-model.md +0 -0
  31. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-07-sql-templating.md +0 -0
  32. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-08-tables-command.md +0 -0
  33. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-09-graph-query-paths.md +0 -0
  34. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-13-dissect-command.md +0 -0
  35. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2025-12-14-tables-pull-command.md +0 -0
  36. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
  37. {sql_glider-0.1.10 → sql_glider-0.1.12}/plans/2026-01-26-file-scoped-schema-context.md +0 -0
  38. {sql_glider-0.1.10 → sql_glider-0.1.12}/pyproject.toml +0 -0
  39. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/README.md +0 -0
  40. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/business/expire_dim_customer.sql +0 -0
  41. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/business/load_fact_orders.sql +0 -0
  42. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/business/load_fact_payments.sql +0 -0
  43. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/business/merge_dim_customer.sql +0 -0
  44. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/business/merge_dim_product.sql +0 -0
  45. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
  46. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/complex/conditional_merge.sql +0 -0
  47. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/complex/cte_insert.sql +0 -0
  48. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/complex/multi_table_transform.sql +0 -0
  49. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/dim_customer.sql +0 -0
  50. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/dim_product.sql +0 -0
  51. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/fact_orders.sql +0 -0
  52. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/fact_payments.sql +0 -0
  53. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/raw_addresses.sql +0 -0
  54. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/raw_customers.sql +0 -0
  55. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/raw_order_items.sql +0 -0
  56. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/raw_orders.sql +0 -0
  57. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/raw_payments.sql +0 -0
  58. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/raw_products.sql +0 -0
  59. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/stg_customers.sql +0 -0
  60. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/stg_orders.sql +0 -0
  61. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/stg_payments.sql +0 -0
  62. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/ddl/stg_products.sql +0 -0
  63. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
  64. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
  65. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
  66. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
  67. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/maintenance/update_product_status.sql +0 -0
  68. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
  69. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
  70. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
  71. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
  72. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/staging/load_stg_customers.sql +0 -0
  73. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/staging/load_stg_orders.sql +0 -0
  74. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/staging/load_stg_payments.sql +0 -0
  75. {sql_glider-0.1.10 → sql_glider-0.1.12}/sample_data_model/staging/load_stg_products.sql +0 -0
  76. {sql_glider-0.1.10 → sql_glider-0.1.12}/sqlglider.toml.example +0 -0
  77. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/__init__.py +0 -0
  78. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/catalog/__init__.py +0 -0
  79. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/catalog/base.py +0 -0
  80. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/catalog/databricks.py +0 -0
  81. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/catalog/registry.py +0 -0
  82. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/dissection/__init__.py +0 -0
  83. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/dissection/analyzer.py +0 -0
  84. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/dissection/formatters.py +0 -0
  85. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/dissection/models.py +0 -0
  86. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/global_models.py +0 -0
  87. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/graph/__init__.py +0 -0
  88. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/graph/merge.py +0 -0
  89. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/graph/models.py +0 -0
  90. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/graph/query.py +0 -0
  91. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/graph/serialization.py +0 -0
  92. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/lineage/__init__.py +0 -0
  93. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/lineage/formatters.py +0 -0
  94. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/templating/__init__.py +0 -0
  95. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/templating/base.py +0 -0
  96. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/templating/jinja.py +0 -0
  97. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/templating/registry.py +0 -0
  98. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/templating/variables.py +0 -0
  99. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/utils/__init__.py +0 -0
  100. {sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/utils/file_utils.py +0 -0
  101. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/__init__.py +0 -0
  102. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
  103. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
  104. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/multi_file_queries/customers.sql +0 -0
  105. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/multi_file_queries/orders.sql +0 -0
  106. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/multi_file_queries/reports.sql +0 -0
  107. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
  108. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_cte.sql +0 -0
  109. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
  110. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_cte_view_star.sql +0 -0
  111. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
  112. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_multi.sql +0 -0
  113. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
  114. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_single_query.sql +0 -0
  115. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_subquery.sql +0 -0
  116. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_tables.sql +0 -0
  117. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_view.sql +0 -0
  118. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
  119. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/fixtures/sample_manifest.csv +0 -0
  120. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/__init__.py +0 -0
  121. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/catalog/__init__.py +0 -0
  122. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/catalog/test_base.py +0 -0
  123. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/catalog/test_databricks.py +0 -0
  124. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/catalog/test_registry.py +0 -0
  125. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/dissection/__init__.py +0 -0
  126. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/dissection/test_analyzer.py +0 -0
  127. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/dissection/test_formatters.py +0 -0
  128. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/dissection/test_models.py +0 -0
  129. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/graph/__init__.py +0 -0
  130. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/graph/test_merge.py +0 -0
  131. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/graph/test_models.py +0 -0
  132. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/graph/test_query.py +0 -0
  133. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/graph/test_serialization.py +0 -0
  134. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/lineage/__init__.py +0 -0
  135. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/lineage/test_formatters.py +0 -0
  136. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/templating/__init__.py +0 -0
  137. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/templating/test_base.py +0 -0
  138. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/templating/test_jinja.py +0 -0
  139. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/templating/test_registry.py +0 -0
  140. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/templating/test_variables.py +0 -0
  141. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/utils/__init__.py +0 -0
  142. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/utils/test_config.py +0 -0
  143. {sql_glider-0.1.10 → sql_glider-0.1.12}/tests/sqlglider/utils/test_file_utils.py +0 -0
  144. {sql_glider-0.1.10 → sql_glider-0.1.12}/uv.lock +0 -0
{sql_glider-0.1.10 → sql_glider-0.1.12}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sql-glider
-Version: 0.1.10
+Version: 0.1.12
 Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
 Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
 Project-URL: Repository, https://github.com/rycowhi/sql-glider/
sql_glider-0.1.12/plans/2026-01-28-sparksql-table-extraction.md
@@ -0,0 +1,58 @@
+# SparkSQL Table Extraction for Unsupported Statement Types
+
+**Status:** Planned
+**Date:** 2026-01-28
+
+## Overview
+
+The `analyze_tables()` method in `LineageAnalyzer` currently extracts tables from a subset of statement types (SELECT, INSERT, CREATE, DELETE, DROP, TRUNCATE, CACHE). Several SparkSQL-specific statement types reference tables but are not captured during table extraction. This plan adds table extraction support for these missing types.
+
+Column lineage is **not affected** — these statements contain no SELECT and cannot produce column-level lineage. The goal is to ensure `sqlglider tables overview` reports all tables referenced in a SQL file.
+
+## Statements to Add
+
+| Statement | SQLGlot Expression | Table Location | Proposed Usage |
+|-----------|-------------------|----------------|----------------|
+| `UNCACHE TABLE t` | `exp.Uncache` | `expr.this` | `INPUT` |
+| `REFRESH TABLE t` | `exp.Refresh` | `expr.this` | `INPUT` |
+| `LOAD DATA INPATH '...' INTO TABLE t` | `exp.LoadData` | `expr.this` | `OUTPUT` |
+| `ALTER TABLE t ...` | `exp.Alter` | `expr.this` | `OUTPUT` |
+| `ANALYZE TABLE t COMPUTE STATISTICS` | `exp.Analyze` | `expr.this` | `INPUT` |
+
+### Usage Rationale
+
+- **UNCACHE / REFRESH / ANALYZE**: Read-oriented metadata operations on an existing table → `INPUT`
+- **LOAD DATA**: Writes data into a table → `OUTPUT`
+- **ALTER TABLE**: Modifies table structure → `OUTPUT`
+
+## Implementation Steps
+
+- [ ] Add extraction logic to `_get_target_table_info()` in [analyzer.py](src/sqlglider/lineage/analyzer.py) for each new expression type
+- [ ] Add each type to the `_get_statement_type()` type_map for readable skip messages
+- [ ] Add entries to `_is_target_table()` where applicable (LOAD DATA, ALTER)
+- [ ] Ensure `_get_target_and_select()` returns `None` gracefully for these types (they have no SELECT)
+- [ ] Add unit tests in [test_analyzer.py](tests/sqlglider/lineage/test_analyzer.py):
+  - Table extraction returns correct table name and usage for each type
+  - Column lineage correctly skips these with appropriate message
+  - Parameterized test covering all five statement types
+- [ ] Verify graph build handles these gracefully (skipped queries warning)
+- [ ] Run full test suite and coverage check
+
+## Files to Modify
+
+- `src/sqlglider/lineage/analyzer.py` — extraction logic
+- `tests/sqlglider/lineage/test_analyzer.py` — unit tests
+
+## Testing Strategy
+
+- Parameterized tests with SparkSQL syntax for each statement type
+- Verify `analyze_tables()` returns correct table name, usage, and object type
+- Verify `analyze_queries()` adds these to `skipped_queries` with clear reason
+- Ensure no regressions in existing tests
+- Coverage threshold (80%) maintained
+
+## Notes
+
+- These are all parsed by sqlglot's Spark dialect parser, so no custom parsing is needed
+- Some of these (SHOW, DESCRIBE, EXPLAIN) parse as `exp.Command` — those are intentionally excluded since they don't reference tables in a structured way
+- INSERT OVERWRITE and multi-INSERT patterns may warrant separate investigation
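The mapping in the plan's table translates naturally into sqlglot type checks. A minimal sketch, assuming a sqlglot version that exposes the expression classes listed above; `STATEMENT_USAGE` and `extract_table` are illustrative names, not code from the package:

```python
import sqlglot
from sqlglot import exp

# Usage mapping taken from the plan's table (illustrative, not package code).
STATEMENT_USAGE = {
    exp.Uncache: "INPUT",
    exp.Refresh: "INPUT",
    exp.LoadData: "OUTPUT",
    exp.Alter: "OUTPUT",
    exp.Analyze: "INPUT",
}


def extract_table(sql: str) -> tuple[str, str] | None:
    """Return (table_name, usage) for a statement, or None if unmapped."""
    expr = sqlglot.parse_one(sql, read="spark")
    for expr_type, usage in STATEMENT_USAGE.items():
        if isinstance(expr, expr_type):
            # Per the plan's "Table Location" column, the table sits at expr.this.
            return expr.this.sql(dialect="spark"), usage
    return None


print(extract_table("REFRESH TABLE sales.orders"))  # roughly ('sales.orders', 'INPUT')
```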
sql_glider-0.1.12/plans/2026-01-29-no-star-flag.md
@@ -0,0 +1,47 @@
+# Add `--no-star` Flag
+
+**Status:** Completed
+
+## Overview
+Add `--no-star` flag to `lineage` and `graph build` commands. When set, analysis fails if `SELECT *` or `t.*` cannot be resolved to actual columns.
+
+## Changes
+
+### 1. `src/sqlglider/utils/config.py` — Add to ConfigSettings
+- [x] Add `no_star: Optional[bool] = None`
+
+### 2. `src/sqlglider/lineage/analyzer.py` — Add parameter + enforce
+- [x] Add `no_star: bool = False` to `__init__`, store as `self._no_star`
+- [x] Add `StarResolutionError` exception class (distinct from `ValueError` to avoid being swallowed by skipped-query handler)
+- [x] DML/DDL path: raise `StarResolutionError` before fallback for bare `*` and `t.*`
+- [x] DQL path: add star handling for both bare `*` and `t.*` with same error behavior
+- [x] Re-raise `StarResolutionError` in `analyze_queries` instead of treating as skipped query
+
+### 3. `src/sqlglider/graph/builder.py` — Pass through
+- [x] Add `no_star: bool = False` to `__init__`, store as `self.no_star`
+- [x] Pass to `LineageAnalyzer(sql_content, dialect=file_dialect, no_star=self.no_star)`
+
+### 4. `src/sqlglider/cli.py` — Add CLI options
+- [x] `lineage` command: Add `no_star: bool = typer.Option(False, "--no-star", ...)`
+- [x] Resolve: `no_star = no_star or config.no_star or False`
+- [x] Pass to `LineageAnalyzer(sql, dialect=dialect, no_star=no_star)`
+- [x] `graph_build` command: same option, passed to `GraphBuilder(..., no_star=no_star)`
+
+### 5. `tests/sqlglider/lineage/test_analyzer.py` — Tests
+- [x] Test bare `SELECT *` with `no_star=True` raises `StarResolutionError`
+- [x] Test `SELECT t.*` with `no_star=True` raises `StarResolutionError`
+- [x] Test resolvable star (via CTE) still works with `no_star=True`
+- [x] Test resolvable qualified star (via CTE) still works with `no_star=True`
+- [x] Test default (`no_star=False`) still falls back to `table.*`
+
+## Implementation Notes
+
+### Deviations from original plan
+- Used `StarResolutionError` instead of `ValueError` because `analyze_queries` catches `ValueError` to handle unsupported statement types (skipped queries). A plain `ValueError` would be silently swallowed.
+- Added star handling in the DQL (plain SELECT) code path in addition to the DML/DDL path. The original plan only addressed the DML/DDL path, but plain `SELECT *` queries go through a different branch in `get_output_columns`.
+- Resolvable star tests use CTEs instead of `CREATE TABLE` with explicit columns, since `_extract_schema_from_statement` only handles `CREATE ... AS SELECT`, not DDL with column definitions.
+
+## Verification
+- `uv run pytest` — 597 passed, 1 skipped, coverage 80.48%
+- `uv run basedpyright src/` — 0 errors
+- `uv run ruff check` — all checks passed
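As a usage sketch of the finished behavior (the SQL is illustrative; `LineageAnalyzer`'s `no_star` parameter, `StarResolutionError`, and the `analyze_queries(level=...)` call are the names introduced by this plan and visible in the diffs below):

```python
from sqlglider.global_models import AnalysisLevel
from sqlglider.lineage.analyzer import LineageAnalyzer, StarResolutionError

# `source` is an unknown table here, so the star cannot be expanded to
# concrete columns; with no_star=True the analyzer raises instead of
# falling back to source.*.
analyzer = LineageAnalyzer(
    "INSERT INTO target SELECT * FROM source",
    dialect="spark",
    no_star=True,
)
try:
    analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
except StarResolutionError as exc:
    print(f"Unresolved star: {exc}")
```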
sql_glider-0.1.12/plans/2026-01-29-resolve-schema.md
@@ -0,0 +1,49 @@
+# `--resolve-schema` Flag and Catalog Integration
+
+**Status:** Completed
+
+## Overview
+
+Add `--resolve-schema` flag to `graph build` that runs a two-pass process: first extracting schema from all files, then running lineage analysis with the full schema available. Optionally, `--catalog-type` fills schema gaps by pulling DDL from a remote catalog.
+
+## Design Decisions
+
+- **Types are not required** — SQLGlot only needs column names for star expansion; types are stored as `"UNKNOWN"`
+- **Two-pass approach** — Pass 1 extracts schema from all files (order-independent), Pass 2 runs lineage with full schema
+- **Opt-in via `--resolve-schema`** — default behavior unchanged
+- **Catalog fills gaps only** — file-derived schema always wins over catalog-sourced schema
+- **`--catalog-type` requires `--resolve-schema`** — validated at CLI level
+
+## Implementation
+
+- [x] Add `schema` param to `LineageAnalyzer.__init__()` — pre-populates `_file_schema`
+- [x] Add `extract_schema_only()` and `get_extracted_schema()` methods to `LineageAnalyzer`
+- [x] Create `src/sqlglider/utils/schema.py` with `parse_ddl_to_schema()` for DDL column extraction
+- [x] Add `resolve_schema`, `catalog_type`, `catalog_config` to `GraphBuilder`
+- [x] Implement `_extract_schemas()` for pass 1 and `_fill_schema_from_catalog()` for catalog gap-filling
+- [x] Two-pass flow in `add_files()` and `add_manifest()`
+- [x] Add `--resolve-schema` and `--catalog-type` CLI flags to `graph build`
+- [x] Add `resolve_schema` to `ConfigSettings`
+- [x] Tests: 25 new tests (schema parsing, analyzer schema param, cross-file resolution, catalog mocking)
+
+## Files Modified
+
+- `src/sqlglider/lineage/analyzer.py` — schema param, extraction methods
+- `src/sqlglider/graph/builder.py` — two-pass processing, catalog integration
+- `src/sqlglider/cli.py` — CLI flags
+- `src/sqlglider/utils/config.py` — config setting
+- `src/sqlglider/utils/schema.py` — **new** DDL parsing utility
+- `tests/sqlglider/utils/test_schema.py` — **new**
+- `tests/sqlglider/graph/test_builder.py` — resolve schema + catalog tests
+- `tests/sqlglider/lineage/test_analyzer.py` — schema param tests
+
+## Verification
+
+- 617 passed, 1 skipped
+- Coverage: 80.43%
+- basedpyright: 0 errors
+- ruff: all checks passed
+
+## Known Limitations
+
+- Cross-file CTAS chains with `SELECT *` (view B depends on view A via star) may not resolve if both are in separate files and the schema extraction pass processes B before A. This is rare in practice.
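A sketch of the two-pass flow from the Python side, using the `GraphBuilder` parameters, `add_files()`, `resolved_schema`, and `build()` added in the diffs below; the file paths are examples borrowed from the package's sample data model:

```python
from pathlib import Path

from sqlglider.graph.builder import GraphBuilder

builder = GraphBuilder(
    dialect="spark",
    resolve_schema=True,        # pass 1: extract schema from every file
    catalog_type="databricks",  # optional: fill remaining gaps from a catalog
)
builder.add_files(
    [Path("ddl/stg_orders.sql"), Path("staging/load_stg_orders.sql")]
)

# Schema accumulated during pass 1; column types default to "UNKNOWN".
print(builder.resolved_schema)

graph = builder.build()
```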
{sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/_version.py
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.10'
-__version_tuple__ = version_tuple = (0, 1, 10)
+__version__ = version = '0.1.12'
+__version_tuple__ = version_tuple = (0, 1, 12)
 
 __commit_id__ = commit_id = None
{sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/cli.py
@@ -12,7 +12,7 @@ from sqlglot.errors import ParseError
 from typing_extensions import Annotated
 
 from sqlglider.global_models import AnalysisLevel, NodeFormat
-from sqlglider.lineage.analyzer import LineageAnalyzer
+from sqlglider.lineage.analyzer import LineageAnalyzer, SchemaResolutionError
 from sqlglider.lineage.formatters import (
     CsvFormatter,
     JsonFormatter,
@@ -166,6 +166,11 @@ def lineage(
         exists=True,
         help="Path to variables file (JSON or YAML)",
     ),
+    no_star: bool = typer.Option(
+        False,
+        "--no-star",
+        help="Fail if SELECT * cannot be resolved to actual columns",
+    ),
 ) -> None:
     """
     Analyze column or table lineage for a SQL file.
@@ -207,6 +212,7 @@
     level_str = level or config.level or "column"
     output_format = output_format or config.output_format or "text"
     templater = templater or config.templater  # None means no templating
+    no_star = no_star or config.no_star or False
     # Validate and convert level to enum
     try:
         analysis_level = AnalysisLevel(level_str)
@@ -261,7 +267,7 @@
     )
 
     # Create analyzer
-    analyzer = LineageAnalyzer(sql, dialect=dialect)
+    analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star)
 
     # Unified lineage analysis (handles both single and multi-query files)
     results = analyzer.analyze_queries(
@@ -990,6 +996,40 @@ def graph_build(
         exists=True,
         help="Path to variables file (JSON or YAML)",
     ),
+    no_star: bool = typer.Option(
+        False,
+        "--no-star",
+        help="Fail if SELECT * cannot be resolved to actual columns",
+    ),
+    resolve_schema: bool = typer.Option(
+        False,
+        "--resolve-schema",
+        help="Extract schema from all files before lineage analysis, "
+        "enabling cross-file star resolution",
+    ),
+    catalog_type: Optional[str] = typer.Option(
+        None,
+        "--catalog-type",
+        "-c",
+        help="Catalog provider for pulling DDL of tables not found in files "
+        "(requires --resolve-schema). E.g. 'databricks'",
+    ),
+    dump_schema: Optional[Path] = typer.Option(
+        None,
+        "--dump-schema",
+        help="Dump resolved schema to file (requires --resolve-schema)",
+    ),
+    dump_schema_format: Optional[str] = typer.Option(
+        None,
+        "--dump-schema-format",
+        help="Format for dumped schema: 'text' (default), 'json', or 'csv'",
+    ),
+    strict_schema: bool = typer.Option(
+        False,
+        "--strict-schema",
+        help="Fail if any column's table cannot be identified during schema extraction "
+        "(requires --resolve-schema)",
+    ),
 ) -> None:
     """
     Build a lineage graph from SQL files.
@@ -1024,6 +1064,38 @@
     config = load_config()
     dialect = dialect or config.dialect or "spark"
     templater = templater or config.templater  # None means no templating
+    no_star = no_star or config.no_star or False
+    resolve_schema = resolve_schema or config.resolve_schema or False
+    strict_schema = strict_schema or config.strict_schema or False
+
+    if strict_schema and not resolve_schema:
+        err_console.print("[red]Error:[/red] --strict-schema requires --resolve-schema")
+        raise typer.Exit(1)
+
+    if catalog_type and not resolve_schema:
+        err_console.print("[red]Error:[/red] --catalog-type requires --resolve-schema")
+        raise typer.Exit(1)
+
+    # Resolve dump_schema options from config
+    dump_schema = dump_schema or (
+        Path(config.dump_schema) if config.dump_schema else None
+    )
+    dump_schema_format = dump_schema_format or config.dump_schema_format or "text"
+
+    if dump_schema and not resolve_schema:
+        err_console.print("[red]Error:[/red] --dump-schema requires --resolve-schema")
+        raise typer.Exit(1)
+
+    if dump_schema_format not in ("text", "json", "csv"):
+        err_console.print(
+            f"[red]Error:[/red] Invalid --dump-schema-format '{dump_schema_format}'. "
+            "Use 'text', 'json', or 'csv'."
+        )
+        raise typer.Exit(1)
+
+    # Only inherit catalog_type from config when resolve_schema is active
+    if resolve_schema and not catalog_type:
+        catalog_type = config.catalog_type
 
     # Validate and convert node format to enum
     try:
@@ -1076,10 +1148,22 @@
         sql_preprocessor = _preprocess
 
     try:
+        # Build catalog config from config file if available
+        catalog_config_dict = None
+        if catalog_type and config.catalog:
+            provider_config = getattr(config.catalog, catalog_type, None)
+            if provider_config:
+                catalog_config_dict = provider_config.model_dump(exclude_none=True)
+
         builder = GraphBuilder(
             node_format=node_format_enum,
             dialect=dialect,
             sql_preprocessor=sql_preprocessor,
+            no_star=no_star,
+            resolve_schema=resolve_schema,
+            catalog_type=catalog_type,
+            catalog_config=catalog_config_dict,
+            strict_schema=strict_schema,
         )
 
         # Process manifest if provided
@@ -1102,6 +1186,17 @@
             raise typer.Exit(1)
         builder.add_files(all_files, dialect=dialect)
 
+        # Dump resolved schema if requested
+        if dump_schema:
+            from sqlglider.graph.formatters import format_schema
+
+            schema_content = format_schema(builder.resolved_schema, dump_schema_format)
+            dump_schema.write_text(schema_content, encoding="utf-8")
+            console.print(
+                f"[green]Schema dumped to {dump_schema} "
+                f"({len(builder.resolved_schema)} table(s))[/green]"
+            )
+
         # Build and save graph
         graph = builder.build()
         save_graph(graph, output)
@@ -1111,6 +1206,10 @@
             f"({graph.metadata.total_nodes} nodes, {graph.metadata.total_edges} edges)"
         )
 
+    except SchemaResolutionError as e:
+        err_console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
     except FileNotFoundError as e:
         err_console.print(f"[red]Error:[/red] {e}")
         raise typer.Exit(1)
{sql_glider-0.1.10 → sql_glider-0.1.12}/src/sqlglider/graph/builder.py
@@ -16,8 +16,9 @@ from sqlglider.graph.models import (
     LineageGraph,
     Manifest,
 )
-from sqlglider.lineage.analyzer import LineageAnalyzer
+from sqlglider.lineage.analyzer import LineageAnalyzer, SchemaResolutionError
 from sqlglider.utils.file_utils import read_sql_file
+from sqlglider.utils.schema import parse_ddl_to_schema
 
 console = Console(stderr=True)
@@ -33,6 +34,11 @@ class GraphBuilder:
         node_format: NodeFormat = NodeFormat.QUALIFIED,
         dialect: str = "spark",
         sql_preprocessor: Optional[SqlPreprocessor] = None,
+        no_star: bool = False,
+        resolve_schema: bool = False,
+        catalog_type: Optional[str] = None,
+        catalog_config: Optional[Dict[str, object]] = None,
+        strict_schema: bool = False,
     ):
         """
         Initialize the graph builder.
@@ -43,15 +49,32 @@ class GraphBuilder:
             sql_preprocessor: Optional function to preprocess SQL before analysis.
                 Takes (sql: str, file_path: Path) and returns processed SQL.
                 Useful for templating (e.g., Jinja2 rendering).
+            no_star: If True, fail when SELECT * cannot be resolved to columns
+            resolve_schema: If True, run a schema extraction pass across all
+                files before lineage analysis so that schema from any file is
+                available when analyzing every other file.
+            catalog_type: Optional catalog provider name (e.g. "databricks").
+                When set together with resolve_schema, DDL is pulled from the
+                catalog for tables whose schema could not be inferred from files.
+            catalog_config: Optional provider-specific configuration dict
+                passed to the catalog's configure() method.
+            strict_schema: If True, fail during schema extraction when an
+                unqualified column cannot be attributed to a table.
         """
         self.node_format = node_format
         self.dialect = dialect
         self.sql_preprocessor = sql_preprocessor
+        self.no_star = no_star
+        self.resolve_schema = resolve_schema
+        self.catalog_type = catalog_type
+        self.catalog_config = catalog_config
+        self.strict_schema = strict_schema
         self.graph: rx.PyDiGraph = rx.PyDiGraph()
         self._node_index_map: Dict[str, int] = {}  # identifier -> rustworkx node index
         self._source_files: Set[str] = set()
         self._edge_set: Set[tuple] = set()  # (source, target) for dedup
         self._skipped_files: List[tuple[str, str]] = []  # (file_path, reason)
+        self._resolved_schema: Dict[str, Dict[str, str]] = {}  # accumulated schema
 
 
     def add_file(
@@ -82,7 +105,12 @@ class GraphBuilder:
         if self.sql_preprocessor:
             sql_content = self.sql_preprocessor(sql_content, file_path)
 
-        analyzer = LineageAnalyzer(sql_content, dialect=file_dialect)
+        analyzer = LineageAnalyzer(
+            sql_content,
+            dialect=file_dialect,
+            no_star=self.no_star,
+            schema=self._resolved_schema if self._resolved_schema else None,
+        )
         results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
 
         # Print warnings for any skipped queries within the file
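The `schema` argument above is what makes cross-file star resolution work: a dict shaped `{table: {column: type}}` accumulated during pass 1. A small sketch with hypothetical table and column names:

```python
from sqlglider.global_models import AnalysisLevel
from sqlglider.lineage.analyzer import LineageAnalyzer

# Hypothetical table and columns; the dict shape matches _resolved_schema.
schema = {"raw_orders": {"order_id": "UNKNOWN", "amount": "UNKNOWN"}}

analyzer = LineageAnalyzer(
    "CREATE VIEW v AS SELECT * FROM raw_orders",
    dialect="spark",
    schema=schema,
)
# With the schema supplied, the star can expand to order_id and amount
# even though raw_orders is defined in a different file.
results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
```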
@@ -204,23 +232,37 @@ class GraphBuilder:
             entry_dialect = entry.dialect or dialect or self.dialect
             files_with_dialects.append((file_path, entry_dialect))
 
-        # Process with progress
-        if files_with_dialects:
-            total = len(files_with_dialects)
-            with Progress(
-                TextColumn("[progress.description]{task.description}"),
-                BarColumn(),
-                TaskProgressColumn(),
-                console=console,
-                transient=False,
-            ) as progress:
-                task = progress.add_task("Parsing", total=total)
-                for i, (file_path, file_dialect) in enumerate(
-                    files_with_dialects, start=1
-                ):
-                    console.print(f"Parsing file {i}/{total}: {file_path.name}")
-                    self.add_file(file_path, file_dialect)
-                    progress.advance(task)
+        if not files_with_dialects:
+            return self
+
+        # Two-pass schema resolution
+        if self.resolve_schema:
+            console.print("[blue]Pass 1: Extracting schema from files[/blue]")
+            file_paths_only = [fp for fp, _ in files_with_dialects]
+            self._resolved_schema = self._extract_schemas(file_paths_only, dialect)
+            if self.catalog_type:
+                self._resolved_schema = self._fill_schema_from_catalog(
+                    self._resolved_schema, file_paths_only, dialect
+                )
+            console.print(
+                f"[blue]Schema resolved for "
+                f"{len(self._resolved_schema)} table(s)[/blue]"
+            )
+
+        total = len(files_with_dialects)
+        description = "Pass 2: Analyzing lineage" if self.resolve_schema else "Parsing"
+        with Progress(
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TaskProgressColumn(),
+            console=console,
+            transient=False,
+        ) as progress:
+            task = progress.add_task(description, total=total)
+            for i, (file_path, file_dialect) in enumerate(files_with_dialects, start=1):
+                console.print(f"Parsing file {i}/{total}: {file_path.name}")
+                self.add_file(file_path, file_dialect)
+                progress.advance(task)
 
         return self
 
@@ -244,8 +286,24 @@ class GraphBuilder:
         if not file_paths:
             return self
 
+        # Two-pass schema resolution: extract schema from all files first
+        if self.resolve_schema:
+            console.print("[blue]Pass 1: Extracting schema from files[/blue]")
+            self._resolved_schema = self._extract_schemas(file_paths, dialect)
+            if self.catalog_type:
+                self._resolved_schema = self._fill_schema_from_catalog(
+                    self._resolved_schema, file_paths, dialect
+                )
+            console.print(
+                f"[blue]Schema resolved for "
+                f"{len(self._resolved_schema)} table(s)[/blue]"
+            )
+
         if show_progress:
             total = len(file_paths)
+            description = (
+                "Pass 2: Analyzing lineage" if self.resolve_schema else "Parsing"
+            )
             with Progress(
                 TextColumn("[progress.description]{task.description}"),
                 BarColumn(),
@@ -253,7 +311,7 @@ class GraphBuilder:
                 console=console,
                 transient=False,
             ) as progress:
-                task = progress.add_task("Parsing", total=total)
+                task = progress.add_task(description, total=total)
                 for i, file_path in enumerate(file_paths, start=1):
                     console.print(f"Parsing file {i}/{total}: {file_path.name}")
                     self.add_file(file_path, dialect)
@@ -263,6 +321,129 @@ class GraphBuilder:
                 self.add_file(file_path, dialect)
         return self
 
+    def _extract_schemas(
+        self,
+        file_paths: List[Path],
+        dialect: Optional[str] = None,
+    ) -> Dict[str, Dict[str, str]]:
+        """Run schema extraction pass across all files.
+
+        Parses each file and extracts schema from CREATE TABLE/VIEW
+        statements without performing lineage analysis.
+
+        Args:
+            file_paths: SQL files to extract schema from
+            dialect: SQL dialect override
+
+        Returns:
+            Accumulated schema dict from all files
+        """
+        schema: Dict[str, Dict[str, str]] = {}
+        total = len(file_paths)
+        with Progress(
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TaskProgressColumn(),
+            console=console,
+            transient=False,
+        ) as progress:
+            task = progress.add_task("Pass 1: Extracting schema", total=total)
+            for i, file_path in enumerate(file_paths, start=1):
+                console.print(f"Extracting schema {i}/{total}: {file_path.name}")
+                file_dialect = dialect or self.dialect
+                try:
+                    sql_content = read_sql_file(file_path)
+                    if self.sql_preprocessor:
+                        sql_content = self.sql_preprocessor(sql_content, file_path)
+                    analyzer = LineageAnalyzer(
+                        sql_content,
+                        dialect=file_dialect,
+                        schema=schema,
+                        strict_schema=self.strict_schema,
+                    )
+                    file_schema = analyzer.extract_schema_only()
+                    schema.update(file_schema)
+                except SchemaResolutionError:
+                    raise
+                except Exception:
+                    # Schema extraction failures are non-fatal; the file
+                    # will be reported during the lineage pass if it also fails.
+                    pass
+                progress.advance(task)
+        return schema
+
+    def _fill_schema_from_catalog(
+        self,
+        schema: Dict[str, Dict[str, str]],
+        file_paths: List[Path],
+        dialect: Optional[str] = None,
+    ) -> Dict[str, Dict[str, str]]:
+        """Pull DDL from catalog for tables not yet in schema.
+
+        Extracts all table names referenced across the files, identifies
+        those missing from the schema, and fetches their DDL from the
+        configured catalog provider.
+
+        Args:
+            schema: Schema dict already populated from file extraction
+            file_paths: SQL files to scan for table references
+            dialect: SQL dialect override
+
+        Returns:
+            Updated schema dict with catalog-sourced entries added
+        """
+        from sqlglider.catalog import get_catalog
+
+        catalog = get_catalog(self.catalog_type)  # type: ignore[arg-type]
+        if self.catalog_config:
+            catalog.configure(self.catalog_config)
+
+        # Collect all referenced table names across files
+        all_tables: Set[str] = set()
+        for file_path in file_paths:
+            file_dialect = dialect or self.dialect
+            try:
+                sql_content = read_sql_file(file_path)
+                if self.sql_preprocessor:
+                    sql_content = self.sql_preprocessor(sql_content, file_path)
+                analyzer = LineageAnalyzer(sql_content, dialect=file_dialect)
+                tables_results = analyzer.analyze_tables()
+                for result in tables_results:
+                    for table_info in result.tables:
+                        # Skip CTEs — they don't exist in catalogs
+                        from sqlglider.lineage.analyzer import ObjectType
+
+                        if table_info.object_type != ObjectType.CTE:
+                            all_tables.add(table_info.name)
+            except Exception:
+                pass
+
+        # Find tables missing from schema
+        missing = [t for t in all_tables if t not in schema]
+        if not missing:
+            return schema
+
+        console.print(
+            f"[blue]Pulling DDL from {self.catalog_type} "
+            f"for {len(missing)} table(s)...[/blue]"
+        )
+
+        ddl_results = catalog.get_ddl_batch(missing)
+        file_dialect = dialect or self.dialect
+        for table_name, ddl in ddl_results.items():
+            if ddl.startswith("ERROR:"):
+                console.print(
+                    f"[yellow]Warning:[/yellow] Could not pull DDL "
+                    f"for {table_name}: {ddl}"
+                )
+                continue
+            parsed_schema = parse_ddl_to_schema(ddl, dialect=file_dialect)
+            for name, cols in parsed_schema.items():
+                if name not in schema:
+                    schema[name] = cols
+
+        return schema
+
     def _ensure_node(
         self,
         identifier: str,
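The `parse_ddl_to_schema()` helper used above lives in the new `src/sqlglider/utils/schema.py`, whose body is not shown in this diff. As a rough sketch of what DDL column extraction with sqlglot can look like (an illustration under that caveat, not the package's actual implementation):

```python
import sqlglot
from sqlglot import exp


def parse_ddl_to_schema(ddl: str, dialect: str = "spark") -> dict[str, dict[str, str]]:
    """Sketch: map CREATE TABLE DDL to {table: {column: type}}.

    Missing types fall back to "UNKNOWN", mirroring the plan's decision
    that sqlglot only needs column names for star expansion.
    """
    schema: dict[str, dict[str, str]] = {}
    for stmt in sqlglot.parse(ddl, read=dialect):
        # CREATE TABLE with column definitions parses as Create(this=Schema(...)).
        if isinstance(stmt, exp.Create) and isinstance(stmt.this, exp.Schema):
            table = stmt.this.this.sql(dialect=dialect)
            columns: dict[str, str] = {}
            for col in stmt.this.expressions:
                if isinstance(col, exp.ColumnDef):
                    kind = col.args.get("kind")
                    columns[col.name] = kind.sql(dialect=dialect) if kind else "UNKNOWN"
            schema[table] = columns
    return schema


print(parse_ddl_to_schema("CREATE TABLE t (id INT, name STRING)"))
# roughly {'t': {'id': 'INT', 'name': 'STRING'}}
```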
@@ -343,6 +524,11 @@ class GraphBuilder:
         """Get mapping from node identifiers to rustworkx indices."""
         return self._node_index_map.copy()
 
+    @property
+    def resolved_schema(self) -> Dict[str, Dict[str, str]]:
+        """Get the resolved schema dictionary from schema extraction pass."""
+        return self._resolved_schema.copy()
+
     @property
     def skipped_files(self) -> List[tuple[str, str]]:
         """Get list of files that were skipped during graph building."""