sql-glider 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. {sql_glider-0.1.2 → sql_glider-0.1.4}/.gitignore +2 -1
  2. {sql_glider-0.1.2 → sql_glider-0.1.4}/ARCHITECTURE.md +211 -5
  3. {sql_glider-0.1.2 → sql_glider-0.1.4}/CLAUDE.md +79 -10
  4. {sql_glider-0.1.2 → sql_glider-0.1.4}/PKG-INFO +177 -5
  5. {sql_glider-0.1.2 → sql_glider-0.1.4}/README.md +174 -4
  6. sql_glider-0.1.4/plans/2025-12-13-dissect-command.md +411 -0
  7. sql_glider-0.1.4/plans/2025-12-14-tables-pull-command.md +153 -0
  8. sql_glider-0.1.4/plans/2026-01-25-fix-union-lineage-chain.md +237 -0
  9. {sql_glider-0.1.2 → sql_glider-0.1.4}/pyproject.toml +11 -0
  10. {sql_glider-0.1.2 → sql_glider-0.1.4}/sqlglider.toml.example +42 -0
  11. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/_version.py +2 -2
  12. sql_glider-0.1.4/src/sqlglider/catalog/__init__.py +30 -0
  13. sql_glider-0.1.4/src/sqlglider/catalog/base.py +99 -0
  14. sql_glider-0.1.4/src/sqlglider/catalog/databricks.py +255 -0
  15. sql_glider-0.1.4/src/sqlglider/catalog/registry.py +121 -0
  16. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/cli.py +467 -15
  17. sql_glider-0.1.4/src/sqlglider/dissection/__init__.py +17 -0
  18. sql_glider-0.1.4/src/sqlglider/dissection/analyzer.py +767 -0
  19. sql_glider-0.1.4/src/sqlglider/dissection/formatters.py +222 -0
  20. sql_glider-0.1.4/src/sqlglider/dissection/models.py +112 -0
  21. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/graph/builder.py +46 -8
  22. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/lineage/analyzer.py +281 -13
  23. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/utils/config.py +25 -0
  24. sql_glider-0.1.4/tests/fixtures/original_queries/test_generated_column_query.sql +34 -0
  25. sql_glider-0.1.4/tests/fixtures/original_queries/test_view_window_cte.sql +27 -0
  26. sql_glider-0.1.4/tests/sqlglider/catalog/__init__.py +1 -0
  27. sql_glider-0.1.4/tests/sqlglider/catalog/test_base.py +53 -0
  28. sql_glider-0.1.4/tests/sqlglider/catalog/test_databricks.py +248 -0
  29. sql_glider-0.1.4/tests/sqlglider/catalog/test_registry.py +127 -0
  30. sql_glider-0.1.4/tests/sqlglider/dissection/__init__.py +1 -0
  31. sql_glider-0.1.4/tests/sqlglider/dissection/test_analyzer.py +535 -0
  32. sql_glider-0.1.4/tests/sqlglider/dissection/test_formatters.py +194 -0
  33. sql_glider-0.1.4/tests/sqlglider/dissection/test_models.py +85 -0
  34. sql_glider-0.1.4/tests/sqlglider/graph/test_builder.py +580 -0
  35. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/graph/test_merge.py +29 -48
  36. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/graph/test_models.py +32 -54
  37. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/graph/test_query.py +5 -13
  38. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/graph/test_serialization.py +15 -25
  39. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/lineage/test_analyzer.py +476 -0
  40. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/lineage/test_formatters.py +4 -11
  41. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/templating/test_variables.py +75 -107
  42. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/test_cli.py +190 -248
  43. sql_glider-0.1.4/tests/sqlglider/utils/test_file_utils.py +152 -0
  44. {sql_glider-0.1.2 → sql_glider-0.1.4}/uv.lock +562 -0
  45. sql_glider-0.1.2/tests/sqlglider/graph/test_builder.py +0 -414
  46. sql_glider-0.1.2/tests/sqlglider/utils/test_file_utils.py +0 -193
  47. {sql_glider-0.1.2 → sql_glider-0.1.4}/.github/workflows/ci.yml +0 -0
  48. {sql_glider-0.1.2 → sql_glider-0.1.4}/.github/workflows/publish.yml +0 -0
  49. {sql_glider-0.1.2 → sql_glider-0.1.4}/.python-version +0 -0
  50. {sql_glider-0.1.2 → sql_glider-0.1.4}/LICENSE +0 -0
  51. {sql_glider-0.1.2 → sql_glider-0.1.4}/plans/2025-12-05-column-level-lineage.md +0 -0
  52. {sql_glider-0.1.2 → sql_glider-0.1.4}/plans/2025-12-05-reverse-lineage.md +0 -0
  53. {sql_glider-0.1.2 → sql_glider-0.1.4}/plans/2025-12-06-config-file-support.md +0 -0
  54. {sql_glider-0.1.2 → sql_glider-0.1.4}/plans/2025-12-06-graph-lineage.md +0 -0
  55. {sql_glider-0.1.2 → sql_glider-0.1.4}/plans/2025-12-06-unify-single-multi-query.md +0 -0
  56. {sql_glider-0.1.2 → sql_glider-0.1.4}/plans/2025-12-07-sample-data-model.md +0 -0
  57. {sql_glider-0.1.2 → sql_glider-0.1.4}/plans/2025-12-07-sql-templating.md +0 -0
  58. {sql_glider-0.1.2 → sql_glider-0.1.4}/plans/2025-12-08-tables-command.md +0 -0
  59. {sql_glider-0.1.2 → sql_glider-0.1.4}/plans/2025-12-09-graph-query-paths.md +0 -0
  60. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/README.md +0 -0
  61. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/business/expire_dim_customer.sql +0 -0
  62. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/business/load_fact_orders.sql +0 -0
  63. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/business/load_fact_payments.sql +0 -0
  64. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/business/merge_dim_customer.sql +0 -0
  65. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/business/merge_dim_product.sql +0 -0
  66. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
  67. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/complex/conditional_merge.sql +0 -0
  68. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/complex/cte_insert.sql +0 -0
  69. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/complex/multi_table_transform.sql +0 -0
  70. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/dim_customer.sql +0 -0
  71. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/dim_product.sql +0 -0
  72. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/fact_orders.sql +0 -0
  73. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/fact_payments.sql +0 -0
  74. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/raw_addresses.sql +0 -0
  75. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/raw_customers.sql +0 -0
  76. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/raw_order_items.sql +0 -0
  77. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/raw_orders.sql +0 -0
  78. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/raw_payments.sql +0 -0
  79. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/raw_products.sql +0 -0
  80. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/stg_customers.sql +0 -0
  81. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/stg_orders.sql +0 -0
  82. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/stg_payments.sql +0 -0
  83. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/ddl/stg_products.sql +0 -0
  84. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
  85. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
  86. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
  87. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
  88. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/maintenance/update_product_status.sql +0 -0
  89. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
  90. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
  91. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
  92. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
  93. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/staging/load_stg_customers.sql +0 -0
  94. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/staging/load_stg_orders.sql +0 -0
  95. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/staging/load_stg_payments.sql +0 -0
  96. {sql_glider-0.1.2 → sql_glider-0.1.4}/sample_data_model/staging/load_stg_products.sql +0 -0
  97. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/__init__.py +0 -0
  98. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/global_models.py +0 -0
  99. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/graph/__init__.py +0 -0
  100. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/graph/merge.py +0 -0
  101. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/graph/models.py +0 -0
  102. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/graph/query.py +0 -0
  103. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/graph/serialization.py +0 -0
  104. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/lineage/__init__.py +0 -0
  105. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/lineage/formatters.py +0 -0
  106. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/templating/__init__.py +0 -0
  107. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/templating/base.py +0 -0
  108. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/templating/jinja.py +0 -0
  109. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/templating/registry.py +0 -0
  110. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/templating/variables.py +0 -0
  111. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/utils/__init__.py +0 -0
  112. {sql_glider-0.1.2 → sql_glider-0.1.4}/src/sqlglider/utils/file_utils.py +0 -0
  113. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/__init__.py +0 -0
  114. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
  115. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
  116. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/customers.sql +0 -0
  117. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/orders.sql +0 -0
  118. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/reports.sql +0 -0
  119. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
  120. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_cte.sql +0 -0
  121. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
  122. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_multi.sql +0 -0
  123. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
  124. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_single_query.sql +0 -0
  125. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_subquery.sql +0 -0
  126. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_tables.sql +0 -0
  127. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/original_queries/test_view.sql +0 -0
  128. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/fixtures/sample_manifest.csv +0 -0
  129. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/__init__.py +0 -0
  130. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/graph/__init__.py +0 -0
  131. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/lineage/__init__.py +0 -0
  132. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/templating/__init__.py +0 -0
  133. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/templating/test_base.py +0 -0
  134. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/templating/test_jinja.py +0 -0
  135. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/templating/test_registry.py +0 -0
  136. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/utils/__init__.py +0 -0
  137. {sql_glider-0.1.2 → sql_glider-0.1.4}/tests/sqlglider/utils/test_config.py +0 -0
@@ -211,4 +211,5 @@ src/sqlglider/_version.py
211
211
 
212
212
  # Random Local Stuff
213
213
  junk.md
214
- megagraph.json
214
+ megagraph.json
215
+ /sqlglider.toml
@@ -12,6 +12,11 @@ sql-glider/
12
12
  │ └── sqlglider/
13
13
  │ ├── __init__.py # Package initialization
14
14
  │ ├── cli.py # Typer CLI entry point
15
+ │ ├── dissection/
16
+ │ │ ├── __init__.py # Dissection module exports
17
+ │ │ ├── models.py # ComponentType, SQLComponent, QueryDissectionResult
18
+ │ │ ├── analyzer.py # DissectionAnalyzer for query decomposition
19
+ │ │ └── formatters.py # Output formatters (text, JSON, CSV)
15
20
  │ ├── graph/
16
21
  │ │ ├── __init__.py # Graph module exports
17
22
  │ │ ├── models.py # Pydantic models for graph data
@@ -23,6 +28,17 @@ sql-glider/
23
28
  │ │ ├── __init__.py # Lineage module exports
24
29
  │ │ ├── analyzer.py # Core lineage analysis logic
25
30
  │ │ └── formatters.py # Output formatters (text, JSON, CSV)
31
+ │ ├── catalog/
32
+ │ │ ├── __init__.py # Catalog module exports
33
+ │ │ ├── base.py # Abstract Catalog class + CatalogError
34
+ │ │ ├── registry.py # Plugin discovery via entry points
35
+ │ │ └── databricks.py # Databricks Unity Catalog implementation
36
+ │ ├── templating/
37
+ │ │ ├── __init__.py # Templating module exports
38
+ │ │ ├── base.py # Abstract Templater class + TemplaterError
39
+ │ │ ├── registry.py # Plugin discovery for templaters
40
+ │ │ ├── jinja.py # Jinja2 templater implementation
41
+ │ │ └── variables.py # Variable loading from multiple sources
26
42
  │ └── utils/
27
43
  │ ├── __init__.py # Utils module exports
28
44
  │ ├── config.py # Configuration file loading
@@ -32,6 +48,11 @@ sql-glider/
32
48
  │ ├── sqlglider/
33
49
  │ │ ├── __init__.py
34
50
  │ │ ├── test_cli.py # CLI integration tests
51
+ │ │ ├── dissection/
52
+ │ │ │ ├── __init__.py
53
+ │ │ │ ├── test_models.py # Dissection model tests
54
+ │ │ │ ├── test_analyzer.py # DissectionAnalyzer tests
55
+ │ │ │ └── test_formatters.py # Dissection formatter tests
35
56
  │ │ ├── graph/
36
57
  │ │ │ ├── __init__.py
37
58
  │ │ │ ├── test_models.py # Graph model tests
@@ -92,11 +113,11 @@ sql-glider/
92
113
  - Example: `sqlglider lineage query.sql --source-column orders.customer_id`
93
114
  - Note: `--column` and `--source-column` are mutually exclusive
94
115
 
95
- **Tables Command:** `sqlglider tables <sql_file>`
96
- - Lists all tables involved in SQL files with usage and type information
116
+ **Tables Command Group:** `sqlglider tables <subcommand>`
117
+ - `tables overview <sql_file>`: Lists all tables involved in SQL files with usage and type information
97
118
  - Outputs include: table name (fully qualified), usage (INPUT/OUTPUT/BOTH), object type (TABLE/VIEW/CTE/UNKNOWN)
98
119
  - Supports all standard options: `--dialect`, `--output-format`, `--output-file`, `--templater`, `--var`, `--vars-file`
99
- - Example: `sqlglider tables query.sql --output-format json`
120
+ - Example: `sqlglider tables overview query.sql --output-format json`
100
121
 
101
122
  **Error Handling:**
102
123
  - File not found errors
@@ -428,7 +449,123 @@ sqlglider graph query graph.json --upstream orders.customer_id
428
449
  sqlglider graph query graph.json --downstream customers.id -f json
429
450
  ```
430
451
 
431
- ### 5. File Utilities (`utils/file_utils.py`)
452
+ ### 5. Dissection Module (`dissection/`)
453
+
454
+ **Purpose:** Decompose SQL queries into constituent parts for unit testing and analysis
455
+
456
+ The dissection module enables extracting components from SQL queries (CTEs, subqueries, UNION branches, etc.) so they can be tested individually or analyzed for structure.
457
+
458
+ #### Data Models (`dissection/models.py`)
459
+
460
+ ```python
461
+ class ComponentType(str, Enum):
462
+ """Type of SQL component extracted from a query."""
463
+ CTE = "CTE" # Common Table Expression
464
+ MAIN_QUERY = "MAIN_QUERY" # Primary SELECT statement
465
+ SUBQUERY = "SUBQUERY" # Nested SELECT in FROM clause
466
+ SCALAR_SUBQUERY = "SCALAR_SUBQUERY" # Single-value subquery
467
+ TARGET_TABLE = "TARGET_TABLE" # Output table for DML/DDL
468
+ SOURCE_QUERY = "SOURCE_QUERY" # SELECT within DML/DDL
469
+ UNION_BRANCH = "UNION_BRANCH" # Individual SELECT in UNION
470
+
471
+ class SQLComponent(BaseModel):
472
+ """Represents an extracted SQL component."""
473
+ component_type: ComponentType
474
+ component_index: int # Sequential order within query
475
+ name: Optional[str] = None # CTE name, alias, or target table
476
+ sql: str # Extracted SQL for this component
477
+ parent_index: Optional[int] = None # Index of parent component
478
+ depth: int = 0 # Nesting level (0 = top-level)
479
+ is_executable: bool = True # Can run standalone?
480
+ dependencies: List[str] = [] # CTE names this depends on
481
+ location: str = "" # Human-readable location context
482
+
483
+ class QueryMetadata(BaseModel):
484
+ """Metadata about a dissected query."""
485
+ query_index: int # 0-based index in multi-query file
486
+ query_preview: str # First 100 chars of query
487
+ statement_type: str # SELECT, INSERT, CREATE, etc.
488
+ total_components: int # Number of components extracted
489
+
490
+ class QueryDissectionResult(BaseModel):
491
+ """Complete dissection result for a single query."""
492
+ metadata: QueryMetadata
493
+ components: List[SQLComponent]
494
+ original_sql: str # Full original SQL for reference
495
+
496
+ def get_component_by_name(self, name: str) -> Optional[SQLComponent]
497
+ def get_components_by_type(self, component_type: ComponentType) -> List[SQLComponent]
498
+ def get_executable_components(self) -> List[SQLComponent]
499
+ ```
500
+
501
+ #### Dissection Analyzer (`dissection/analyzer.py`)
502
+
503
+ ```python
504
+ class DissectionAnalyzer:
505
+ def __init__(self, sql: str, dialect: str = "spark")
506
+ def dissect_queries(self) -> List[QueryDissectionResult]
507
+ ```
508
+
509
+ **Extraction Order:**
510
+ 1. CTEs (by declaration order)
511
+ 2. TARGET_TABLE (for INSERT/CREATE/MERGE)
512
+ 3. SOURCE_QUERY (for DML/DDL statements)
513
+ 4. MAIN_QUERY (with full SQL including WITH clause)
514
+ 5. UNION_BRANCHES (if MAIN_QUERY is a UNION)
515
+ 6. SUBQUERIES (depth-first from FROM clauses)
516
+ 7. SCALAR_SUBQUERIES (from SELECT list, WHERE, HAVING)
517
+
518
+ **Key Features:**
519
+ - Uses SQLGlot AST traversal for accurate extraction
520
+ - Tracks CTE dependencies by finding table references matching CTE names
521
+ - UNION flattening extracts all branches from nested UNION expressions
522
+ - Parent-child relationships via `parent_index` and `depth`
523
+ - `location` field provides human-readable context (e.g., "SELECT list in CTE 'customer_segments'")
524
+
525
+ #### Formatters (`dissection/formatters.py`)
526
+
527
+ ```python
528
+ class DissectionTextFormatter:
529
+ @staticmethod
530
+ def format(results: List[QueryDissectionResult], console: Console) -> None
531
+
532
+ class DissectionJsonFormatter:
533
+ @staticmethod
534
+ def format(results: List[QueryDissectionResult]) -> str
535
+
536
+ class DissectionCsvFormatter:
537
+ @staticmethod
538
+ def format(results: List[QueryDissectionResult]) -> str
539
+ ```
540
+
541
+ **Output Formats:**
542
+ - **Text:** Rich table with columns for Index, Type, Name, Depth, Executable, Location, SQL Preview
543
+ - **JSON:** Full structured data with all component details
544
+ - **CSV:** Flattened format with semicolon-separated dependencies
545
+
546
+ #### CLI Command
547
+
548
+ ```bash
549
+ # Dissect a SQL file
550
+ sqlglider dissect query.sql
551
+
552
+ # JSON output
553
+ sqlglider dissect query.sql --output-format json
554
+
555
+ # CSV output
556
+ sqlglider dissect query.sql --output-format csv
557
+
558
+ # Export to file
559
+ sqlglider dissect query.sql -f json -o dissected.json
560
+
561
+ # From stdin
562
+ echo "WITH cte AS (SELECT id FROM users) SELECT * FROM cte" | sqlglider dissect
563
+
564
+ # With templating
565
+ sqlglider dissect query.sql --templater jinja --var schema=analytics
566
+ ```
567
+
568
+ ### 6. File Utilities (`utils/file_utils.py`)
432
569
 
433
570
  **Purpose:** File I/O operations with proper error handling
434
571
 
@@ -442,7 +579,7 @@ def read_sql_file(file_path: Path) -> str
442
579
  - PermissionError: Cannot read file
443
580
  - UnicodeDecodeError: File not UTF-8 encoded
444
581
 
445
- ### 5. Configuration System (`utils/config.py`)
582
+ ### 7. Configuration System (`utils/config.py`)
446
583
 
447
584
  **Purpose:** Load and manage configuration from `sqlglider.toml`
448
585
 
@@ -453,6 +590,11 @@ class ConfigSettings(BaseModel):
453
590
  dialect: Optional[str] = None
454
591
  level: Optional[str] = None
455
592
  output_format: Optional[str] = None
593
+ templater: Optional[str] = None
594
+ templating: Optional[TemplatingConfig] = None
595
+ catalog_type: Optional[str] = None
596
+ ddl_folder: Optional[str] = None
597
+ catalog: Optional[CatalogConfig] = None
456
598
  ```
457
599
 
458
600
  **Key Functions:**
@@ -480,6 +622,11 @@ def load_config(config_path: Optional[Path] = None) -> ConfigSettings
480
622
  dialect = "postgres"
481
623
  level = "column"
482
624
  output_format = "json"
625
+ catalog_type = "databricks"
626
+ ddl_folder = "./ddl"
627
+
628
+ [sqlglider.catalog.databricks]
629
+ warehouse_id = "abc123..."
483
630
  ```
484
631
 
485
632
  **Design Notes:**
@@ -488,6 +635,65 @@ output_format = "json"
488
635
  - Fail-safe: Never crashes on config errors
489
636
  - Forward compatible: Ignores unknown settings for future features
490
637
 
638
+ ### 8. Catalog Module (`catalog/`)
639
+
640
+ **Purpose:** Plugin system for fetching DDL from remote data catalogs
641
+
642
+ The catalog module provides an extensible architecture for connecting to various data catalogs (e.g., Databricks Unity Catalog) and fetching table DDL definitions.
643
+
644
+ **Plugin Architecture:**
645
+
646
+ ```python
647
+ # Abstract base class
648
+ class Catalog(ABC):
649
+ @property
650
+ @abstractmethod
651
+ def name(self) -> str: ...
652
+
653
+ @abstractmethod
654
+ def get_ddl(self, table_name: str) -> str: ...
655
+
656
+ @abstractmethod
657
+ def get_ddl_batch(self, table_names: List[str]) -> Dict[str, str]: ...
658
+
659
+ def configure(self, config: Optional[Dict[str, Any]] = None) -> None: ...
660
+ ```
661
+
662
+ **Registry Pattern:**
663
+
664
+ - Catalogs are discovered via Python entry points (`sqlglider.catalogs`)
665
+ - Lazy loading with graceful handling of missing optional dependencies
666
+ - Factory function `get_catalog(name)` returns configured instances
667
+
668
+ **Built-in Catalogs:**
669
+
670
+ - **databricks**: Databricks Unity Catalog via `databricks-sdk`
671
+ - Uses `SHOW CREATE TABLE` via statement execution API
672
+ - Requires warehouse ID for SQL execution
673
+ - Authentication via env vars or config
674
+
675
+ **CLI Integration:**
676
+
677
+ ```bash
678
+ # Pull DDL for tables in a SQL file
679
+ sqlglider tables pull query.sql --catalog-type databricks
680
+
681
+ # Output to folder (one file per table)
682
+ sqlglider tables pull query.sql -c databricks -o ./ddl/
683
+
684
+ # List available catalog providers
685
+ sqlglider tables pull --list
686
+ ```
687
+
688
+ **Adding Custom Catalogs:**
689
+
690
+ 1. Create a class inheriting from `Catalog`
691
+ 2. Register via entry point in `pyproject.toml`:
692
+ ```toml
693
+ [project.entry-points."sqlglider.catalogs"]
694
+ my-catalog = "my_package.catalog:MyCatalog"
695
+ ```
696
+
491
697
  ## Technology Stack
492
698
 
493
699
  ### Core Dependencies
@@ -59,6 +59,10 @@ This project uses `uv` for Python package management. Python 3.11+ is required.
59
59
  ```
60
60
  src/sqlglider/
61
61
  ├── cli.py # Typer CLI entry point
62
+ ├── dissection/
63
+ │ ├── analyzer.py # DissectionAnalyzer for query decomposition
64
+ │ ├── formatters.py # Output formatters (text, JSON, CSV)
65
+ │ └── models.py # ComponentType, SQLComponent, QueryDissectionResult
62
66
  ├── graph/
63
67
  │ ├── builder.py # Build lineage graphs from SQL files
64
68
  │ ├── merge.py # Merge multiple graphs
@@ -109,7 +113,7 @@ uv run sqlglider lineage query.sql --dialect postgres
109
113
 
110
114
  ### Reading from Stdin
111
115
 
112
- All commands (`lineage`, `tables`, `template`) support reading SQL from stdin when no file is provided:
116
+ All commands (`lineage`, `tables overview`, `tables pull`, `template`, `dissect`) support reading SQL from stdin when no file is provided:
113
117
 
114
118
  ```bash
115
119
  # Pipe SQL directly to lineage analysis
@@ -128,8 +132,8 @@ FROM customers c
128
132
  JOIN orders o ON c.id = o.customer_id
129
133
  EOF
130
134
 
131
- # Pipe to tables command
132
- echo "SELECT * FROM users JOIN orders ON users.id = orders.user_id" | uv run sqlglider tables
135
+ # Pipe to tables overview command
136
+ echo "SELECT * FROM users JOIN orders ON users.id = orders.user_id" | uv run sqlglider tables overview
133
137
 
134
138
  # Pipe template with variables
135
139
  echo "SELECT * FROM {{ schema }}.users" | uv run sqlglider template --var schema=prod
@@ -174,25 +178,25 @@ Extract all tables involved in SQL files with usage and type information:
174
178
 
175
179
  ```bash
176
180
  # List all tables in a SQL file
177
- uv run sqlglider tables query.sql
181
+ uv run sqlglider tables overview query.sql
178
182
 
179
183
  # JSON output
180
- uv run sqlglider tables query.sql --output-format json
184
+ uv run sqlglider tables overview query.sql --output-format json
181
185
 
182
186
  # CSV output
183
- uv run sqlglider tables query.sql --output-format csv
187
+ uv run sqlglider tables overview query.sql --output-format csv
184
188
 
185
189
  # Export to file
186
- uv run sqlglider tables query.sql --output-format csv --output-file tables.csv
190
+ uv run sqlglider tables overview query.sql --output-format csv --output-file tables.csv
187
191
 
188
192
  # Different SQL dialect
189
- uv run sqlglider tables query.sql --dialect postgres
193
+ uv run sqlglider tables overview query.sql --dialect postgres
190
194
 
191
195
  # Filter to queries referencing a specific table (multi-query files)
192
- uv run sqlglider tables multi_query.sql --table customers
196
+ uv run sqlglider tables overview multi_query.sql --table customers
193
197
 
194
198
  # With templating support
195
- uv run sqlglider tables query.sql --templater jinja --var schema=analytics
199
+ uv run sqlglider tables overview query.sql --templater jinja --var schema=analytics
196
200
  ```
197
201
 
198
202
  **Output includes:**
@@ -200,6 +204,32 @@ uv run sqlglider tables query.sql --templater jinja --var schema=analytics
200
204
  - **Usage**: `INPUT` (read from), `OUTPUT` (written to), or `BOTH`
201
205
  - **Object Type**: `TABLE`, `VIEW`, `CTE`, or `UNKNOWN`
202
206
 
207
+ ### DDL Retrieval from Remote Catalogs
208
+
209
+ Pull DDL definitions from remote data catalogs for tables used in SQL:
210
+
211
+ ```bash
212
+ # Pull DDL for tables in a SQL file (output to stdout)
213
+ uv run sqlglider tables pull query.sql --catalog-type databricks
214
+
215
+ # Pull DDL to a folder (one file per table)
216
+ uv run sqlglider tables pull query.sql -c databricks -o ./ddl/
217
+
218
+ # With templating
219
+ uv run sqlglider tables pull query.sql -c databricks --templater jinja --var schema=prod
220
+
221
+ # From stdin
222
+ echo "SELECT * FROM my_catalog.my_schema.users" | uv run sqlglider tables pull -c databricks
223
+
224
+ # List available catalog providers
225
+ uv run sqlglider tables pull --list
226
+ ```
227
+
228
+ **Notes:**
229
+ - Requires optional dependency: `pip install sql-glider[databricks]`
230
+ - CTEs are automatically excluded (they don't exist in remote catalogs)
231
+ - Configure authentication via environment variables (`DATABRICKS_HOST`, `DATABRICKS_TOKEN`, `DATABRICKS_WAREHOUSE_ID`) or `sqlglider.toml`
232
+
203
233
  ### Graph-Based Lineage (Cross-File Analysis)
204
234
 
205
235
  ```bash
@@ -293,6 +323,45 @@ SELECT * FROM cte
293
323
  3. Config file (`[sqlglider.templating.variables]`)
294
324
  4. Environment variables (`SQLGLIDER_VAR_*`)
295
325
 
326
+ ### Query Dissection
327
+
328
+ Decompose SQL queries into constituent parts for unit testing and analysis:
329
+
330
+ ```bash
331
+ # Dissect a SQL file (text output)
332
+ uv run sqlglider dissect query.sql
333
+
334
+ # JSON output with full component details
335
+ uv run sqlglider dissect query.sql --output-format json
336
+
337
+ # CSV output for spreadsheet analysis
338
+ uv run sqlglider dissect query.sql --output-format csv
339
+
340
+ # Export to file
341
+ uv run sqlglider dissect query.sql --output-format json --output-file dissected.json
342
+
343
+ # With templating support
344
+ uv run sqlglider dissect query.sql --templater jinja --var schema=analytics
345
+
346
+ # From stdin
347
+ echo "WITH cte AS (SELECT id FROM users) SELECT * FROM cte" | uv run sqlglider dissect
348
+ ```
349
+
350
+ **Extracted Component Types:**
351
+ - `CTE`: Common Table Expressions from WITH clause
352
+ - `MAIN_QUERY`: The primary SELECT statement
353
+ - `SUBQUERY`: Nested SELECT in FROM clause
354
+ - `SCALAR_SUBQUERY`: Single-value subquery in SELECT list, WHERE, HAVING
355
+ - `TARGET_TABLE`: Output table for INSERT/CREATE/MERGE (not executable)
356
+ - `SOURCE_QUERY`: SELECT within DML/DDL statements
357
+ - `UNION_BRANCH`: Individual SELECT in UNION/UNION ALL
358
+
359
+ **Use Cases:**
360
+ - Unit test CTEs and subqueries individually
361
+ - Extract DQL from CTAS, CREATE VIEW, INSERT statements
362
+ - Analyze query structure and component dependencies
363
+ - Break apart complex queries for understanding
364
+
296
365
  ## Development Guidelines
297
366
 
298
367
  ### Code Style
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -26,6 +26,8 @@ Requires-Dist: rich>=13.0.0
26
26
  Requires-Dist: rustworkx>=0.15.0
27
27
  Requires-Dist: sqlglot[rs]>=25.0.0
28
28
  Requires-Dist: typer>=0.9.0
29
+ Provides-Extra: databricks
30
+ Requires-Dist: databricks-sdk>=0.20.0; extra == 'databricks'
29
31
  Description-Content-Type: text/markdown
30
32
 
31
33
  # SQL Glider
@@ -40,6 +42,7 @@ SQL Glider provides powerful column-level and table-level lineage analysis for S
40
42
 
41
43
  - **Forward Lineage:** Trace output columns back to their source tables and columns
42
44
  - **Reverse Lineage:** Impact analysis - find which output columns are affected by a source column
45
+ - **Query Dissection:** Decompose SQL into components (CTEs, subqueries, UNION branches) for unit testing
43
46
  - **Table Extraction:** List all tables in SQL files with usage type (INPUT/OUTPUT) and object type (TABLE/VIEW/CTE)
44
47
  - **Multi-level Tracing:** Automatically handles CTEs, subqueries, and complex expressions
45
48
  - **Graph-Based Lineage:** Build and query lineage graphs across thousands of SQL files
@@ -171,15 +174,32 @@ List all tables involved in SQL files with usage and type information:
171
174
 
172
175
  ```bash
173
176
  # List all tables in a SQL file
174
- uv run sqlglider tables query.sql
177
+ uv run sqlglider tables overview query.sql
175
178
 
176
179
  # JSON output with detailed table info
177
- uv run sqlglider tables query.sql --output-format json
180
+ uv run sqlglider tables overview query.sql --output-format json
178
181
 
179
182
  # Export to CSV
180
- uv run sqlglider tables query.sql --output-format csv --output-file tables.csv
183
+ uv run sqlglider tables overview query.sql --output-format csv --output-file tables.csv
181
184
  ```
182
185
 
186
+ ### Pull DDL from Remote Catalogs
187
+
188
+ Fetch DDL definitions from remote data catalogs (e.g., Databricks Unity Catalog):
189
+
190
+ ```bash
191
+ # Pull DDL for all tables used in a SQL file (outputs to stdout)
192
+ uv run sqlglider tables pull query.sql --catalog-type databricks
193
+
194
+ # Save DDL files to a folder (one file per table)
195
+ uv run sqlglider tables pull query.sql -c databricks -o ./ddl/
196
+
197
+ # List available catalog providers
198
+ uv run sqlglider tables pull --list
199
+ ```
200
+
201
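+ **Note:** Catalog providers such as Databricks require optional dependencies. Install them with: `pip install "sql-glider[databricks]"` (the quotes keep shells such as zsh from treating the brackets as a glob pattern)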
202
+
183
203
  **Example Output of `tables overview` (JSON):**
184
204
  ```json
185
205
  {
@@ -204,6 +224,94 @@ uv run sqlglider tables query.sql --output-format csv --output-file tables.csv
204
224
  - `CTE`: Common Table Expression (WITH clause)
205
225
  - `UNKNOWN`: Cannot determine type from SQL alone
206
226
 
227
+ ### Query Dissection
228
+
229
+ Decompose SQL queries into constituent parts for unit testing and analysis:
230
+
231
+ ```bash
232
+ # Dissect a SQL file (text output)
233
+ uv run sqlglider dissect query.sql
234
+
235
+ # JSON output with full component details
236
+ uv run sqlglider dissect query.sql --output-format json
237
+
238
+ # CSV output for spreadsheet analysis
239
+ uv run sqlglider dissect query.sql --output-format csv
240
+
241
+ # Export to file
242
+ uv run sqlglider dissect query.sql -f json -o dissected.json
243
+
244
+ # With templating support
245
+ uv run sqlglider dissect query.sql --templater jinja --var schema=analytics
246
+
247
+ # From stdin
248
+ echo "WITH cte AS (SELECT id FROM users) SELECT * FROM cte" | uv run sqlglider dissect
249
+ ```
250
+
251
+ **Example Input:**
252
+ ```sql
253
+ WITH order_totals AS (
254
+ SELECT customer_id, SUM(amount) AS total
255
+ FROM orders
256
+ GROUP BY customer_id
257
+ )
258
+ INSERT INTO analytics.summary
259
+ SELECT * FROM order_totals WHERE total > 100
260
+ ```
261
+
262
+ **Example Output (JSON):**
263
+ ```json
264
+ {
265
+ "queries": [{
266
+ "query_index": 0,
267
+ "statement_type": "INSERT",
268
+ "total_components": 3,
269
+ "components": [
270
+ {
271
+ "component_type": "CTE",
272
+ "component_index": 0,
273
+ "name": "order_totals",
274
+ "sql": "SELECT customer_id, SUM(amount) AS total FROM orders GROUP BY customer_id",
275
+ "is_executable": true,
276
+ "dependencies": [],
277
+ "location": "WITH clause"
278
+ },
279
+ {
280
+ "component_type": "TARGET_TABLE",
281
+ "component_index": 1,
282
+ "name": "analytics.summary",
283
+ "sql": "analytics.summary",
284
+ "is_executable": false,
285
+ "location": "INSERT INTO target"
286
+ },
287
+ {
288
+ "component_type": "SOURCE_QUERY",
289
+ "component_index": 2,
290
+ "sql": "SELECT * FROM order_totals WHERE total > 100",
291
+ "is_executable": true,
292
+ "dependencies": ["order_totals"],
293
+ "location": "INSERT source SELECT"
294
+ }
295
+ ]
296
+ }]
297
+ }
298
+ ```
299
+
300
+ **Extracted Component Types:**
301
+ - `CTE`: Common Table Expressions from WITH clause
302
+ - `MAIN_QUERY`: The primary SELECT statement
303
+ - `SUBQUERY`: Nested SELECT in FROM clause
304
+ - `SCALAR_SUBQUERY`: Single-value subquery in the SELECT list or in a WHERE or HAVING clause
305
+ - `TARGET_TABLE`: Output table for INSERT/CREATE/MERGE (not executable)
306
+ - `SOURCE_QUERY`: SELECT within DML/DDL statements
307
+ - `UNION_BRANCH`: Individual SELECT in UNION/UNION ALL
308
+
309
+ **Use Cases:**
310
+ - Unit test CTEs and subqueries individually
311
+ - Extract DQL from CTAS, CREATE VIEW, INSERT statements
312
+ - Analyze query structure and component dependencies
313
+ - Break apart complex queries for understanding
314
+
207
315
  ### Different SQL Dialects
208
316
 
209
317
  ```bash
@@ -475,7 +583,7 @@ Options:
475
583
  ### Tables Command
476
584
 
477
585
  ```
478
- sqlglider tables <sql_file> [OPTIONS]
586
+ sqlglider tables overview <sql_file> [OPTIONS]
479
587
 
480
588
  Arguments:
481
589
  sql_file Path to SQL file to analyze [required]
@@ -491,6 +599,66 @@ Options:
491
599
  --help Show help message and exit
492
600
  ```
493
601
 
602
+ ```
603
+ sqlglider tables pull [sql_file] [OPTIONS]
604
+
605
+ Arguments:
606
+ sql_file Path to SQL file to analyze [optional, reads from stdin if omitted]
607
+
608
+ Options:
609
+ --catalog-type, -c Catalog provider (e.g., 'databricks') [required if not in config]
610
+ --ddl-folder, -o Output folder for DDL files [optional, outputs to stdout if omitted]
611
+ --dialect, -d SQL dialect (spark, postgres, snowflake, etc.) [default: spark]
612
+ --templater, -t Templater for SQL preprocessing (e.g., 'jinja', 'none') [optional]
613
+ --var, -v Template variable in key=value format (repeatable) [optional]
614
+ --vars-file Path to variables file (JSON or YAML) [optional]
615
+ --list, -l List available catalog providers and exit
616
+ --help Show help message and exit
617
+ ```
618
+
619
+ **Databricks Setup:**
620
+
621
+ Install the optional Databricks dependency:
622
+ ```bash
623
+ pip install "sql-glider[databricks]"
624
+ ```
625
+
626
+ Configure authentication (via environment variables or `sqlglider.toml`):
627
+ ```bash
628
+ export DATABRICKS_HOST="https://your-workspace.cloud.databricks.com"
629
+ export DATABRICKS_TOKEN="dapi..."
630
+ export DATABRICKS_WAREHOUSE_ID="abc123..."
631
+ ```
632
+
633
+ ### Dissect Command
634
+
635
+ ```
636
+ sqlglider dissect [sql_file] [OPTIONS]
637
+
638
+ Arguments:
639
+ sql_file Path to SQL file to analyze [optional, reads from stdin if omitted]
640
+
641
+ Options:
642
+ --dialect, -d SQL dialect (spark, postgres, snowflake, etc.) [default: spark]
643
+ --output-format, -f Output format: 'text', 'json', or 'csv' [default: text]
644
+ --output-file, -o Write output to file instead of stdout [optional]
645
+ --templater, -t Templater for SQL preprocessing (e.g., 'jinja', 'none') [optional]
646
+ --var, -v Template variable in key=value format (repeatable) [optional]
647
+ --vars-file Path to variables file (JSON or YAML) [optional]
648
+ --help Show help message and exit
649
+ ```
650
+
651
+ **Output Fields:**
652
+ - `component_type`: Type of component (CTE, MAIN_QUERY, SUBQUERY, etc.)
653
+ - `component_index`: Sequential order within the query (0-based)
654
+ - `name`: CTE name, subquery alias, or target table name
655
+ - `sql`: The extracted SQL for this component
656
+ - `is_executable`: Whether the component can run standalone (`false` for `TARGET_TABLE` components)
657
+ - `dependencies`: List of CTE names this component references
658
+ - `location`: Human-readable context (e.g., "WITH clause", "FROM clause")
659
+ - `depth`: Nesting level (0 = top-level)
660
+ - `parent_index`: Index of parent component for nested components
661
+
494
662
  ### Graph Commands
495
663
 
496
664
  ```
@@ -612,6 +780,10 @@ See [ARCHITECTURE.md](ARCHITECTURE.md) for detailed technical documentation.
612
780
  ```
613
781
  src/sqlglider/
614
782
  ├── cli.py # Typer CLI entry point
783
+ ├── dissection/
784
+ │ ├── analyzer.py # DissectionAnalyzer for query decomposition
785
+ │ ├── formatters.py # Output formatters (text, JSON, CSV)
786
+ │ └── models.py # ComponentType, SQLComponent, QueryDissectionResult
615
787
  ├── graph/
616
788
  │ ├── builder.py # Build graphs from SQL files
617
789
  │ ├── merge.py # Merge multiple graphs