sql-glider 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_glider-0.1.2 → sql_glider-0.1.3}/.gitignore +2 -1
- {sql_glider-0.1.2 → sql_glider-0.1.3}/ARCHITECTURE.md +211 -5
- {sql_glider-0.1.2 → sql_glider-0.1.3}/CLAUDE.md +79 -10
- {sql_glider-0.1.2 → sql_glider-0.1.3}/PKG-INFO +177 -5
- {sql_glider-0.1.2 → sql_glider-0.1.3}/README.md +174 -4
- sql_glider-0.1.3/plans/2025-12-13-dissect-command.md +411 -0
- sql_glider-0.1.3/plans/2025-12-14-tables-pull-command.md +153 -0
- sql_glider-0.1.3/plans/2026-01-25-fix-union-lineage-chain.md +237 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/pyproject.toml +11 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sqlglider.toml.example +42 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/_version.py +2 -2
- sql_glider-0.1.3/src/sqlglider/catalog/__init__.py +30 -0
- sql_glider-0.1.3/src/sqlglider/catalog/base.py +99 -0
- sql_glider-0.1.3/src/sqlglider/catalog/databricks.py +255 -0
- sql_glider-0.1.3/src/sqlglider/catalog/registry.py +121 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/cli.py +467 -15
- sql_glider-0.1.3/src/sqlglider/dissection/__init__.py +17 -0
- sql_glider-0.1.3/src/sqlglider/dissection/analyzer.py +767 -0
- sql_glider-0.1.3/src/sqlglider/dissection/formatters.py +222 -0
- sql_glider-0.1.3/src/sqlglider/dissection/models.py +112 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/graph/builder.py +46 -8
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/lineage/analyzer.py +66 -12
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/utils/config.py +25 -0
- sql_glider-0.1.3/tests/fixtures/original_queries/test_generated_column_query.sql +34 -0
- sql_glider-0.1.3/tests/sqlglider/catalog/__init__.py +1 -0
- sql_glider-0.1.3/tests/sqlglider/catalog/test_base.py +53 -0
- sql_glider-0.1.3/tests/sqlglider/catalog/test_databricks.py +248 -0
- sql_glider-0.1.3/tests/sqlglider/catalog/test_registry.py +127 -0
- sql_glider-0.1.3/tests/sqlglider/dissection/__init__.py +1 -0
- sql_glider-0.1.3/tests/sqlglider/dissection/test_analyzer.py +535 -0
- sql_glider-0.1.3/tests/sqlglider/dissection/test_formatters.py +194 -0
- sql_glider-0.1.3/tests/sqlglider/dissection/test_models.py +85 -0
- sql_glider-0.1.3/tests/sqlglider/graph/test_builder.py +430 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/graph/test_merge.py +29 -48
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/graph/test_models.py +32 -54
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/graph/test_query.py +5 -13
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/graph/test_serialization.py +15 -25
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/lineage/test_analyzer.py +189 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/lineage/test_formatters.py +4 -11
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/templating/test_variables.py +75 -107
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/test_cli.py +190 -248
- sql_glider-0.1.3/tests/sqlglider/utils/test_file_utils.py +152 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/uv.lock +562 -0
- sql_glider-0.1.2/tests/sqlglider/graph/test_builder.py +0 -414
- sql_glider-0.1.2/tests/sqlglider/utils/test_file_utils.py +0 -193
- {sql_glider-0.1.2 → sql_glider-0.1.3}/.github/workflows/ci.yml +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/.github/workflows/publish.yml +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/.python-version +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/LICENSE +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/plans/2025-12-05-column-level-lineage.md +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/plans/2025-12-05-reverse-lineage.md +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/plans/2025-12-06-config-file-support.md +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/plans/2025-12-06-graph-lineage.md +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/plans/2025-12-06-unify-single-multi-query.md +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/plans/2025-12-07-sample-data-model.md +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/plans/2025-12-07-sql-templating.md +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/plans/2025-12-08-tables-command.md +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/plans/2025-12-09-graph-query-paths.md +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/README.md +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/business/expire_dim_customer.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/business/load_fact_orders.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/business/load_fact_payments.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/business/merge_dim_customer.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/business/merge_dim_product.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/complex/conditional_merge.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/complex/cte_insert.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/complex/multi_table_transform.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/dim_customer.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/dim_product.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/fact_orders.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/fact_payments.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/raw_addresses.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/raw_customers.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/raw_order_items.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/raw_orders.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/raw_payments.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/raw_products.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/stg_customers.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/stg_orders.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/stg_payments.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/ddl/stg_products.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/maintenance/update_product_status.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/staging/load_stg_customers.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/staging/load_stg_orders.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/staging/load_stg_payments.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/sample_data_model/staging/load_stg_products.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/global_models.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/graph/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/graph/merge.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/graph/models.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/graph/query.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/graph/serialization.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/lineage/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/lineage/formatters.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/templating/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/templating/base.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/templating/jinja.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/templating/registry.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/templating/variables.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/utils/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/src/sqlglider/utils/file_utils.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/multi_file_queries/customers.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/multi_file_queries/orders.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/multi_file_queries/reports.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/original_queries/test_cte.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/original_queries/test_multi.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/original_queries/test_single_query.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/original_queries/test_subquery.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/original_queries/test_tables.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/original_queries/test_view.sql +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/fixtures/sample_manifest.csv +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/graph/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/lineage/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/templating/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/templating/test_base.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/templating/test_jinja.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/templating/test_registry.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/utils/__init__.py +0 -0
- {sql_glider-0.1.2 → sql_glider-0.1.3}/tests/sqlglider/utils/test_config.py +0 -0
|
@@ -12,6 +12,11 @@ sql-glider/
|
|
|
12
12
|
│ └── sqlglider/
|
|
13
13
|
│ ├── __init__.py # Package initialization
|
|
14
14
|
│ ├── cli.py # Typer CLI entry point
|
|
15
|
+
│ ├── dissection/
|
|
16
|
+
│ │ ├── __init__.py # Dissection module exports
|
|
17
|
+
│ │ ├── models.py # ComponentType, SQLComponent, QueryDissectionResult
|
|
18
|
+
│ │ ├── analyzer.py # DissectionAnalyzer for query decomposition
|
|
19
|
+
│ │ └── formatters.py # Output formatters (text, JSON, CSV)
|
|
15
20
|
│ ├── graph/
|
|
16
21
|
│ │ ├── __init__.py # Graph module exports
|
|
17
22
|
│ │ ├── models.py # Pydantic models for graph data
|
|
@@ -23,6 +28,17 @@ sql-glider/
|
|
|
23
28
|
│ │ ├── __init__.py # Lineage module exports
|
|
24
29
|
│ │ ├── analyzer.py # Core lineage analysis logic
|
|
25
30
|
│ │ └── formatters.py # Output formatters (text, JSON, CSV)
|
|
31
|
+
│ ├── catalog/
|
|
32
|
+
│ │ ├── __init__.py # Catalog module exports
|
|
33
|
+
│ │ ├── base.py # Abstract Catalog class + CatalogError
|
|
34
|
+
│ │ ├── registry.py # Plugin discovery via entry points
|
|
35
|
+
│ │ └── databricks.py # Databricks Unity Catalog implementation
|
|
36
|
+
│ ├── templating/
|
|
37
|
+
│ │ ├── __init__.py # Templating module exports
|
|
38
|
+
│ │ ├── base.py # Abstract Templater class + TemplaterError
|
|
39
|
+
│ │ ├── registry.py # Plugin discovery for templaters
|
|
40
|
+
│ │ ├── jinja.py # Jinja2 templater implementation
|
|
41
|
+
│ │ └── variables.py # Variable loading from multiple sources
|
|
26
42
|
│ └── utils/
|
|
27
43
|
│ ├── __init__.py # Utils module exports
|
|
28
44
|
│ ├── config.py # Configuration file loading
|
|
@@ -32,6 +48,11 @@ sql-glider/
|
|
|
32
48
|
│ ├── sqlglider/
|
|
33
49
|
│ │ ├── __init__.py
|
|
34
50
|
│ │ ├── test_cli.py # CLI integration tests
|
|
51
|
+
│ │ ├── dissection/
|
|
52
|
+
│ │ │ ├── __init__.py
|
|
53
|
+
│ │ │ ├── test_models.py # Dissection model tests
|
|
54
|
+
│ │ │ ├── test_analyzer.py # DissectionAnalyzer tests
|
|
55
|
+
│ │ │ └── test_formatters.py # Dissection formatter tests
|
|
35
56
|
│ │ ├── graph/
|
|
36
57
|
│ │ │ ├── __init__.py
|
|
37
58
|
│ │ │ ├── test_models.py # Graph model tests
|
|
@@ -92,11 +113,11 @@ sql-glider/
|
|
|
92
113
|
- Example: `sqlglider lineage query.sql --source-column orders.customer_id`
|
|
93
114
|
- Note: `--column` and `--source-column` are mutually exclusive
|
|
94
115
|
|
|
95
|
-
**Tables Command:** `sqlglider tables <sql_file>`
|
|
96
|
-
- Lists all tables involved in SQL files with usage and type information
|
|
116
|
+
**Tables Command Group:** `sqlglider tables <subcommand>`
|
|
117
|
+
- `tables overview <sql_file>`: Lists all tables involved in SQL files with usage and type information
|
|
97
118
|
- Outputs include: table name (fully qualified), usage (INPUT/OUTPUT/BOTH), object type (TABLE/VIEW/CTE/UNKNOWN)
|
|
98
119
|
- Supports all standard options: `--dialect`, `--output-format`, `--output-file`, `--templater`, `--var`, `--vars-file`
|
|
99
|
-
- Example: `sqlglider tables query.sql --output-format json`
|
|
120
|
+
- Example: `sqlglider tables overview query.sql --output-format json`
|
|
100
121
|
|
|
101
122
|
**Error Handling:**
|
|
102
123
|
- File not found errors
|
|
@@ -428,7 +449,123 @@ sqlglider graph query graph.json --upstream orders.customer_id
|
|
|
428
449
|
sqlglider graph query graph.json --downstream customers.id -f json
|
|
429
450
|
```
|
|
430
451
|
|
|
431
|
-
### 5. File Utilities (`utils/file_utils.py`)
|
|
452
|
+
### 5. Dissection Module (`dissection/`)
|
|
453
|
+
|
|
454
|
+
**Purpose:** Decompose SQL queries into constituent parts for unit testing and analysis
|
|
455
|
+
|
|
456
|
+
The dissection module enables extracting components from SQL queries (CTEs, subqueries, UNION branches, etc.) so they can be tested individually or analyzed for structure.
|
|
457
|
+
|
|
458
|
+
#### Data Models (`dissection/models.py`)
|
|
459
|
+
|
|
460
|
+
```python
|
|
461
|
+
class ComponentType(str, Enum):
|
|
462
|
+
"""Type of SQL component extracted from a query."""
|
|
463
|
+
CTE = "CTE" # Common Table Expression
|
|
464
|
+
MAIN_QUERY = "MAIN_QUERY" # Primary SELECT statement
|
|
465
|
+
SUBQUERY = "SUBQUERY" # Nested SELECT in FROM clause
|
|
466
|
+
SCALAR_SUBQUERY = "SCALAR_SUBQUERY" # Single-value subquery
|
|
467
|
+
TARGET_TABLE = "TARGET_TABLE" # Output table for DML/DDL
|
|
468
|
+
SOURCE_QUERY = "SOURCE_QUERY" # SELECT within DML/DDL
|
|
469
|
+
UNION_BRANCH = "UNION_BRANCH" # Individual SELECT in UNION
|
|
470
|
+
|
|
471
|
+
class SQLComponent(BaseModel):
|
|
472
|
+
"""Represents an extracted SQL component."""
|
|
473
|
+
component_type: ComponentType
|
|
474
|
+
component_index: int # Sequential order within query
|
|
475
|
+
name: Optional[str] = None # CTE name, alias, or target table
|
|
476
|
+
sql: str # Extracted SQL for this component
|
|
477
|
+
parent_index: Optional[int] = None # Index of parent component
|
|
478
|
+
depth: int = 0 # Nesting level (0 = top-level)
|
|
479
|
+
is_executable: bool = True # Can run standalone?
|
|
480
|
+
dependencies: List[str] = [] # CTE names this depends on
|
|
481
|
+
location: str = "" # Human-readable location context
|
|
482
|
+
|
|
483
|
+
class QueryMetadata(BaseModel):
|
|
484
|
+
"""Metadata about a dissected query."""
|
|
485
|
+
query_index: int # 0-based index in multi-query file
|
|
486
|
+
query_preview: str # First 100 chars of query
|
|
487
|
+
statement_type: str # SELECT, INSERT, CREATE, etc.
|
|
488
|
+
total_components: int # Number of components extracted
|
|
489
|
+
|
|
490
|
+
class QueryDissectionResult(BaseModel):
|
|
491
|
+
"""Complete dissection result for a single query."""
|
|
492
|
+
metadata: QueryMetadata
|
|
493
|
+
components: List[SQLComponent]
|
|
494
|
+
original_sql: str # Full original SQL for reference
|
|
495
|
+
|
|
496
|
+
def get_component_by_name(self, name: str) -> Optional[SQLComponent]
|
|
497
|
+
def get_components_by_type(self, component_type: ComponentType) -> List[SQLComponent]
|
|
498
|
+
def get_executable_components(self) -> List[SQLComponent]
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
#### Dissection Analyzer (`dissection/analyzer.py`)
|
|
502
|
+
|
|
503
|
+
```python
|
|
504
|
+
class DissectionAnalyzer:
|
|
505
|
+
def __init__(self, sql: str, dialect: str = "spark")
|
|
506
|
+
def dissect_queries(self) -> List[QueryDissectionResult]
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
**Extraction Order:**
|
|
510
|
+
1. CTEs (by declaration order)
|
|
511
|
+
2. TARGET_TABLE (for INSERT/CREATE/MERGE)
|
|
512
|
+
3. SOURCE_QUERY (for DML/DDL statements)
|
|
513
|
+
4. MAIN_QUERY (with full SQL including WITH clause)
|
|
514
|
+
5. UNION_BRANCHES (if MAIN_QUERY is a UNION)
|
|
515
|
+
6. SUBQUERIES (depth-first from FROM clauses)
|
|
516
|
+
7. SCALAR_SUBQUERIES (from SELECT list, WHERE, HAVING)
|
|
517
|
+
|
|
518
|
+
**Key Features:**
|
|
519
|
+
- Uses SQLGlot AST traversal for accurate extraction
|
|
520
|
+
- Tracks CTE dependencies by finding table references matching CTE names
|
|
521
|
+
- UNION flattening extracts all branches from nested UNION expressions
|
|
522
|
+
- Parent-child relationships via `parent_index` and `depth`
|
|
523
|
+
- `location` field provides human-readable context (e.g., "SELECT list in CTE 'customer_segments'")
|
|
524
|
+
|
|
525
|
+
#### Formatters (`dissection/formatters.py`)
|
|
526
|
+
|
|
527
|
+
```python
|
|
528
|
+
class DissectionTextFormatter:
|
|
529
|
+
@staticmethod
|
|
530
|
+
def format(results: List[QueryDissectionResult], console: Console) -> None
|
|
531
|
+
|
|
532
|
+
class DissectionJsonFormatter:
|
|
533
|
+
@staticmethod
|
|
534
|
+
def format(results: List[QueryDissectionResult]) -> str
|
|
535
|
+
|
|
536
|
+
class DissectionCsvFormatter:
|
|
537
|
+
@staticmethod
|
|
538
|
+
def format(results: List[QueryDissectionResult]) -> str
|
|
539
|
+
```
|
|
540
|
+
|
|
541
|
+
**Output Formats:**
|
|
542
|
+
- **Text:** Rich table with columns for Index, Type, Name, Depth, Executable, Location, SQL Preview
|
|
543
|
+
- **JSON:** Full structured data with all component details
|
|
544
|
+
- **CSV:** Flattened format with semicolon-separated dependencies
|
|
545
|
+
|
|
546
|
+
#### CLI Command
|
|
547
|
+
|
|
548
|
+
```bash
|
|
549
|
+
# Dissect a SQL file
|
|
550
|
+
sqlglider dissect query.sql
|
|
551
|
+
|
|
552
|
+
# JSON output
|
|
553
|
+
sqlglider dissect query.sql --output-format json
|
|
554
|
+
|
|
555
|
+
# CSV output
|
|
556
|
+
sqlglider dissect query.sql --output-format csv
|
|
557
|
+
|
|
558
|
+
# Export to file
|
|
559
|
+
sqlglider dissect query.sql -f json -o dissected.json
|
|
560
|
+
|
|
561
|
+
# From stdin
|
|
562
|
+
echo "WITH cte AS (SELECT id FROM users) SELECT * FROM cte" | sqlglider dissect
|
|
563
|
+
|
|
564
|
+
# With templating
|
|
565
|
+
sqlglider dissect query.sql --templater jinja --var schema=analytics
|
|
566
|
+
```
|
|
567
|
+
|
|
568
|
+
### 6. File Utilities (`utils/file_utils.py`)
|
|
432
569
|
|
|
433
570
|
**Purpose:** File I/O operations with proper error handling
|
|
434
571
|
|
|
@@ -442,7 +579,7 @@ def read_sql_file(file_path: Path) -> str
|
|
|
442
579
|
- PermissionError: Cannot read file
|
|
443
580
|
- UnicodeDecodeError: File not UTF-8 encoded
|
|
444
581
|
|
|
445
|
-
### 6. Configuration System (`utils/config.py`)
|
|
582
|
+
### 7. Configuration System (`utils/config.py`)
|
|
446
583
|
|
|
447
584
|
**Purpose:** Load and manage configuration from `sqlglider.toml`
|
|
448
585
|
|
|
@@ -453,6 +590,11 @@ class ConfigSettings(BaseModel):
|
|
|
453
590
|
dialect: Optional[str] = None
|
|
454
591
|
level: Optional[str] = None
|
|
455
592
|
output_format: Optional[str] = None
|
|
593
|
+
templater: Optional[str] = None
|
|
594
|
+
templating: Optional[TemplatingConfig] = None
|
|
595
|
+
catalog_type: Optional[str] = None
|
|
596
|
+
ddl_folder: Optional[str] = None
|
|
597
|
+
catalog: Optional[CatalogConfig] = None
|
|
456
598
|
```
|
|
457
599
|
|
|
458
600
|
**Key Functions:**
|
|
@@ -480,6 +622,11 @@ def load_config(config_path: Optional[Path] = None) -> ConfigSettings
|
|
|
480
622
|
dialect = "postgres"
|
|
481
623
|
level = "column"
|
|
482
624
|
output_format = "json"
|
|
625
|
+
catalog_type = "databricks"
|
|
626
|
+
ddl_folder = "./ddl"
|
|
627
|
+
|
|
628
|
+
[sqlglider.catalog.databricks]
|
|
629
|
+
warehouse_id = "abc123..."
|
|
483
630
|
```
|
|
484
631
|
|
|
485
632
|
**Design Notes:**
|
|
@@ -488,6 +635,65 @@ output_format = "json"
|
|
|
488
635
|
- Fail-safe: Never crashes on config errors
|
|
489
636
|
- Forward compatible: Ignores unknown settings for future features
|
|
490
637
|
|
|
638
|
+
### 8. Catalog Module (`catalog/`)
|
|
639
|
+
|
|
640
|
+
**Purpose:** Plugin system for fetching DDL from remote data catalogs
|
|
641
|
+
|
|
642
|
+
The catalog module provides an extensible architecture for connecting to various data catalogs (e.g., Databricks Unity Catalog) and fetching table DDL definitions.
|
|
643
|
+
|
|
644
|
+
**Plugin Architecture:**
|
|
645
|
+
|
|
646
|
+
```python
|
|
647
|
+
# Abstract base class
|
|
648
|
+
class Catalog(ABC):
|
|
649
|
+
@property
|
|
650
|
+
@abstractmethod
|
|
651
|
+
def name(self) -> str: ...
|
|
652
|
+
|
|
653
|
+
@abstractmethod
|
|
654
|
+
def get_ddl(self, table_name: str) -> str: ...
|
|
655
|
+
|
|
656
|
+
@abstractmethod
|
|
657
|
+
def get_ddl_batch(self, table_names: List[str]) -> Dict[str, str]: ...
|
|
658
|
+
|
|
659
|
+
def configure(self, config: Optional[Dict[str, Any]] = None) -> None: ...
|
|
660
|
+
```
|
|
661
|
+
|
|
662
|
+
**Registry Pattern:**
|
|
663
|
+
|
|
664
|
+
- Catalogs are discovered via Python entry points (`sqlglider.catalogs`)
|
|
665
|
+
- Lazy loading with graceful handling of missing optional dependencies
|
|
666
|
+
- Factory function `get_catalog(name)` returns configured instances
|
|
667
|
+
|
|
668
|
+
**Built-in Catalogs:**
|
|
669
|
+
|
|
670
|
+
- **databricks**: Databricks Unity Catalog via `databricks-sdk`
|
|
671
|
+
- Uses `SHOW CREATE TABLE` via statement execution API
|
|
672
|
+
- Requires warehouse ID for SQL execution
|
|
673
|
+
- Authentication via env vars or config
|
|
674
|
+
|
|
675
|
+
**CLI Integration:**
|
|
676
|
+
|
|
677
|
+
```bash
|
|
678
|
+
# Pull DDL for tables in a SQL file
|
|
679
|
+
sqlglider tables pull query.sql --catalog-type databricks
|
|
680
|
+
|
|
681
|
+
# Output to folder (one file per table)
|
|
682
|
+
sqlglider tables pull query.sql -c databricks -o ./ddl/
|
|
683
|
+
|
|
684
|
+
# List available catalog providers
|
|
685
|
+
sqlglider tables pull --list
|
|
686
|
+
```
|
|
687
|
+
|
|
688
|
+
**Adding Custom Catalogs:**
|
|
689
|
+
|
|
690
|
+
1. Create a class inheriting from `Catalog`
|
|
691
|
+
2. Register via entry point in `pyproject.toml`:
|
|
692
|
+
```toml
|
|
693
|
+
[project.entry-points."sqlglider.catalogs"]
|
|
694
|
+
my-catalog = "my_package.catalog:MyCatalog"
|
|
695
|
+
```
|
|
696
|
+
|
|
491
697
|
## Technology Stack
|
|
492
698
|
|
|
493
699
|
### Core Dependencies
|
|
@@ -59,6 +59,10 @@ This project uses `uv` for Python package management. Python 3.11+ is required.
|
|
|
59
59
|
```
|
|
60
60
|
src/sqlglider/
|
|
61
61
|
├── cli.py # Typer CLI entry point
|
|
62
|
+
├── dissection/
|
|
63
|
+
│ ├── analyzer.py # DissectionAnalyzer for query decomposition
|
|
64
|
+
│ ├── formatters.py # Output formatters (text, JSON, CSV)
|
|
65
|
+
│ └── models.py # ComponentType, SQLComponent, QueryDissectionResult
|
|
62
66
|
├── graph/
|
|
63
67
|
│ ├── builder.py # Build lineage graphs from SQL files
|
|
64
68
|
│ ├── merge.py # Merge multiple graphs
|
|
@@ -109,7 +113,7 @@ uv run sqlglider lineage query.sql --dialect postgres
|
|
|
109
113
|
|
|
110
114
|
### Reading from Stdin
|
|
111
115
|
|
|
112
|
-
All commands (`lineage`, `tables`, `template`) support reading SQL from stdin when no file is provided:
|
|
116
|
+
All commands (`lineage`, `tables overview`, `tables pull`, `template`) support reading SQL from stdin when no file is provided:
|
|
113
117
|
|
|
114
118
|
```bash
|
|
115
119
|
# Pipe SQL directly to lineage analysis
|
|
@@ -128,8 +132,8 @@ FROM customers c
|
|
|
128
132
|
JOIN orders o ON c.id = o.customer_id
|
|
129
133
|
EOF
|
|
130
134
|
|
|
131
|
-
# Pipe to tables command
|
|
132
|
-
echo "SELECT * FROM users JOIN orders ON users.id = orders.user_id" | uv run sqlglider tables
|
|
135
|
+
# Pipe to tables overview command
|
|
136
|
+
echo "SELECT * FROM users JOIN orders ON users.id = orders.user_id" | uv run sqlglider tables overview
|
|
133
137
|
|
|
134
138
|
# Pipe template with variables
|
|
135
139
|
echo "SELECT * FROM {{ schema }}.users" | uv run sqlglider template --var schema=prod
|
|
@@ -174,25 +178,25 @@ Extract all tables involved in SQL files with usage and type information:
|
|
|
174
178
|
|
|
175
179
|
```bash
|
|
176
180
|
# List all tables in a SQL file
|
|
177
|
-
uv run sqlglider tables query.sql
|
|
181
|
+
uv run sqlglider tables overview query.sql
|
|
178
182
|
|
|
179
183
|
# JSON output
|
|
180
|
-
uv run sqlglider tables query.sql --output-format json
|
|
184
|
+
uv run sqlglider tables overview query.sql --output-format json
|
|
181
185
|
|
|
182
186
|
# CSV output
|
|
183
|
-
uv run sqlglider tables query.sql --output-format csv
|
|
187
|
+
uv run sqlglider tables overview query.sql --output-format csv
|
|
184
188
|
|
|
185
189
|
# Export to file
|
|
186
|
-
uv run sqlglider tables query.sql --output-format csv --output-file tables.csv
|
|
190
|
+
uv run sqlglider tables overview query.sql --output-format csv --output-file tables.csv
|
|
187
191
|
|
|
188
192
|
# Different SQL dialect
|
|
189
|
-
uv run sqlglider tables query.sql --dialect postgres
|
|
193
|
+
uv run sqlglider tables overview query.sql --dialect postgres
|
|
190
194
|
|
|
191
195
|
# Filter to queries referencing a specific table (multi-query files)
|
|
192
|
-
uv run sqlglider tables multi_query.sql --table customers
|
|
196
|
+
uv run sqlglider tables overview multi_query.sql --table customers
|
|
193
197
|
|
|
194
198
|
# With templating support
|
|
195
|
-
uv run sqlglider tables query.sql --templater jinja --var schema=analytics
|
|
199
|
+
uv run sqlglider tables overview query.sql --templater jinja --var schema=analytics
|
|
196
200
|
```
|
|
197
201
|
|
|
198
202
|
**Output includes:**
|
|
@@ -200,6 +204,32 @@ uv run sqlglider tables query.sql --templater jinja --var schema=analytics
|
|
|
200
204
|
- **Usage**: `INPUT` (read from), `OUTPUT` (written to), or `BOTH`
|
|
201
205
|
- **Object Type**: `TABLE`, `VIEW`, `CTE`, or `UNKNOWN`
|
|
202
206
|
|
|
207
|
+
### DDL Retrieval from Remote Catalogs
|
|
208
|
+
|
|
209
|
+
Pull DDL definitions from remote data catalogs for tables used in SQL:
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
# Pull DDL for tables in a SQL file (output to stdout)
|
|
213
|
+
uv run sqlglider tables pull query.sql --catalog-type databricks
|
|
214
|
+
|
|
215
|
+
# Pull DDL to a folder (one file per table)
|
|
216
|
+
uv run sqlglider tables pull query.sql -c databricks -o ./ddl/
|
|
217
|
+
|
|
218
|
+
# With templating
|
|
219
|
+
uv run sqlglider tables pull query.sql -c databricks --templater jinja --var schema=prod
|
|
220
|
+
|
|
221
|
+
# From stdin
|
|
222
|
+
echo "SELECT * FROM my_catalog.my_schema.users" | uv run sqlglider tables pull -c databricks
|
|
223
|
+
|
|
224
|
+
# List available catalog providers
|
|
225
|
+
uv run sqlglider tables pull --list
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
**Notes:**
|
|
229
|
+
- Requires optional dependency: `pip install sql-glider[databricks]`
|
|
230
|
+
- CTEs are automatically excluded (they don't exist in remote catalogs)
|
|
231
|
+
- Configure authentication via environment variables (`DATABRICKS_HOST`, `DATABRICKS_TOKEN`, `DATABRICKS_WAREHOUSE_ID`) or `sqlglider.toml`
|
|
232
|
+
|
|
203
233
|
### Graph-Based Lineage (Cross-File Analysis)
|
|
204
234
|
|
|
205
235
|
```bash
|
|
@@ -293,6 +323,45 @@ SELECT * FROM cte
|
|
|
293
323
|
3. Config file (`[sqlglider.templating.variables]`)
|
|
294
324
|
4. Environment variables (`SQLGLIDER_VAR_*`)
|
|
295
325
|
|
|
326
|
+
### Query Dissection
|
|
327
|
+
|
|
328
|
+
Decompose SQL queries into constituent parts for unit testing and analysis:
|
|
329
|
+
|
|
330
|
+
```bash
|
|
331
|
+
# Dissect a SQL file (text output)
|
|
332
|
+
uv run sqlglider dissect query.sql
|
|
333
|
+
|
|
334
|
+
# JSON output with full component details
|
|
335
|
+
uv run sqlglider dissect query.sql --output-format json
|
|
336
|
+
|
|
337
|
+
# CSV output for spreadsheet analysis
|
|
338
|
+
uv run sqlglider dissect query.sql --output-format csv
|
|
339
|
+
|
|
340
|
+
# Export to file
|
|
341
|
+
uv run sqlglider dissect query.sql --output-format json --output-file dissected.json
|
|
342
|
+
|
|
343
|
+
# With templating support
|
|
344
|
+
uv run sqlglider dissect query.sql --templater jinja --var schema=analytics
|
|
345
|
+
|
|
346
|
+
# From stdin
|
|
347
|
+
echo "WITH cte AS (SELECT id FROM users) SELECT * FROM cte" | uv run sqlglider dissect
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
**Extracted Component Types:**
|
|
351
|
+
- `CTE`: Common Table Expressions from WITH clause
|
|
352
|
+
- `MAIN_QUERY`: The primary SELECT statement
|
|
353
|
+
- `SUBQUERY`: Nested SELECT in FROM clause
|
|
354
|
+
- `SCALAR_SUBQUERY`: Single-value subquery in SELECT list, WHERE, HAVING
|
|
355
|
+
- `TARGET_TABLE`: Output table for INSERT/CREATE/MERGE (not executable)
|
|
356
|
+
- `SOURCE_QUERY`: SELECT within DML/DDL statements
|
|
357
|
+
- `UNION_BRANCH`: Individual SELECT in UNION/UNION ALL
|
|
358
|
+
|
|
359
|
+
**Use Cases:**
|
|
360
|
+
- Unit test CTEs and subqueries individually
|
|
361
|
+
- Extract DQL from CTAS, CREATE VIEW, INSERT statements
|
|
362
|
+
- Analyze query structure and component dependencies
|
|
363
|
+
- Break apart complex queries for understanding
|
|
364
|
+
|
|
296
365
|
## Development Guidelines
|
|
297
366
|
|
|
298
367
|
### Code Style
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-glider
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
|
|
5
5
|
Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
|
|
6
6
|
Project-URL: Repository, https://github.com/rycowhi/sql-glider/
|
|
@@ -26,6 +26,8 @@ Requires-Dist: rich>=13.0.0
|
|
|
26
26
|
Requires-Dist: rustworkx>=0.15.0
|
|
27
27
|
Requires-Dist: sqlglot[rs]>=25.0.0
|
|
28
28
|
Requires-Dist: typer>=0.9.0
|
|
29
|
+
Provides-Extra: databricks
|
|
30
|
+
Requires-Dist: databricks-sdk>=0.20.0; extra == 'databricks'
|
|
29
31
|
Description-Content-Type: text/markdown
|
|
30
32
|
|
|
31
33
|
# SQL Glider
|
|
@@ -40,6 +42,7 @@ SQL Glider provides powerful column-level and table-level lineage analysis for S
|
|
|
40
42
|
|
|
41
43
|
- **Forward Lineage:** Trace output columns back to their source tables and columns
|
|
42
44
|
- **Reverse Lineage:** Impact analysis - find which output columns are affected by a source column
|
|
45
|
+
- **Query Dissection:** Decompose SQL into components (CTEs, subqueries, UNION branches) for unit testing
|
|
43
46
|
- **Table Extraction:** List all tables in SQL files with usage type (INPUT/OUTPUT) and object type (TABLE/VIEW/CTE)
|
|
44
47
|
- **Multi-level Tracing:** Automatically handles CTEs, subqueries, and complex expressions
|
|
45
48
|
- **Graph-Based Lineage:** Build and query lineage graphs across thousands of SQL files
|
|
@@ -171,15 +174,32 @@ List all tables involved in SQL files with usage and type information:
|
|
|
171
174
|
|
|
172
175
|
```bash
|
|
173
176
|
# List all tables in a SQL file
|
|
174
|
-
uv run sqlglider tables query.sql
|
|
177
|
+
uv run sqlglider tables overview query.sql
|
|
175
178
|
|
|
176
179
|
# JSON output with detailed table info
|
|
177
|
-
uv run sqlglider tables query.sql --output-format json
|
|
180
|
+
uv run sqlglider tables overview query.sql --output-format json
|
|
178
181
|
|
|
179
182
|
# Export to CSV
|
|
180
|
-
uv run sqlglider tables query.sql --output-format csv --output-file tables.csv
|
|
183
|
+
uv run sqlglider tables overview query.sql --output-format csv --output-file tables.csv
|
|
181
184
|
```
|
|
182
185
|
|
|
186
|
+
### Pull DDL from Remote Catalogs
|
|
187
|
+
|
|
188
|
+
Fetch DDL definitions from remote data catalogs (e.g., Databricks Unity Catalog):
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
# Pull DDL for all tables used in a SQL file (outputs to stdout)
|
|
192
|
+
uv run sqlglider tables pull query.sql --catalog-type databricks
|
|
193
|
+
|
|
194
|
+
# Save DDL files to a folder (one file per table)
|
|
195
|
+
uv run sqlglider tables pull query.sql -c databricks -o ./ddl/
|
|
196
|
+
|
|
197
|
+
# List available catalog providers
|
|
198
|
+
uv run sqlglider tables pull --list
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
**Note:** The Databricks catalog provider requires an optional dependency. Install it with: `pip install "sql-glider[databricks]"`
|
|
202
|
+
|
|
183
203
|
**Example Output (JSON):**
|
|
184
204
|
```json
|
|
185
205
|
{
|
|
@@ -204,6 +224,94 @@ uv run sqlglider tables query.sql --output-format csv --output-file tables.csv
|
|
|
204
224
|
- `CTE`: Common Table Expression (WITH clause)
|
|
205
225
|
- `UNKNOWN`: Cannot determine type from SQL alone
|
|
206
226
|
|
|
227
|
+
### Query Dissection
|
|
228
|
+
|
|
229
|
+
Decompose SQL queries into constituent parts for unit testing and analysis:
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
# Dissect a SQL file (text output)
|
|
233
|
+
uv run sqlglider dissect query.sql
|
|
234
|
+
|
|
235
|
+
# JSON output with full component details
|
|
236
|
+
uv run sqlglider dissect query.sql --output-format json
|
|
237
|
+
|
|
238
|
+
# CSV output for spreadsheet analysis
|
|
239
|
+
uv run sqlglider dissect query.sql --output-format csv
|
|
240
|
+
|
|
241
|
+
# Export to file
|
|
242
|
+
uv run sqlglider dissect query.sql -f json -o dissected.json
|
|
243
|
+
|
|
244
|
+
# With templating support
|
|
245
|
+
uv run sqlglider dissect query.sql --templater jinja --var schema=analytics
|
|
246
|
+
|
|
247
|
+
# From stdin
|
|
248
|
+
echo "WITH cte AS (SELECT id FROM users) SELECT * FROM cte" | uv run sqlglider dissect
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
**Example Input:**
|
|
252
|
+
```sql
|
|
253
|
+
WITH order_totals AS (
|
|
254
|
+
SELECT customer_id, SUM(amount) AS total
|
|
255
|
+
FROM orders
|
|
256
|
+
GROUP BY customer_id
|
|
257
|
+
)
|
|
258
|
+
INSERT INTO analytics.summary
|
|
259
|
+
SELECT * FROM order_totals WHERE total > 100
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
**Example Output (JSON):**
|
|
263
|
+
```json
|
|
264
|
+
{
|
|
265
|
+
"queries": [{
|
|
266
|
+
"query_index": 0,
|
|
267
|
+
"statement_type": "INSERT",
|
|
268
|
+
"total_components": 3,
|
|
269
|
+
"components": [
|
|
270
|
+
{
|
|
271
|
+
"component_type": "CTE",
|
|
272
|
+
"component_index": 0,
|
|
273
|
+
"name": "order_totals",
|
|
274
|
+
"sql": "SELECT customer_id, SUM(amount) AS total FROM orders GROUP BY customer_id",
|
|
275
|
+
"is_executable": true,
|
|
276
|
+
"dependencies": [],
|
|
277
|
+
"location": "WITH clause"
|
|
278
|
+
},
|
|
279
|
+
{
|
|
280
|
+
"component_type": "TARGET_TABLE",
|
|
281
|
+
"component_index": 1,
|
|
282
|
+
"name": "analytics.summary",
|
|
283
|
+
"sql": "analytics.summary",
|
|
284
|
+
"is_executable": false,
|
|
285
|
+
"location": "INSERT INTO target"
|
|
286
|
+
},
|
|
287
|
+
{
|
|
288
|
+
"component_type": "SOURCE_QUERY",
|
|
289
|
+
"component_index": 2,
|
|
290
|
+
"sql": "SELECT * FROM order_totals WHERE total > 100",
|
|
291
|
+
"is_executable": true,
|
|
292
|
+
"dependencies": ["order_totals"],
|
|
293
|
+
"location": "INSERT source SELECT"
|
|
294
|
+
}
|
|
295
|
+
]
|
|
296
|
+
}]
|
|
297
|
+
}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
**Extracted Component Types:**
|
|
301
|
+
- `CTE`: Common Table Expressions from WITH clause
|
|
302
|
+
- `MAIN_QUERY`: The primary SELECT statement
|
|
303
|
+
- `SUBQUERY`: Nested SELECT in FROM clause
|
|
304
|
+
- `SCALAR_SUBQUERY`: Single-value subquery in SELECT list, WHERE, HAVING
|
|
305
|
+
- `TARGET_TABLE`: Output table for INSERT/CREATE/MERGE (not executable)
|
|
306
|
+
- `SOURCE_QUERY`: SELECT within DML/DDL statements
|
|
307
|
+
- `UNION_BRANCH`: Individual SELECT in UNION/UNION ALL
|
|
308
|
+
|
|
309
|
+
**Use Cases:**
|
|
310
|
+
- Unit test CTEs and subqueries individually
|
|
311
|
+
- Extract DQL from CTAS, CREATE VIEW, INSERT statements
|
|
312
|
+
- Analyze query structure and component dependencies
|
|
313
|
+
- Break apart complex queries for understanding
|
|
314
|
+
|
|
207
315
|
### Different SQL Dialects
|
|
208
316
|
|
|
209
317
|
```bash
|
|
@@ -475,7 +583,7 @@ Options:
|
|
|
475
583
|
### Tables Command
|
|
476
584
|
|
|
477
585
|
```
|
|
478
|
-
sqlglider tables <sql_file> [OPTIONS]
|
|
586
|
+
sqlglider tables overview <sql_file> [OPTIONS]
|
|
479
587
|
|
|
480
588
|
Arguments:
|
|
481
589
|
sql_file Path to SQL file to analyze [required]
|
|
@@ -491,6 +599,66 @@ Options:
|
|
|
491
599
|
--help Show help message and exit
|
|
492
600
|
```
|
|
493
601
|
|
|
602
|
+
```
|
|
603
|
+
sqlglider tables pull [sql_file] [OPTIONS]
|
|
604
|
+
|
|
605
|
+
Arguments:
|
|
606
|
+
sql_file Path to SQL file to analyze [optional, reads from stdin if omitted]
|
|
607
|
+
|
|
608
|
+
Options:
|
|
609
|
+
--catalog-type, -c Catalog provider (e.g., 'databricks') [required if not in config]
|
|
610
|
+
--ddl-folder, -o Output folder for DDL files [optional, outputs to stdout if omitted]
|
|
611
|
+
--dialect, -d SQL dialect (spark, postgres, snowflake, etc.) [default: spark]
|
|
612
|
+
--templater, -t Templater for SQL preprocessing (e.g., 'jinja', 'none') [optional]
|
|
613
|
+
--var, -v Template variable in key=value format (repeatable) [optional]
|
|
614
|
+
--vars-file Path to variables file (JSON or YAML) [optional]
|
|
615
|
+
--list, -l List available catalog providers and exit
|
|
616
|
+
--help Show help message and exit
|
|
617
|
+
```
|
|
618
|
+
|
|
619
|
+
**Databricks Setup:**
|
|
620
|
+
|
|
621
|
+
Install the optional Databricks dependency:
|
|
622
|
+
```bash
|
|
623
|
+
pip install "sql-glider[databricks]"
|
|
624
|
+
```
|
|
625
|
+
|
|
626
|
+
Configure authentication (via environment variables or `sqlglider.toml`):
|
|
627
|
+
```bash
|
|
628
|
+
export DATABRICKS_HOST="https://your-workspace.cloud.databricks.com"
|
|
629
|
+
export DATABRICKS_TOKEN="dapi..."
|
|
630
|
+
export DATABRICKS_WAREHOUSE_ID="abc123..."
|
|
631
|
+
```
|
|
632
|
+
|
|
633
|
+
### Dissect Command
|
|
634
|
+
|
|
635
|
+
```
|
|
636
|
+
sqlglider dissect [sql_file] [OPTIONS]
|
|
637
|
+
|
|
638
|
+
Arguments:
|
|
639
|
+
sql_file Path to SQL file to analyze [optional, reads from stdin if omitted]
|
|
640
|
+
|
|
641
|
+
Options:
|
|
642
|
+
--dialect, -d SQL dialect (spark, postgres, snowflake, etc.) [default: spark]
|
|
643
|
+
--output-format, -f Output format: 'text', 'json', or 'csv' [default: text]
|
|
644
|
+
--output-file, -o Write output to file instead of stdout [optional]
|
|
645
|
+
--templater, -t Templater for SQL preprocessing (e.g., 'jinja', 'none') [optional]
|
|
646
|
+
--var, -v Template variable in key=value format (repeatable) [optional]
|
|
647
|
+
--vars-file Path to variables file (JSON or YAML) [optional]
|
|
648
|
+
--help Show help message and exit
|
|
649
|
+
```
|
|
650
|
+
|
|
651
|
+
**Output Fields:**
|
|
652
|
+
- `component_type`: Type of component (CTE, MAIN_QUERY, SUBQUERY, etc.)
|
|
653
|
+
- `component_index`: Sequential order within the query (0-based)
|
|
654
|
+
- `name`: CTE name, subquery alias, or target table name
|
|
655
|
+
- `sql`: The extracted SQL for this component
|
|
656
|
+
- `is_executable`: Whether the component can run standalone (TARGET_TABLE is false)
|
|
657
|
+
- `dependencies`: List of CTE names this component references
|
|
658
|
+
- `location`: Human-readable context (e.g., "WITH clause", "FROM clause")
|
|
659
|
+
- `depth`: Nesting level (0 = top-level)
|
|
660
|
+
- `parent_index`: Index of parent component for nested components
|
|
661
|
+
|
|
494
662
|
### Graph Commands
|
|
495
663
|
|
|
496
664
|
```
|
|
@@ -612,6 +780,10 @@ See [ARCHITECTURE.md](ARCHITECTURE.md) for detailed technical documentation.
|
|
|
612
780
|
```
|
|
613
781
|
src/sqlglider/
|
|
614
782
|
├── cli.py # Typer CLI entry point
|
|
783
|
+
├── dissection/
|
|
784
|
+
│ ├── analyzer.py # DissectionAnalyzer for query decomposition
|
|
785
|
+
│ ├── formatters.py # Output formatters (text, JSON, CSV)
|
|
786
|
+
│ └── models.py # ComponentType, SQLComponent, QueryDissectionResult
|
|
615
787
|
├── graph/
|
|
616
788
|
│ ├── builder.py # Build graphs from SQL files
|
|
617
789
|
│ ├── merge.py # Merge multiple graphs
|