sql-glider 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/METADATA +177 -5
- sql_glider-0.1.4.dist-info/RECORD +34 -0
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/entry_points.txt +3 -0
- sqlglider/_version.py +2 -2
- sqlglider/catalog/__init__.py +30 -0
- sqlglider/catalog/base.py +99 -0
- sqlglider/catalog/databricks.py +255 -0
- sqlglider/catalog/registry.py +121 -0
- sqlglider/cli.py +467 -15
- sqlglider/dissection/__init__.py +17 -0
- sqlglider/dissection/analyzer.py +767 -0
- sqlglider/dissection/formatters.py +222 -0
- sqlglider/dissection/models.py +112 -0
- sqlglider/graph/builder.py +46 -8
- sqlglider/lineage/analyzer.py +281 -13
- sqlglider/utils/config.py +25 -0
- sql_glider-0.1.2.dist-info/RECORD +0 -26
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/WHEEL +0 -0
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""Output formatters for dissection results."""
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
from rich.table import Table
|
|
11
|
+
|
|
12
|
+
from sqlglider.dissection.models import QueryDissectionResult
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DissectionTextFormatter:
    """Format dissection results as Rich tables for terminal display."""

    @staticmethod
    def format(results: List[QueryDissectionResult], console: Console) -> None:
        """
        Format and print dissection results as Rich tables.

        Creates a styled table for each query showing all extracted components.

        Args:
            results: List of QueryDissectionResult objects
            console: Rich Console instance for output
        """
        if not results:
            console.print("[yellow]No dissection results found.[/yellow]")
            return

        for i, result in enumerate(results):
            # Add spacing between queries (except for first)
            if i > 0:
                console.print()

            # Create table with query info as title
            title = (
                f"Query {result.metadata.query_index} "
                f"({result.metadata.statement_type}): "
                f"{result.metadata.query_preview}"
            )
            table = Table(title=title, title_style="bold")

            table.add_column("Index", style="dim", width=6)
            table.add_column("Type", style="cyan", width=16)
            table.add_column("Name", style="green", min_width=10)
            table.add_column("Depth", style="yellow", width=6)
            table.add_column("Exec?", style="magenta", width=6)
            table.add_column("Location", style="blue", min_width=15)
            table.add_column("SQL Preview", style="dim", min_width=30)

            # Add rows for each component
            for component in result.components:
                # Collapse runs of whitespace, then truncate for preview.
                # Fix: decide the "..." suffix from the *normalized* string,
                # not the raw SQL — otherwise a query that is long only
                # because of whitespace gets a spurious ellipsis.
                normalized = " ".join(component.sql.split())
                sql_preview = normalized[:50]
                if len(normalized) > 50:
                    sql_preview += "..."

                table.add_row(
                    str(component.component_index),
                    component.component_type.value,
                    component.name or "-",
                    str(component.depth),
                    "Yes" if component.is_executable else "No",
                    component.location[:35] + "..."
                    if len(component.location) > 35
                    else component.location,
                    sql_preview,
                )

            console.print(table)
            console.print(
                f"[dim]Total components: {result.metadata.total_components}[/dim]"
            )
|
|
78
|
+
|
|
79
|
+
class DissectionJsonFormatter:
    """Format dissection results as JSON."""

    @staticmethod
    def format(results: List[QueryDissectionResult]) -> str:
        """
        Serialize dissection results to a JSON string.

        The payload has a single top-level "queries" array; each entry
        carries the query metadata, the extracted components, and the
        original SQL text:

        {
            "queries": [
                {
                    "query_index": 0,
                    "query_preview": "SELECT ...",
                    "statement_type": "INSERT",
                    "total_components": 5,
                    "components": [
                        {
                            "component_type": "CTE",
                            "component_index": 0,
                            "name": "order_totals",
                            "sql": "SELECT ...",
                            "parent_index": null,
                            "depth": 0,
                            "is_executable": true,
                            "dependencies": [],
                            "location": "WITH clause"
                        }
                    ],
                    "original_sql": "WITH order_totals AS ..."
                }
            ]
        }

        Args:
            results: List of QueryDissectionResult objects

        Returns:
            JSON-formatted string
        """

        def _component_payload(comp) -> dict:
            # Flatten one SQLComponent into plain JSON-serializable values.
            return {
                "component_type": comp.component_type.value,
                "component_index": comp.component_index,
                "name": comp.name,
                "sql": comp.sql,
                "parent_index": comp.parent_index,
                "depth": comp.depth,
                "is_executable": comp.is_executable,
                "dependencies": comp.dependencies,
                "location": comp.location,
            }

        payload = {
            "queries": [
                {
                    "query_index": res.metadata.query_index,
                    "query_preview": res.metadata.query_preview,
                    "statement_type": res.metadata.statement_type,
                    "total_components": res.metadata.total_components,
                    "components": [
                        _component_payload(comp) for comp in res.components
                    ],
                    "original_sql": res.original_sql,
                }
                for res in results
            ]
        }
        return json.dumps(payload, indent=2)
|
|
146
|
+
|
|
147
|
+
class DissectionCsvFormatter:
    """Format dissection results as CSV."""

    @staticmethod
    def format(results: List[QueryDissectionResult]) -> str:
        """
        Serialize dissection results to a CSV string.

        One data row per component, e.g.:
        query_index,component_index,component_type,name,depth,is_executable,location,dependencies,sql
        0,0,CTE,order_totals,0,true,WITH clause,,"SELECT ..."

        Args:
            results: List of QueryDissectionResult objects

        Returns:
            CSV-formatted string
        """
        if not results:
            return ""

        buffer = StringIO()
        writer = csv.writer(buffer)

        # Header row first, then one row per component across all queries.
        writer.writerow(
            [
                "query_index",
                "component_index",
                "component_type",
                "name",
                "depth",
                "is_executable",
                "location",
                "dependencies",
                "sql",
            ]
        )

        for result in results:
            for component in result.components:
                writer.writerow(
                    [
                        result.metadata.query_index,
                        component.component_index,
                        component.component_type.value,
                        component.name or "",
                        component.depth,
                        "true" if component.is_executable else "false",
                        component.location,
                        # Dependencies are packed into one cell, ';'-separated.
                        ";".join(component.dependencies),
                        component.sql,
                    ]
                )

        return buffer.getvalue()
|
|
206
|
+
|
|
207
|
+
class OutputWriter:
    """Write formatted output to file or stdout."""

    @staticmethod
    def write(content: str, output_file: Optional[Path] = None) -> None:
        """
        Write content to file or stdout.

        Args:
            content: The content to write
            output_file: Optional file path. If None, writes to stdout.
        """
        if not output_file:
            # No destination given: emit to stdout.
            print(content)
            return
        output_file.write_text(content, encoding="utf-8")
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Pydantic models for SQL dissection results."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ComponentType(str, Enum):
    """Type of SQL component extracted from a query.

    Subclasses ``str`` so member values compare and serialize as plain
    strings; formatters emit ``.value`` directly.
    """

    # NOTE(review): precise extraction rules for each kind live in the
    # dissection analyzer (not shown in this file).
    CTE = "CTE"  # common table expression (WITH clause)
    MAIN_QUERY = "MAIN_QUERY"
    SUBQUERY = "SUBQUERY"
    SCALAR_SUBQUERY = "SCALAR_SUBQUERY"
    TARGET_TABLE = "TARGET_TABLE"
    SOURCE_QUERY = "SOURCE_QUERY"
    UNION_BRANCH = "UNION_BRANCH"
|
|
20
|
+
|
|
21
|
+
class SQLComponent(BaseModel):
    """Represents a single SQL component extracted from a query.

    ``component_index`` reflects extraction order within the query;
    ``parent_index`` and ``depth`` together encode nesting of subqueries.
    Field-level semantics are carried by the ``Field`` descriptions below.
    """

    component_type: ComponentType = Field(
        ..., description="Type of component (CTE, SUBQUERY, etc.)"
    )
    component_index: int = Field(
        ..., description="0-based index within query (order of extraction)"
    )
    name: Optional[str] = Field(
        None,
        description="Name/alias of component (CTE name, subquery alias, target table)",
    )
    sql: str = Field(
        ..., description="Extracted SQL for this component (executable if applicable)"
    )
    parent_index: Optional[int] = Field(
        None, description="Index of parent component (for nested subqueries)"
    )
    depth: int = Field(
        default=0, description="Nesting depth (0 = top-level, 1+ = nested)"
    )
    is_executable: bool = Field(
        default=True, description="Whether this SQL can be executed standalone"
    )
    dependencies: List[str] = Field(
        default_factory=list,
        description="Names of CTEs this component depends on",
    )
    location: str = Field(
        ..., description="Human-readable location context (e.g., 'WITH clause')"
    )
|
54
|
+
|
|
55
|
+
class QueryMetadata(BaseModel):
    """Metadata about a dissected query.

    One instance per statement in a (possibly multi-statement) input;
    ``query_index`` is the statement's position in that input.
    """

    query_index: int = Field(..., description="0-based query index in multi-query file")
    query_preview: str = Field(..., description="First 100 chars of original query")
    statement_type: str = Field(
        ..., description="Type of SQL statement (SELECT, INSERT, CREATE, etc.)"
    )
    total_components: int = Field(
        ..., description="Total number of components extracted"
    )
|
67
|
+
|
|
68
|
+
class QueryDissectionResult(BaseModel):
    """Complete dissection result for a single query."""

    metadata: QueryMetadata
    components: List[SQLComponent] = Field(
        default_factory=list,
        description="All extracted components in order",
    )
    original_sql: str = Field(
        ..., description="Original SQL query for reference/validation"
    )

    def get_component_by_name(self, name: str) -> Optional[SQLComponent]:
        """Find a component by name (case-insensitive).

        Args:
            name: The component name to search for.

        Returns:
            The matching SQLComponent or None if not found.
        """
        target = name.lower()
        # First match wins; unnamed components (name is None) never match.
        return next(
            (
                comp
                for comp in self.components
                if comp.name and comp.name.lower() == target
            ),
            None,
        )

    def get_components_by_type(self, comp_type: ComponentType) -> List[SQLComponent]:
        """Get all components of a specific type.

        Args:
            comp_type: The ComponentType to filter by.

        Returns:
            List of matching SQLComponent objects.
        """
        matches: List[SQLComponent] = []
        for comp in self.components:
            if comp.component_type == comp_type:
                matches.append(comp)
        return matches

    def get_executable_components(self) -> List[SQLComponent]:
        """Get all executable components.

        Returns:
            List of SQLComponent objects that can be executed standalone.
        """
        return list(filter(lambda comp: comp.is_executable, self.components))
|
sqlglider/graph/builder.py
CHANGED
|
@@ -6,6 +6,7 @@ from typing import Callable, Dict, List, Optional, Set
|
|
|
6
6
|
|
|
7
7
|
import rustworkx as rx
|
|
8
8
|
from rich.console import Console
|
|
9
|
+
from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn
|
|
9
10
|
|
|
10
11
|
from sqlglider.global_models import AnalysisLevel, NodeFormat
|
|
11
12
|
from sqlglider.graph.models import (
|
|
@@ -166,11 +167,8 @@ class GraphBuilder:
|
|
|
166
167
|
else:
|
|
167
168
|
pattern = glob_pattern
|
|
168
169
|
|
|
169
|
-
for
|
|
170
|
-
|
|
171
|
-
self.add_file(sql_file, dialect)
|
|
172
|
-
|
|
173
|
-
return self
|
|
170
|
+
sql_files = [f for f in sorted(dir_path.glob(pattern)) if f.is_file()]
|
|
171
|
+
return self.add_files(sql_files, dialect)
|
|
174
172
|
|
|
175
173
|
def add_manifest(
|
|
176
174
|
self,
|
|
@@ -194,6 +192,8 @@ class GraphBuilder:
|
|
|
194
192
|
manifest = Manifest.from_csv(manifest_path)
|
|
195
193
|
base_dir = manifest_path.parent
|
|
196
194
|
|
|
195
|
+
# Collect files with their dialects
|
|
196
|
+
files_with_dialects: List[tuple[Path, str]] = []
|
|
197
197
|
for entry in manifest.entries:
|
|
198
198
|
# Resolve file path relative to manifest location
|
|
199
199
|
file_path = Path(entry.file_path)
|
|
@@ -202,7 +202,25 @@ class GraphBuilder:
|
|
|
202
202
|
|
|
203
203
|
# Use entry dialect, then CLI dialect, then builder default
|
|
204
204
|
entry_dialect = entry.dialect or dialect or self.dialect
|
|
205
|
-
|
|
205
|
+
files_with_dialects.append((file_path, entry_dialect))
|
|
206
|
+
|
|
207
|
+
# Process with progress
|
|
208
|
+
if files_with_dialects:
|
|
209
|
+
total = len(files_with_dialects)
|
|
210
|
+
with Progress(
|
|
211
|
+
TextColumn("[progress.description]{task.description}"),
|
|
212
|
+
BarColumn(),
|
|
213
|
+
TaskProgressColumn(),
|
|
214
|
+
console=console,
|
|
215
|
+
transient=False,
|
|
216
|
+
) as progress:
|
|
217
|
+
task = progress.add_task("Parsing", total=total)
|
|
218
|
+
for i, (file_path, file_dialect) in enumerate(
|
|
219
|
+
files_with_dialects, start=1
|
|
220
|
+
):
|
|
221
|
+
console.print(f"Parsing file {i}/{total}: {file_path.name}")
|
|
222
|
+
self.add_file(file_path, file_dialect)
|
|
223
|
+
progress.advance(task)
|
|
206
224
|
|
|
207
225
|
return self
|
|
208
226
|
|
|
@@ -210,6 +228,7 @@ class GraphBuilder:
|
|
|
210
228
|
self,
|
|
211
229
|
file_paths: List[Path],
|
|
212
230
|
dialect: Optional[str] = None,
|
|
231
|
+
show_progress: bool = True,
|
|
213
232
|
) -> "GraphBuilder":
|
|
214
233
|
"""
|
|
215
234
|
Add lineage from multiple SQL files.
|
|
@@ -217,12 +236,31 @@ class GraphBuilder:
|
|
|
217
236
|
Args:
|
|
218
237
|
file_paths: List of paths to SQL files
|
|
219
238
|
dialect: SQL dialect (uses builder default if not specified)
|
|
239
|
+
show_progress: Whether to print progress messages
|
|
220
240
|
|
|
221
241
|
Returns:
|
|
222
242
|
self for method chaining
|
|
223
243
|
"""
|
|
224
|
-
|
|
225
|
-
self
|
|
244
|
+
if not file_paths:
|
|
245
|
+
return self
|
|
246
|
+
|
|
247
|
+
if show_progress:
|
|
248
|
+
total = len(file_paths)
|
|
249
|
+
with Progress(
|
|
250
|
+
TextColumn("[progress.description]{task.description}"),
|
|
251
|
+
BarColumn(),
|
|
252
|
+
TaskProgressColumn(),
|
|
253
|
+
console=console,
|
|
254
|
+
transient=False,
|
|
255
|
+
) as progress:
|
|
256
|
+
task = progress.add_task("Parsing", total=total)
|
|
257
|
+
for i, file_path in enumerate(file_paths, start=1):
|
|
258
|
+
console.print(f"Parsing file {i}/{total}: {file_path.name}")
|
|
259
|
+
self.add_file(file_path, dialect)
|
|
260
|
+
progress.advance(task)
|
|
261
|
+
else:
|
|
262
|
+
for file_path in file_paths:
|
|
263
|
+
self.add_file(file_path, dialect)
|
|
226
264
|
return self
|
|
227
265
|
|
|
228
266
|
def _ensure_node(
|