sql-glider 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.3.dist-info}/METADATA +177 -5
- sql_glider-0.1.3.dist-info/RECORD +34 -0
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.3.dist-info}/entry_points.txt +3 -0
- sqlglider/_version.py +2 -2
- sqlglider/catalog/__init__.py +30 -0
- sqlglider/catalog/base.py +99 -0
- sqlglider/catalog/databricks.py +255 -0
- sqlglider/catalog/registry.py +121 -0
- sqlglider/cli.py +467 -15
- sqlglider/dissection/__init__.py +17 -0
- sqlglider/dissection/analyzer.py +767 -0
- sqlglider/dissection/formatters.py +222 -0
- sqlglider/dissection/models.py +112 -0
- sqlglider/graph/builder.py +46 -8
- sqlglider/lineage/analyzer.py +66 -12
- sqlglider/utils/config.py +25 -0
- sql_glider-0.1.2.dist-info/RECORD +0 -26
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.3.dist-info}/WHEEL +0 -0
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.3.dist-info}/licenses/LICENSE +0 -0
sqlglider/dissection/formatters.py
ADDED
@@ -0,0 +1,222 @@
+"""Output formatters for dissection results."""
+
+import csv
+import json
+from io import StringIO
+from pathlib import Path
+from typing import List, Optional
+
+from rich.console import Console
+from rich.table import Table
+
+from sqlglider.dissection.models import QueryDissectionResult
+
+
+class DissectionTextFormatter:
+    """Format dissection results as Rich tables for terminal display."""
+
+    @staticmethod
+    def format(results: List[QueryDissectionResult], console: Console) -> None:
+        """
+        Format and print dissection results as Rich tables.
+
+        Creates a styled table for each query showing all extracted components.
+
+        Args:
+            results: List of QueryDissectionResult objects
+            console: Rich Console instance for output
+        """
+        if not results:
+            console.print("[yellow]No dissection results found.[/yellow]")
+            return
+
+        for i, result in enumerate(results):
+            # Add spacing between queries (except for first)
+            if i > 0:
+                console.print()
+
+            # Create table with query info as title
+            title = (
+                f"Query {result.metadata.query_index} "
+                f"({result.metadata.statement_type}): "
+                f"{result.metadata.query_preview}"
+            )
+            table = Table(title=title, title_style="bold")
+
+            table.add_column("Index", style="dim", width=6)
+            table.add_column("Type", style="cyan", width=16)
+            table.add_column("Name", style="green", min_width=10)
+            table.add_column("Depth", style="yellow", width=6)
+            table.add_column("Exec?", style="magenta", width=6)
+            table.add_column("Location", style="blue", min_width=15)
+            table.add_column("SQL Preview", style="dim", min_width=30)
+
+            # Add rows for each component
+            for component in result.components:
+                # Truncate SQL for preview
+                sql_preview = " ".join(component.sql.split())[:50]
+                if len(component.sql) > 50:
+                    sql_preview += "..."
+
+                table.add_row(
+                    str(component.component_index),
+                    component.component_type.value,
+                    component.name or "-",
+                    str(component.depth),
+                    "Yes" if component.is_executable else "No",
+                    component.location[:35] + "..."
+                    if len(component.location) > 35
+                    else component.location,
+                    sql_preview,
+                )
+
+            console.print(table)
+            console.print(
+                f"[dim]Total components: {result.metadata.total_components}[/dim]"
+            )
+
+
+class DissectionJsonFormatter:
+    """Format dissection results as JSON."""
+
+    @staticmethod
+    def format(results: List[QueryDissectionResult]) -> str:
+        """
+        Format dissection results as JSON.
+
+        Output format:
+            {
+                "queries": [
+                    {
+                        "query_index": 0,
+                        "query_preview": "SELECT ...",
+                        "statement_type": "INSERT",
+                        "total_components": 5,
+                        "components": [
+                            {
+                                "component_type": "CTE",
+                                "component_index": 0,
+                                "name": "order_totals",
+                                "sql": "SELECT ...",
+                                "parent_index": null,
+                                "depth": 0,
+                                "is_executable": true,
+                                "dependencies": [],
+                                "location": "WITH clause"
+                            }
+                        ],
+                        "original_sql": "WITH order_totals AS ..."
+                    }
+                ]
+            }
+
+        Args:
+            results: List of QueryDissectionResult objects
+
+        Returns:
+            JSON-formatted string
+        """
+        queries = []
+        for result in results:
+            query_data = {
+                "query_index": result.metadata.query_index,
+                "query_preview": result.metadata.query_preview,
+                "statement_type": result.metadata.statement_type,
+                "total_components": result.metadata.total_components,
+                "components": [
+                    {
+                        "component_type": component.component_type.value,
+                        "component_index": component.component_index,
+                        "name": component.name,
+                        "sql": component.sql,
+                        "parent_index": component.parent_index,
+                        "depth": component.depth,
+                        "is_executable": component.is_executable,
+                        "dependencies": component.dependencies,
+                        "location": component.location,
+                    }
+                    for component in result.components
+                ],
+                "original_sql": result.original_sql,
+            }
+            queries.append(query_data)
+
+        return json.dumps({"queries": queries}, indent=2)
+
+
+class DissectionCsvFormatter:
+    """Format dissection results as CSV."""
+
+    @staticmethod
+    def format(results: List[QueryDissectionResult]) -> str:
+        """
+        Format dissection results as CSV.
+
+        Output format:
+            query_index,component_index,component_type,name,depth,is_executable,location,dependencies,sql
+            0,0,CTE,order_totals,0,true,WITH clause,,"SELECT ..."
+
+        Args:
+            results: List of QueryDissectionResult objects
+
+        Returns:
+            CSV-formatted string
+        """
+        if not results:
+            return ""
+
+        output = StringIO()
+        headers = [
+            "query_index",
+            "component_index",
+            "component_type",
+            "name",
+            "depth",
+            "is_executable",
+            "location",
+            "dependencies",
+            "sql",
+        ]
+
+        writer = csv.writer(output)
+        writer.writerow(headers)
+
+        # Write data rows
+        for result in results:
+            query_index = result.metadata.query_index
+            for component in result.components:
+                # Join dependencies with semicolon
+                deps_str = ";".join(component.dependencies)
+                writer.writerow(
+                    [
+                        query_index,
+                        component.component_index,
+                        component.component_type.value,
+                        component.name or "",
+                        component.depth,
+                        "true" if component.is_executable else "false",
+                        component.location,
+                        deps_str,
+                        component.sql,
+                    ]
+                )
+
+        return output.getvalue()
+
+
+class OutputWriter:
+    """Write formatted output to file or stdout."""
+
+    @staticmethod
+    def write(content: str, output_file: Optional[Path] = None) -> None:
+        """
+        Write content to file or stdout.
+
+        Args:
+            content: The content to write
+            output_file: Optional file path. If None, writes to stdout.
+        """
+        if output_file:
+            output_file.write_text(content, encoding="utf-8")
+        else:
+            print(content)
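
The three formatter classes share a static format() interface, so output handling reduces to picking a formatter and handing its string to OutputWriter. A minimal sketch of that wiring; the results list would normally come from the dissection analyzer, and an empty list is used here only so the snippet runs standalone:

from pathlib import Path

from sqlglider.dissection.formatters import (
    DissectionCsvFormatter,
    DissectionJsonFormatter,
    OutputWriter,
)

results = []  # normally produced by sqlglider's dissection analyzer
# JSON to a file, CSV to stdout; both formatters accept the same result list.
OutputWriter.write(DissectionJsonFormatter.format(results), Path("dissection.json"))
OutputWriter.write(DissectionCsvFormatter.format(results))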
sqlglider/dissection/models.py
ADDED
@@ -0,0 +1,112 @@
+"""Pydantic models for SQL dissection results."""
+
+from enum import Enum
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+
+class ComponentType(str, Enum):
+    """Type of SQL component extracted from a query."""
+
+    CTE = "CTE"
+    MAIN_QUERY = "MAIN_QUERY"
+    SUBQUERY = "SUBQUERY"
+    SCALAR_SUBQUERY = "SCALAR_SUBQUERY"
+    TARGET_TABLE = "TARGET_TABLE"
+    SOURCE_QUERY = "SOURCE_QUERY"
+    UNION_BRANCH = "UNION_BRANCH"
+
+
+class SQLComponent(BaseModel):
+    """Represents a single SQL component extracted from a query."""
+
+    component_type: ComponentType = Field(
+        ..., description="Type of component (CTE, SUBQUERY, etc.)"
+    )
+    component_index: int = Field(
+        ..., description="0-based index within query (order of extraction)"
+    )
+    name: Optional[str] = Field(
+        None,
+        description="Name/alias of component (CTE name, subquery alias, target table)",
+    )
+    sql: str = Field(
+        ..., description="Extracted SQL for this component (executable if applicable)"
+    )
+    parent_index: Optional[int] = Field(
+        None, description="Index of parent component (for nested subqueries)"
+    )
+    depth: int = Field(
+        default=0, description="Nesting depth (0 = top-level, 1+ = nested)"
+    )
+    is_executable: bool = Field(
+        default=True, description="Whether this SQL can be executed standalone"
+    )
+    dependencies: List[str] = Field(
+        default_factory=list,
+        description="Names of CTEs this component depends on",
+    )
+    location: str = Field(
+        ..., description="Human-readable location context (e.g., 'WITH clause')"
+    )
+
+
+class QueryMetadata(BaseModel):
+    """Metadata about a dissected query."""
+
+    query_index: int = Field(..., description="0-based query index in multi-query file")
+    query_preview: str = Field(..., description="First 100 chars of original query")
+    statement_type: str = Field(
+        ..., description="Type of SQL statement (SELECT, INSERT, CREATE, etc.)"
+    )
+    total_components: int = Field(
+        ..., description="Total number of components extracted"
+    )
+
+
+class QueryDissectionResult(BaseModel):
+    """Complete dissection result for a single query."""
+
+    metadata: QueryMetadata
+    components: List[SQLComponent] = Field(
+        default_factory=list,
+        description="All extracted components in order",
+    )
+    original_sql: str = Field(
+        ..., description="Original SQL query for reference/validation"
+    )
+
+    def get_component_by_name(self, name: str) -> Optional[SQLComponent]:
+        """Find a component by name (case-insensitive).
+
+        Args:
+            name: The component name to search for.
+
+        Returns:
+            The matching SQLComponent or None if not found.
+        """
+        name_lower = name.lower()
+        for component in self.components:
+            if component.name and component.name.lower() == name_lower:
+                return component
+        return None
+
+    def get_components_by_type(self, comp_type: ComponentType) -> List[SQLComponent]:
+        """Get all components of a specific type.
+
+        Args:
+            comp_type: The ComponentType to filter by.
+
+        Returns:
+            List of matching SQLComponent objects.
+        """
+        return [c for c in self.components if c.component_type == comp_type]
+
+    def get_executable_components(self) -> List[SQLComponent]:
+        """Get all executable components.
+
+        Returns:
+            List of SQLComponent objects that can be executed standalone.
+        """
+        return [c for c in self.components if c.is_executable]
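
To illustrate how these models compose, here is a hand-built two-component result exercising the helper methods. All field values are illustrative, not actual analyzer output; in particular the location string for the main query and the is_executable flags are assumptions:

from sqlglider.dissection.models import (
    ComponentType,
    QueryDissectionResult,
    QueryMetadata,
    SQLComponent,
)

cte = SQLComponent(
    component_type=ComponentType.CTE,
    component_index=0,
    name="order_totals",
    sql="SELECT o.id, SUM(o.amount) AS total FROM orders o GROUP BY o.id",
    is_executable=True,
    location="WITH clause",
)
main = SQLComponent(
    component_type=ComponentType.MAIN_QUERY,
    component_index=1,
    sql="SELECT * FROM order_totals",
    dependencies=["order_totals"],
    is_executable=False,  # depends on the CTE, so not standalone (assumed)
    location="main query",  # hypothetical location label
)
result = QueryDissectionResult(
    metadata=QueryMetadata(
        query_index=0,
        query_preview="WITH order_totals AS (...) SELECT ...",
        statement_type="SELECT",
        total_components=2,
    ),
    components=[cte, main],
    original_sql="WITH order_totals AS (...) SELECT * FROM order_totals",
)

found = result.get_component_by_name("ORDER_TOTALS")  # case-insensitive lookup
assert found is not None and found.name == "order_totals"
assert [c.name for c in result.get_executable_components()] == ["order_totals"]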
sqlglider/graph/builder.py
CHANGED
@@ -6,6 +6,7 @@ from typing import Callable, Dict, List, Optional, Set
 
 import rustworkx as rx
 from rich.console import Console
+from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn
 
 from sqlglider.global_models import AnalysisLevel, NodeFormat
 from sqlglider.graph.models import (
@@ -166,11 +167,8 @@ class GraphBuilder:
         else:
             pattern = glob_pattern
 
-        for sql_file in sorted(dir_path.glob(pattern)):
-            if sql_file.is_file():
-                self.add_file(sql_file, dialect)
-
-        return self
+        sql_files = [f for f in sorted(dir_path.glob(pattern)) if f.is_file()]
+        return self.add_files(sql_files, dialect)
 
     def add_manifest(
         self,
@@ -194,6 +192,8 @@
         manifest = Manifest.from_csv(manifest_path)
         base_dir = manifest_path.parent
 
+        # Collect files with their dialects
+        files_with_dialects: List[tuple[Path, str]] = []
         for entry in manifest.entries:
             # Resolve file path relative to manifest location
             file_path = Path(entry.file_path)
@@ -202,7 +202,25 @@
 
             # Use entry dialect, then CLI dialect, then builder default
             entry_dialect = entry.dialect or dialect or self.dialect
-            self.add_file(file_path, entry_dialect)
+            files_with_dialects.append((file_path, entry_dialect))
+
+        # Process with progress
+        if files_with_dialects:
+            total = len(files_with_dialects)
+            with Progress(
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TaskProgressColumn(),
+                console=console,
+                transient=False,
+            ) as progress:
+                task = progress.add_task("Parsing", total=total)
+                for i, (file_path, file_dialect) in enumerate(
+                    files_with_dialects, start=1
+                ):
+                    console.print(f"Parsing file {i}/{total}: {file_path.name}")
+                    self.add_file(file_path, file_dialect)
+                    progress.advance(task)
 
         return self
 
@@ -210,6 +228,7 @@
         self,
         file_paths: List[Path],
         dialect: Optional[str] = None,
+        show_progress: bool = True,
    ) -> "GraphBuilder":
        """
        Add lineage from multiple SQL files.
@@ -217,12 +236,31 @@
        Args:
            file_paths: List of paths to SQL files
            dialect: SQL dialect (uses builder default if not specified)
+           show_progress: Whether to print progress messages
 
        Returns:
            self for method chaining
        """
-        for file_path in file_paths:
-            self.add_file(file_path, dialect)
+        if not file_paths:
+            return self
+
+        if show_progress:
+            total = len(file_paths)
+            with Progress(
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TaskProgressColumn(),
+                console=console,
+                transient=False,
+            ) as progress:
+                task = progress.add_task("Parsing", total=total)
+                for i, file_path in enumerate(file_paths, start=1):
+                    console.print(f"Parsing file {i}/{total}: {file_path.name}")
+                    self.add_file(file_path, dialect)
+                    progress.advance(task)
+        else:
+            for file_path in file_paths:
+                self.add_file(file_path, dialect)
        return self
 
    def _ensure_node(
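
The progress reporting added here is plain Rich, not anything sql-glider-specific. A standalone sketch of the same column setup, with placeholder file names and no GraphBuilder involved:

from rich.console import Console
from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn

console = Console()
files = ["orders.sql", "customers.sql", "revenue.sql"]  # placeholders

with Progress(
    TextColumn("[progress.description]{task.description}"),
    BarColumn(),
    TaskProgressColumn(),
    console=console,
    transient=False,  # keep the finished bar on screen
) as progress:
    task = progress.add_task("Parsing", total=len(files))
    for i, name in enumerate(files, start=1):
        console.print(f"Parsing file {i}/{len(files)}: {name}")
        progress.advance(task)  # one file parsed per tick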
sqlglider/lineage/analyzer.py
CHANGED
@@ -1,7 +1,7 @@
 """Core lineage analysis using SQLGlot."""
 
 from enum import Enum
-from typing import Callable, Iterator, List, Optional, Set, Tuple
+from typing import Callable, Iterator, List, Optional, Set, Tuple, Union
 
 from pydantic import BaseModel, Field
 from sqlglot import exp, parse
@@ -155,7 +155,8 @@
        if target_table:
            # DML/DDL: Use target table for output column qualification
            # The columns are from the SELECT, but qualified with the target table
-           for projection in select_node.expressions:
+           projections = self._get_select_projections(select_node)
+           for projection in projections:
                # Get the underlying expression (unwrap alias if present)
                if isinstance(projection, exp.Alias):
                    # For aliased columns, use the alias as the column name
@@ -178,7 +179,10 @@
 
        else:
            # DQL (pure SELECT): Use the SELECT columns as output
-           for projection in select_node.expressions:
+           projections = self._get_select_projections(select_node)
+           # Get the first SELECT for table resolution (handles UNION case)
+           first_select = self._get_first_select(select_node)
+           for projection in projections:
                # Get the underlying expression (unwrap alias if present)
                if isinstance(projection, exp.Alias):
                    source_expr = projection.this
@@ -195,20 +199,20 @@
                    table_name = source_expr.table
                    col_name = column_name or source_expr.name
 
-                   if table_name:
+                   if table_name and first_select:
                        # Resolve table reference (could be table, CTE, or subquery alias)
                        # This works at any nesting level because we're only looking at the immediate context
                        resolved_table = self._resolve_table_reference(
-                           table_name, select_node
+                           table_name, first_select
                        )
                        qualified_name = f"{resolved_table}.{col_name}"
                        columns.append(qualified_name)
                        # Map qualified name to what lineage expects
                        self._column_mapping[qualified_name] = lineage_name or col_name
-                   else:
+                   elif first_select:
                        # No table qualifier - try to infer from FROM clause
                        # This handles "SELECT col FROM single_source" cases
-                       inferred_table = self._infer_single_table_source(select_node)
+                       inferred_table = self._infer_single_table_source(first_select)
                        if inferred_table:
                            qualified_name = f"{inferred_table}.{col_name}"
                            columns.append(qualified_name)
@@ -219,6 +223,10 @@
                        # Can't infer table, just use column name
                        columns.append(col_name)
                        self._column_mapping[col_name] = lineage_name or col_name
+                   else:
+                       # No SELECT found, just use column name
+                       columns.append(col_name)
+                       self._column_mapping[col_name] = lineage_name or col_name
                else:
                    # For other expressions (literals, functions, etc.)
                    # Use the alias if available, otherwise the SQL representation
@@ -232,6 +240,46 @@
 
        return columns
 
+   def _get_select_projections(self, node: exp.Expression) -> List[exp.Expression]:
+       """
+       Get the SELECT projections from a SELECT or set operation node.
+
+       For set operations (UNION, INTERSECT, EXCEPT), returns projections from
+       the first branch since all branches must have the same number of columns
+       with compatible types.
+
+       Args:
+           node: A SELECT or set operation (UNION/INTERSECT/EXCEPT) expression
+
+       Returns:
+           List of projection expressions from the SELECT clause
+       """
+       if isinstance(node, exp.Select):
+           return list(node.expressions)
+       elif isinstance(node, (exp.Union, exp.Intersect, exp.Except)):
+           # Recursively get from the left branch (could be nested set operations)
+           return self._get_select_projections(node.left)
+       return []
+
+   def _get_first_select(self, node: exp.Expression) -> Optional[exp.Select]:
+       """
+       Get the first SELECT node from a SELECT or set operation expression.
+
+       For set operations (UNION, INTERSECT, EXCEPT), returns the leftmost
+       SELECT branch.
+
+       Args:
+           node: A SELECT or set operation (UNION/INTERSECT/EXCEPT) expression
+
+       Returns:
+           The first SELECT node, or None if not found
+       """
+       if isinstance(node, exp.Select):
+           return node
+       elif isinstance(node, (exp.Union, exp.Intersect, exp.Except)):
+           return self._get_first_select(node.left)
+       return None
+
    def analyze_queries(
        self,
        level: AnalysisLevel = AnalysisLevel.COLUMN,
@@ -795,7 +843,9 @@
 
    def _get_target_and_select(
        self,
-   ) -> Optional[tuple[Optional[str], exp.Select]]:
+   ) -> Optional[
+       tuple[Optional[str], Union[exp.Select, exp.Union, exp.Intersect, exp.Except]]
+   ]:
        """
        Detect if this is a DML/DDL statement and extract the target table and SELECT node.
 
@@ -817,9 +867,11 @@
        target = self.expr.this
        if isinstance(target, exp.Table):
            target_name = self._get_qualified_table_name(target)
-           # Find the SELECT within the INSERT
+           # Find the SELECT within the INSERT (may be a set operation)
            select_node = self.expr.expression
-           if isinstance(select_node, exp.Select):
+           if isinstance(
+               select_node, (exp.Select, exp.Union, exp.Intersect, exp.Except)
+           ):
                return (target_name, select_node)
 
        # Check for CREATE TABLE AS SELECT (CTAS) or CREATE VIEW AS SELECT
@@ -831,9 +883,11 @@
            target = target.this
            if isinstance(target, exp.Table):
                target_name = self._get_qualified_table_name(target)
-               # Find the SELECT in the expression
+               # Find the SELECT in the expression (may be a set operation)
                select_node = self.expr.expression
-               if isinstance(select_node, exp.Select):
+               if isinstance(
+                   select_node, (exp.Select, exp.Union, exp.Intersect, exp.Except)
+               ):
                    return (target_name, select_node)
 
        # Check for MERGE statement
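
Both new helpers rely on the same recursion: sqlglot set operations expose their branches as left/right (the diff above uses node.left), and nested UNIONs chain down the left side. A quick standalone check of that shape, using sqlglot directly rather than the private LineageAnalyzer methods:

import sqlglot
from sqlglot import exp

def leftmost_select(node: exp.Expression):
    # Mirrors _get_first_select: descend the left branch of any set operation.
    if isinstance(node, exp.Select):
        return node
    if isinstance(node, (exp.Union, exp.Intersect, exp.Except)):
        return leftmost_select(node.left)
    return None

tree = sqlglot.parse_one("SELECT a FROM t1 UNION ALL SELECT b FROM t2")
print(leftmost_select(tree).sql())  # the first branch: SELECT a FROM t1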
sqlglider/utils/config.py
CHANGED
@@ -23,6 +23,28 @@ class TemplatingConfig(BaseModel):
     variables: Optional[Dict[str, Any]] = None
 
 
+class DatabricksCatalogConfig(BaseModel):
+    """Configuration for Databricks catalog provider.
+
+    All fields are optional - they can also be set via environment variables.
+    The SDK supports unified authentication with multiple methods.
+    """
+
+    warehouse_id: Optional[str] = None
+    profile: Optional[str] = None  # Databricks CLI profile from ~/.databrickscfg
+    host: Optional[str] = None
+    token: Optional[str] = None  # Legacy PAT, prefer OAuth or profile
+
+
+class CatalogConfig(BaseModel):
+    """Configuration for catalog providers.
+
+    Contains provider-specific configuration under sub-keys.
+    """
+
+    databricks: Optional[DatabricksCatalogConfig] = None
+
+
 class ConfigSettings(BaseModel):
     """Configuration settings for SQL Glider.
 
@@ -35,6 +57,9 @@ class ConfigSettings(BaseModel):
     output_format: Optional[str] = None
     templater: Optional[str] = None
     templating: Optional[TemplatingConfig] = None
+    catalog_type: Optional[str] = None
+    ddl_folder: Optional[str] = None
+    catalog: Optional[CatalogConfig] = None
 
 
 def find_config_file(start_path: Optional[Path] = None) -> Optional[Path]:
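
The new catalog models nest under ConfigSettings. A sketch of constructing them directly, assuming the remaining ConfigSettings fields are optional like the ones shown here; all values are hypothetical and the set of valid catalog_type strings is defined elsewhere in the package:

from sqlglider.utils.config import (
    CatalogConfig,
    ConfigSettings,
    DatabricksCatalogConfig,
)

settings = ConfigSettings(
    catalog_type="databricks",  # hypothetical value
    ddl_folder="ddl/",          # hypothetical path
    catalog=CatalogConfig(
        databricks=DatabricksCatalogConfig(
            profile="DEFAULT",      # Databricks CLI profile from ~/.databrickscfg
            warehouse_id="abc123",  # hypothetical warehouse id
        ),
    ),
)
print(settings.catalog.databricks.profile)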
sql_glider-0.1.2.dist-info/RECORD
DELETED
@@ -1,26 +0,0 @@
-sqlglider/__init__.py,sha256=gDf7s52dMcX7JuCZ1SLawcB1vb3U0yJCohu9RQAATBY,125
-sqlglider/_version.py,sha256=Ok5oAXdWgR9aghaFXTafTeDW6sYO3uVe6d2Nket57R4,704
-sqlglider/cli.py,sha256=POWIhv0jfvoNtwSoURpxJydco1rvxX9rAvyjuA9FGC8,36445
-sqlglider/global_models.py,sha256=2vyJXAuXOsXQpE-D3F0ejj7eR9z0nDWFjTkielhzM8k,356
-sqlglider/graph/__init__.py,sha256=4DDdrPM75CmeQWt7wHdBsjCm1s70BHGLYdijIbaUEKY,871
-sqlglider/graph/builder.py,sha256=rrcpGAXLz-VHZ1Y73uw6R7kMXHpzBz7tQ2tdV5BY05w,10202
-sqlglider/graph/merge.py,sha256=uUZlm4BN3S9gRL66Cc2mzhbtuh4SVAv2n4cN4eUEQBU,4077
-sqlglider/graph/models.py,sha256=EYmjv_WzDSNp_WfhJ6H-qBIOkAcoNKS7GRUryfKrHuY,9330
-sqlglider/graph/query.py,sha256=LHU8Cvn7ZPPSEnqdDn2pF8f1_LQjIvNIrZqs8cFlb6U,9433
-sqlglider/graph/serialization.py,sha256=7JJo31rwSlxnDhdqdTJdK4Dr_ZcSYetXfx3_CmndSac,2662
-sqlglider/lineage/__init__.py,sha256=llXMeI5_PIZaiBo8tKk3-wOubF4m_6QBHbn1FtWxT7k,256
-sqlglider/lineage/analyzer.py,sha256=58lyrUc0XsCUrYSb23A02OSBmq7eCtJwc477PbjS3c0,45905
-sqlglider/lineage/formatters.py,sha256=_Y9wcTX4JXn1vVnZ1xI656g1FF2rMjcAVc-GHjbd9QA,10389
-sqlglider/templating/__init__.py,sha256=g3_wb6rSDI0usq2UUMDpn-J5kVwlAw3NtLdwbxL6UHs,1435
-sqlglider/templating/base.py,sha256=y5bWAW7qXl_4pPyo5KycfHwNVvt1-7slZ63DAsvTE1s,2902
-sqlglider/templating/jinja.py,sha256=o01UG72N4G1-tOT5LKK1Wkccv4nJH2VN4VFaMi5c1-g,5220
-sqlglider/templating/registry.py,sha256=BJU3N2qNVMTUtkgbibyqo8Wme_acXQRw5XI-6ZVgyac,3476
-sqlglider/templating/variables.py,sha256=5593PtLBcOxsnMCSRm2pGAD5I0Y9f__VV3_J_HfXVlQ,8010
-sqlglider/utils/__init__.py,sha256=KGp9-UzKz_OFBOTFoSy-g-NXDZsvyWXG_9-1zcC6ePE,276
-sqlglider/utils/config.py,sha256=mkven_CcE_dNfKiHi0h2CsE5TMQDX9XqbU7GGEELwEY,3959
-sqlglider/utils/file_utils.py,sha256=5_ff28E0r1R7emZzsOnRuHd-7zIX6873eyr1SuPEr4E,1093
-sql_glider-0.1.2.dist-info/METADATA,sha256=JUXRDvhfnJBj2owWMaupDugZj4Y6uDv1R7RCCkaEWlw,22349
-sql_glider-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sql_glider-0.1.2.dist-info/entry_points.txt,sha256=LWVdQEfvDT5uZ2RQ4Rse8m0HxBCOMbbqDkxdwUh9d78,169
-sql_glider-0.1.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sql_glider-0.1.2.dist-info/RECORD,,
{sql_glider-0.1.2.dist-info → sql_glider-0.1.3.dist-info}/WHEEL
File without changes
{sql_glider-0.1.2.dist-info → sql_glider-0.1.3.dist-info}/licenses/LICENSE
File without changes