sql-glider 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sql_glider-0.1.8.dist-info/METADATA +893 -0
- sql_glider-0.1.8.dist-info/RECORD +34 -0
- sql_glider-0.1.8.dist-info/WHEEL +4 -0
- sql_glider-0.1.8.dist-info/entry_points.txt +9 -0
- sql_glider-0.1.8.dist-info/licenses/LICENSE +201 -0
- sqlglider/__init__.py +3 -0
- sqlglider/_version.py +34 -0
- sqlglider/catalog/__init__.py +30 -0
- sqlglider/catalog/base.py +99 -0
- sqlglider/catalog/databricks.py +255 -0
- sqlglider/catalog/registry.py +121 -0
- sqlglider/cli.py +1589 -0
- sqlglider/dissection/__init__.py +17 -0
- sqlglider/dissection/analyzer.py +767 -0
- sqlglider/dissection/formatters.py +222 -0
- sqlglider/dissection/models.py +112 -0
- sqlglider/global_models.py +17 -0
- sqlglider/graph/__init__.py +42 -0
- sqlglider/graph/builder.py +349 -0
- sqlglider/graph/merge.py +136 -0
- sqlglider/graph/models.py +289 -0
- sqlglider/graph/query.py +287 -0
- sqlglider/graph/serialization.py +107 -0
- sqlglider/lineage/__init__.py +10 -0
- sqlglider/lineage/analyzer.py +1631 -0
- sqlglider/lineage/formatters.py +335 -0
- sqlglider/templating/__init__.py +51 -0
- sqlglider/templating/base.py +103 -0
- sqlglider/templating/jinja.py +163 -0
- sqlglider/templating/registry.py +124 -0
- sqlglider/templating/variables.py +295 -0
- sqlglider/utils/__init__.py +11 -0
- sqlglider/utils/config.py +155 -0
- sqlglider/utils/file_utils.py +38 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""Output formatters for dissection results."""
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
from rich.table import Table
|
|
11
|
+
|
|
12
|
+
from sqlglider.dissection.models import QueryDissectionResult
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DissectionTextFormatter:
    """Format dissection results as Rich tables for terminal display."""

    @staticmethod
    def _truncate(text: str, limit: int) -> str:
        """
        Cut ``text`` to ``limit`` characters, marking the cut with "...".

        Args:
            text: The text to shorten
            limit: Maximum number of characters kept from ``text``

        Returns:
            ``text`` unchanged when it fits, otherwise its first ``limit``
            characters followed by "..."
        """
        if len(text) > limit:
            return text[:limit] + "..."
        return text

    @staticmethod
    def _sql_preview(sql: str, limit: int = 50) -> str:
        """
        Build a single-line preview of a SQL string.

        Args:
            sql: The SQL text to preview
            limit: Maximum number of preview characters before "..." is added

        Returns:
            The SQL with every whitespace run collapsed to one space,
            truncated to ``limit`` characters
        """
        # The ellipsis decision is made on the collapsed text: deciding on the
        # raw SQL length would flag heavily indented or multi-line SQL as
        # truncated even when the whole collapsed preview is shown.
        collapsed = " ".join(sql.split())
        return DissectionTextFormatter._truncate(collapsed, limit)

    @staticmethod
    def format(results: List[QueryDissectionResult], console: Console) -> None:
        """
        Format and print dissection results as Rich tables.

        Creates a styled table for each query showing all extracted components,
        followed by a line with the total component count taken from the
        query metadata.

        Args:
            results: List of QueryDissectionResult objects
            console: Rich Console instance for output
        """
        if not results:
            console.print("[yellow]No dissection results found.[/yellow]")
            return

        for i, result in enumerate(results):
            # Add spacing between queries (except for first)
            if i > 0:
                console.print()

            # Create table with query info as title
            title = (
                f"Query {result.metadata.query_index} "
                f"({result.metadata.statement_type}): "
                f"{result.metadata.query_preview}"
            )
            table = Table(title=title, title_style="bold")

            table.add_column("Index", style="dim", width=6)
            table.add_column("Type", style="cyan", width=16)
            table.add_column("Name", style="green", min_width=10)
            table.add_column("Depth", style="yellow", width=6)
            table.add_column("Exec?", style="magenta", width=6)
            table.add_column("Location", style="blue", min_width=15)
            table.add_column("SQL Preview", style="dim", min_width=30)

            # Add one row per component; unnamed components show "-"
            for component in result.components:
                table.add_row(
                    str(component.component_index),
                    component.component_type.value,
                    component.name or "-",
                    str(component.depth),
                    "Yes" if component.is_executable else "No",
                    DissectionTextFormatter._truncate(component.location, 35),
                    DissectionTextFormatter._sql_preview(component.sql),
                )

            console.print(table)
            console.print(
                f"[dim]Total components: {result.metadata.total_components}[/dim]"
            )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class DissectionJsonFormatter:
    """Serialize dissection results into a JSON document."""

    @staticmethod
    def format(results: List[QueryDissectionResult]) -> str:
        """
        Render dissection results as a JSON string indented by two spaces.

        The document is a single object with one key, "queries", holding one
        entry per result. Each entry carries the metadata fields
        ("query_index", "query_preview", "statement_type",
        "total_components"), a "components" list and the "original_sql".
        Each component entry carries "component_type", "component_index",
        "name", "sql", "parent_index", "depth", "is_executable",
        "dependencies" and "location".

        Args:
            results: List of QueryDissectionResult objects

        Returns:
            JSON-formatted string
        """

        def component_payload(part):
            # Key order here is the key order of the emitted JSON object.
            return {
                "component_type": part.component_type.value,
                "component_index": part.component_index,
                "name": part.name,
                "sql": part.sql,
                "parent_index": part.parent_index,
                "depth": part.depth,
                "is_executable": part.is_executable,
                "dependencies": part.dependencies,
                "location": part.location,
            }

        document = {
            "queries": [
                {
                    "query_index": entry.metadata.query_index,
                    "query_preview": entry.metadata.query_preview,
                    "statement_type": entry.metadata.statement_type,
                    "total_components": entry.metadata.total_components,
                    "components": [
                        component_payload(part) for part in entry.components
                    ],
                    "original_sql": entry.original_sql,
                }
                for entry in results
            ]
        }
        return json.dumps(document, indent=2)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class DissectionCsvFormatter:
    """Serialize dissection results into CSV text, one row per component."""

    @staticmethod
    def format(results: List[QueryDissectionResult]) -> str:
        """
        Render dissection results as CSV.

        The first row is the header:
            query_index,component_index,component_type,name,depth,is_executable,location,dependencies,sql
        Every component of every result then contributes one row. A missing
        name becomes an empty cell, ``is_executable`` is written as
        "true"/"false", and dependencies are joined with ";".

        Args:
            results: List of QueryDissectionResult objects

        Returns:
            CSV-formatted string, or an empty string (no header) when
            ``results`` is empty
        """
        if not results:
            return ""

        columns = [
            "query_index",
            "component_index",
            "component_type",
            "name",
            "depth",
            "is_executable",
            "location",
            "dependencies",
            "sql",
        ]

        def component_rows():
            # Yield one CSV row per component, tagged with its query's index.
            for entry in results:
                owner_index = entry.metadata.query_index
                for part in entry.components:
                    yield [
                        owner_index,
                        part.component_index,
                        part.component_type.value,
                        part.name or "",
                        part.depth,
                        "true" if part.is_executable else "false",
                        part.location,
                        ";".join(part.dependencies),
                        part.sql,
                    ]

        buffer = StringIO()
        sink = csv.writer(buffer)
        sink.writerow(columns)
        sink.writerows(component_rows())
        return buffer.getvalue()
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class OutputWriter:
    """Deliver formatted text either to a file or to standard output."""

    @staticmethod
    def write(content: str, output_file: Optional[Path] = None) -> None:
        """
        Send ``content`` to ``output_file`` when given, otherwise print it.

        Args:
            content: The text to emit
            output_file: Destination path, written as UTF-8. When omitted
                (or otherwise falsy) the content is printed to stdout.
        """
        if not output_file:
            print(content)
            return

        output_file.write_text(content, encoding="utf-8")
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Pydantic models for SQL dissection results."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ComponentType(str, Enum):
    """Kinds of SQL component that dissection can extract from a query.

    Members are also ``str`` instances, and each member's value is identical
    to its name.
    """

    # Query-structure components
    CTE = "CTE"
    MAIN_QUERY = "MAIN_QUERY"
    SUBQUERY = "SUBQUERY"
    SCALAR_SUBQUERY = "SCALAR_SUBQUERY"

    # Target/source components
    TARGET_TABLE = "TARGET_TABLE"
    SOURCE_QUERY = "SOURCE_QUERY"

    # Set-operation components
    UNION_BRANCH = "UNION_BRANCH"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SQLComponent(BaseModel):
    """One piece of SQL pulled out of a query, with its position and context."""

    # --- Identity -----------------------------------------------------------
    component_type: ComponentType = Field(
        ...,
        description="Type of component (CTE, SUBQUERY, etc.)",
    )
    component_index: int = Field(
        ...,
        description="0-based index within query (order of extraction)",
    )
    name: Optional[str] = Field(
        default=None,
        description="Name/alias of component (CTE name, subquery alias, target table)",
    )

    # --- Extracted SQL ------------------------------------------------------
    sql: str = Field(
        ...,
        description="Extracted SQL for this component (executable if applicable)",
    )

    # --- Nesting ------------------------------------------------------------
    parent_index: Optional[int] = Field(
        default=None,
        description="Index of parent component (for nested subqueries)",
    )
    depth: int = Field(
        default=0,
        description="Nesting depth (0 = top-level, 1+ = nested)",
    )

    # --- Execution context --------------------------------------------------
    is_executable: bool = Field(
        default=True,
        description="Whether this SQL can be executed standalone",
    )
    dependencies: List[str] = Field(
        default_factory=list,
        description="Names of CTEs this component depends on",
    )
    location: str = Field(
        ...,
        description="Human-readable location context (e.g., 'WITH clause')",
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class QueryMetadata(BaseModel):
    """Summary information describing one dissected query."""

    # All four fields are required; none has a default.
    query_index: int = Field(
        ...,
        description="0-based query index in multi-query file",
    )
    query_preview: str = Field(
        ...,
        description="First 100 chars of original query",
    )
    statement_type: str = Field(
        ...,
        description="Type of SQL statement (SELECT, INSERT, CREATE, etc.)",
    )
    total_components: int = Field(
        ...,
        description="Total number of components extracted",
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class QueryDissectionResult(BaseModel):
    """Dissection output for one query: metadata, components and source SQL."""

    metadata: QueryMetadata
    components: List[SQLComponent] = Field(
        default_factory=list,
        description="All extracted components in order",
    )
    original_sql: str = Field(
        ...,
        description="Original SQL query for reference/validation",
    )

    def get_component_by_name(self, name: str) -> Optional[SQLComponent]:
        """Look up the first component whose name matches, ignoring case.

        Components without a name are never matched.

        Args:
            name: The component name to search for.

        Returns:
            The first matching SQLComponent, or None if there is no match.
        """
        wanted = name.lower()
        return next(
            (
                candidate
                for candidate in self.components
                if candidate.name and candidate.name.lower() == wanted
            ),
            None,
        )

    def get_components_by_type(self, comp_type: ComponentType) -> List[SQLComponent]:
        """Collect every component whose type equals ``comp_type``.

        Args:
            comp_type: The ComponentType to filter by.

        Returns:
            Matching SQLComponent objects, in their stored order.
        """
        selected = []
        for candidate in self.components:
            if candidate.component_type == comp_type:
                selected.append(candidate)
        return selected

    def get_executable_components(self) -> List[SQLComponent]:
        """Collect every component flagged as executable.

        Returns:
            SQLComponent objects whose ``is_executable`` is truthy, in their
            stored order.
        """
        return list(filter(lambda candidate: candidate.is_executable, self.components))
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Shared models and enums used across sqlglider modules."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class AnalysisLevel(str, Enum):
    """Granularity at which lineage is analyzed.

    Members are also ``str`` instances carrying the lowercase level name.
    """

    COLUMN = "column"  # column-level analysis
    TABLE = "table"  # table-level analysis
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class NodeFormat(str, Enum):
    """How node identifiers are rendered in graph output.

    Members are also ``str`` instances carrying the lowercase format name.
    """

    QUALIFIED = "qualified"
    STRUCTURED = "structured"
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Graph-based lineage analysis module for SQL Glider."""
|
|
2
|
+
|
|
3
|
+
from sqlglider.graph.builder import GraphBuilder
|
|
4
|
+
from sqlglider.graph.merge import GraphMerger, merge_graphs
|
|
5
|
+
from sqlglider.graph.models import (
|
|
6
|
+
GraphEdge,
|
|
7
|
+
GraphMetadata,
|
|
8
|
+
GraphNode,
|
|
9
|
+
LineageGraph,
|
|
10
|
+
Manifest,
|
|
11
|
+
ManifestEntry,
|
|
12
|
+
)
|
|
13
|
+
from sqlglider.graph.query import GraphQuerier, LineageQueryResult
|
|
14
|
+
from sqlglider.graph.serialization import (
|
|
15
|
+
from_rustworkx,
|
|
16
|
+
load_graph,
|
|
17
|
+
save_graph,
|
|
18
|
+
to_rustworkx,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Public names of the ``sqlglider.graph`` package. Entries are grouped by the
# submodule they are imported from at the top of this file.
__all__ = [
    # Models (sqlglider.graph.models)
    "GraphNode",
    "GraphEdge",
    "GraphMetadata",
    "LineageGraph",
    "Manifest",
    "ManifestEntry",
    # Builder (sqlglider.graph.builder)
    "GraphBuilder",
    # Merge (sqlglider.graph.merge)
    "GraphMerger",
    "merge_graphs",
    # Query (sqlglider.graph.query)
    "GraphQuerier",
    "LineageQueryResult",
    # Serialization (sqlglider.graph.serialization)
    "load_graph",
    "save_graph",
    "to_rustworkx",
    "from_rustworkx",
]
|