sql-glider 0.1.2-py3-none-any.whl → 0.1.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,222 @@
+"""Output formatters for dissection results."""
+
+import csv
+import json
+from io import StringIO
+from pathlib import Path
+from typing import List, Optional
+
+from rich.console import Console
+from rich.table import Table
+
+from sqlglider.dissection.models import QueryDissectionResult
+
+
+class DissectionTextFormatter:
+    """Format dissection results as Rich tables for terminal display."""
+
+    @staticmethod
+    def format(results: List[QueryDissectionResult], console: Console) -> None:
+        """
+        Format and print dissection results as Rich tables.
+
+        Creates a styled table for each query showing all extracted components.
+
+        Args:
+            results: List of QueryDissectionResult objects
+            console: Rich Console instance for output
+        """
+        if not results:
+            console.print("[yellow]No dissection results found.[/yellow]")
+            return
+
+        for i, result in enumerate(results):
+            # Add spacing between queries (except for first)
+            if i > 0:
+                console.print()
+
+            # Create table with query info as title
+            title = (
+                f"Query {result.metadata.query_index} "
+                f"({result.metadata.statement_type}): "
+                f"{result.metadata.query_preview}"
+            )
+            table = Table(title=title, title_style="bold")
+
+            table.add_column("Index", style="dim", width=6)
+            table.add_column("Type", style="cyan", width=16)
+            table.add_column("Name", style="green", min_width=10)
+            table.add_column("Depth", style="yellow", width=6)
+            table.add_column("Exec?", style="magenta", width=6)
+            table.add_column("Location", style="blue", min_width=15)
+            table.add_column("SQL Preview", style="dim", min_width=30)
+
+            # Add rows for each component
+            for component in result.components:
+                # Truncate SQL for preview
+                sql_preview = " ".join(component.sql.split())[:50]
+                if len(component.sql) > 50:
+                    sql_preview += "..."
+
+                table.add_row(
+                    str(component.component_index),
+                    component.component_type.value,
+                    component.name or "-",
+                    str(component.depth),
+                    "Yes" if component.is_executable else "No",
+                    component.location[:35] + "..."
+                    if len(component.location) > 35
+                    else component.location,
+                    sql_preview,
+                )
+
+            console.print(table)
+            console.print(
+                f"[dim]Total components: {result.metadata.total_components}[/dim]"
+            )
+
+
+class DissectionJsonFormatter:
+    """Format dissection results as JSON."""
+
+    @staticmethod
+    def format(results: List[QueryDissectionResult]) -> str:
+        """
+        Format dissection results as JSON.
+
+        Output format:
+            {
+              "queries": [
+                {
+                  "query_index": 0,
+                  "query_preview": "SELECT ...",
+                  "statement_type": "INSERT",
+                  "total_components": 5,
+                  "components": [
+                    {
+                      "component_type": "CTE",
+                      "component_index": 0,
+                      "name": "order_totals",
+                      "sql": "SELECT ...",
+                      "parent_index": null,
+                      "depth": 0,
+                      "is_executable": true,
+                      "dependencies": [],
+                      "location": "WITH clause"
+                    }
+                  ],
+                  "original_sql": "WITH order_totals AS ..."
+                }
+              ]
+            }
+
+        Args:
+            results: List of QueryDissectionResult objects
+
+        Returns:
+            JSON-formatted string
+        """
+        queries = []
+        for result in results:
+            query_data = {
+                "query_index": result.metadata.query_index,
+                "query_preview": result.metadata.query_preview,
+                "statement_type": result.metadata.statement_type,
+                "total_components": result.metadata.total_components,
+                "components": [
+                    {
+                        "component_type": component.component_type.value,
+                        "component_index": component.component_index,
+                        "name": component.name,
+                        "sql": component.sql,
+                        "parent_index": component.parent_index,
+                        "depth": component.depth,
+                        "is_executable": component.is_executable,
+                        "dependencies": component.dependencies,
+                        "location": component.location,
+                    }
+                    for component in result.components
+                ],
+                "original_sql": result.original_sql,
+            }
+            queries.append(query_data)
+
+        return json.dumps({"queries": queries}, indent=2)
+
+
+class DissectionCsvFormatter:
+    """Format dissection results as CSV."""
+
+    @staticmethod
+    def format(results: List[QueryDissectionResult]) -> str:
+        """
+        Format dissection results as CSV.
+
+        Output format:
+            query_index,component_index,component_type,name,depth,is_executable,location,dependencies,sql
+            0,0,CTE,order_totals,0,true,WITH clause,,"SELECT ..."
+
+        Args:
+            results: List of QueryDissectionResult objects
+
+        Returns:
+            CSV-formatted string
+        """
+        if not results:
+            return ""
+
+        output = StringIO()
+        headers = [
+            "query_index",
+            "component_index",
+            "component_type",
+            "name",
+            "depth",
+            "is_executable",
+            "location",
+            "dependencies",
+            "sql",
+        ]
+
+        writer = csv.writer(output)
+        writer.writerow(headers)
+
+        # Write data rows
+        for result in results:
+            query_index = result.metadata.query_index
+            for component in result.components:
+                # Join dependencies with semicolon
+                deps_str = ";".join(component.dependencies)
+                writer.writerow(
+                    [
+                        query_index,
+                        component.component_index,
+                        component.component_type.value,
+                        component.name or "",
+                        component.depth,
+                        "true" if component.is_executable else "false",
+                        component.location,
+                        deps_str,
+                        component.sql,
+                    ]
+                )
+
+        return output.getvalue()
+
+
+class OutputWriter:
+    """Write formatted output to file or stdout."""

+    @staticmethod
+    def write(content: str, output_file: Optional[Path] = None) -> None:
+        """
+        Write content to file or stdout.
+
+        Args:
+            content: The content to write
+            output_file: Optional file path. If None, writes to stdout.
+        """
+        if output_file:
+            output_file.write_text(content, encoding="utf-8")
+        else:
+            print(content)
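
Taken together, the three formatter classes and OutputWriter form a small rendering layer over the dissection models introduced in the next file of this diff. A minimal usage sketch, assuming a list of QueryDissectionResult objects is already available; the import path sqlglider.dissection.formatters is a guess, since the diff does not name the file:

from pathlib import Path
from typing import List, Optional

from rich.console import Console

from sqlglider.dissection.models import QueryDissectionResult

# Assumed module path -- the diff does not show where these classes live.
from sqlglider.dissection.formatters import (
    DissectionCsvFormatter,
    DissectionJsonFormatter,
    DissectionTextFormatter,
    OutputWriter,
)


def emit(
    results: List[QueryDissectionResult],
    fmt: str = "json",
    output_file: Optional[Path] = None,
) -> None:
    """Render dissection results in the requested format."""
    if fmt == "text":
        # Rich tables are printed directly to the console; OutputWriter is not used.
        DissectionTextFormatter.format(results, Console())
        return
    content = (
        DissectionJsonFormatter.format(results)
        if fmt == "json"
        else DissectionCsvFormatter.format(results)
    )
    # Writes to the given path, or to stdout when output_file is None.
    OutputWriter.write(content, output_file)

The text formatter takes its own Console so callers keep control of styling and capture, while the JSON and CSV formatters return plain strings that OutputWriter can send to a file or stdout.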
@@ -0,0 +1,112 @@
+"""Pydantic models for SQL dissection results."""
+
+from enum import Enum
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+
+class ComponentType(str, Enum):
+    """Type of SQL component extracted from a query."""
+
+    CTE = "CTE"
+    MAIN_QUERY = "MAIN_QUERY"
+    SUBQUERY = "SUBQUERY"
+    SCALAR_SUBQUERY = "SCALAR_SUBQUERY"
+    TARGET_TABLE = "TARGET_TABLE"
+    SOURCE_QUERY = "SOURCE_QUERY"
+    UNION_BRANCH = "UNION_BRANCH"
+
+
+class SQLComponent(BaseModel):
+    """Represents a single SQL component extracted from a query."""
+
+    component_type: ComponentType = Field(
+        ..., description="Type of component (CTE, SUBQUERY, etc.)"
+    )
+    component_index: int = Field(
+        ..., description="0-based index within query (order of extraction)"
+    )
+    name: Optional[str] = Field(
+        None,
+        description="Name/alias of component (CTE name, subquery alias, target table)",
+    )
+    sql: str = Field(
+        ..., description="Extracted SQL for this component (executable if applicable)"
+    )
+    parent_index: Optional[int] = Field(
+        None, description="Index of parent component (for nested subqueries)"
+    )
+    depth: int = Field(
+        default=0, description="Nesting depth (0 = top-level, 1+ = nested)"
+    )
+    is_executable: bool = Field(
+        default=True, description="Whether this SQL can be executed standalone"
+    )
+    dependencies: List[str] = Field(
+        default_factory=list,
+        description="Names of CTEs this component depends on",
+    )
+    location: str = Field(
+        ..., description="Human-readable location context (e.g., 'WITH clause')"
+    )
+
+
+class QueryMetadata(BaseModel):
+    """Metadata about a dissected query."""
+
+    query_index: int = Field(..., description="0-based query index in multi-query file")
+    query_preview: str = Field(..., description="First 100 chars of original query")
+    statement_type: str = Field(
+        ..., description="Type of SQL statement (SELECT, INSERT, CREATE, etc.)"
+    )
+    total_components: int = Field(
+        ..., description="Total number of components extracted"
+    )
+
+
+class QueryDissectionResult(BaseModel):
+    """Complete dissection result for a single query."""
+
+    metadata: QueryMetadata
+    components: List[SQLComponent] = Field(
+        default_factory=list,
+        description="All extracted components in order",
+    )
+    original_sql: str = Field(
+        ..., description="Original SQL query for reference/validation"
+    )
+
+    def get_component_by_name(self, name: str) -> Optional[SQLComponent]:
+        """Find a component by name (case-insensitive).
+
+        Args:
+            name: The component name to search for.
+
+        Returns:
+            The matching SQLComponent or None if not found.
+        """
+        name_lower = name.lower()
+        for component in self.components:
+            if component.name and component.name.lower() == name_lower:
+                return component
+        return None
+
+    def get_components_by_type(self, comp_type: ComponentType) -> List[SQLComponent]:
+        """Get all components of a specific type.
+
+        Args:
+            comp_type: The ComponentType to filter by.
+
+        Returns:
+            List of matching SQLComponent objects.
+        """
+        return [c for c in self.components if c.component_type == comp_type]
+
+    def get_executable_components(self) -> List[SQLComponent]:
+        """Get all executable components.
+
+        Returns:
+            List of SQLComponent objects that can be executed standalone.
+        """
+        return [c for c in self.components if c.is_executable]
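
Since these are plain Pydantic models, a result can be built by hand to exercise the helper methods. The data below is illustrative only and does not come from the package; in practice the objects would presumably be produced by the dissection logic, which is not part of this diff:

from sqlglider.dissection.models import (
    ComponentType,
    QueryDissectionResult,
    QueryMetadata,
    SQLComponent,
)

result = QueryDissectionResult(
    metadata=QueryMetadata(
        query_index=0,
        query_preview="WITH order_totals AS (SELECT ...) SELECT ...",
        statement_type="SELECT",
        total_components=2,
    ),
    components=[
        SQLComponent(
            component_type=ComponentType.CTE,
            component_index=0,
            name="order_totals",
            sql="SELECT order_id, SUM(amount) AS total FROM orders GROUP BY order_id",
            location="WITH clause",
        ),
        SQLComponent(
            component_type=ComponentType.MAIN_QUERY,
            component_index=1,
            name=None,
            sql="SELECT * FROM order_totals",
            dependencies=["order_totals"],
            location="main query",
        ),
    ],
    original_sql="WITH order_totals AS (SELECT ...) SELECT * FROM order_totals",
)

# Lookup is case-insensitive, and the defaults make both components executable.
assert result.get_component_by_name("ORDER_TOTALS").component_type == ComponentType.CTE
assert len(result.get_components_by_type(ComponentType.MAIN_QUERY)) == 1
assert len(result.get_executable_components()) == 2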
@@ -6,6 +6,7 @@ from typing import Callable, Dict, List, Optional, Set

 import rustworkx as rx
 from rich.console import Console
+from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn

 from sqlglider.global_models import AnalysisLevel, NodeFormat
 from sqlglider.graph.models import (
@@ -166,11 +167,8 @@ class GraphBuilder:
         else:
             pattern = glob_pattern

-        for sql_file in sorted(dir_path.glob(pattern)):
-            if sql_file.is_file():
-                self.add_file(sql_file, dialect)
-
-        return self
+        sql_files = [f for f in sorted(dir_path.glob(pattern)) if f.is_file()]
+        return self.add_files(sql_files, dialect)

     def add_manifest(
         self,
@@ -194,6 +192,8 @@ class GraphBuilder:
         manifest = Manifest.from_csv(manifest_path)
         base_dir = manifest_path.parent

+        # Collect files with their dialects
+        files_with_dialects: List[tuple[Path, str]] = []
         for entry in manifest.entries:
             # Resolve file path relative to manifest location
             file_path = Path(entry.file_path)
@@ -202,7 +202,25 @@ class GraphBuilder:

             # Use entry dialect, then CLI dialect, then builder default
             entry_dialect = entry.dialect or dialect or self.dialect
-            self.add_file(file_path, entry_dialect)
+            files_with_dialects.append((file_path, entry_dialect))
+
+        # Process with progress
+        if files_with_dialects:
+            total = len(files_with_dialects)
+            with Progress(
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TaskProgressColumn(),
+                console=console,
+                transient=False,
+            ) as progress:
+                task = progress.add_task("Parsing", total=total)
+                for i, (file_path, file_dialect) in enumerate(
+                    files_with_dialects, start=1
+                ):
+                    console.print(f"Parsing file {i}/{total}: {file_path.name}")
+                    self.add_file(file_path, file_dialect)
+                    progress.advance(task)

         return self

@@ -210,6 +228,7 @@ class GraphBuilder:
         self,
         file_paths: List[Path],
         dialect: Optional[str] = None,
+        show_progress: bool = True,
     ) -> "GraphBuilder":
         """
         Add lineage from multiple SQL files.
@@ -217,12 +236,31 @@ class GraphBuilder:
         Args:
             file_paths: List of paths to SQL files
             dialect: SQL dialect (uses builder default if not specified)
+            show_progress: Whether to print progress messages

         Returns:
            self for method chaining
         """
-        for file_path in file_paths:
-            self.add_file(file_path, dialect)
+        if not file_paths:
+            return self
+
+        if show_progress:
+            total = len(file_paths)
+            with Progress(
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TaskProgressColumn(),
+                console=console,
+                transient=False,
+            ) as progress:
+                task = progress.add_task("Parsing", total=total)
+                for i, file_path in enumerate(file_paths, start=1):
+                    console.print(f"Parsing file {i}/{total}: {file_path.name}")
+                    self.add_file(file_path, dialect)
+                    progress.advance(task)
+        else:
+            for file_path in file_paths:
+                self.add_file(file_path, dialect)
         return self

     def _ensure_node(
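
The net effect of these hunks: add_directory now routes through add_files, and both add_files and add_manifest wrap parsing in a Rich progress bar while also printing a persistent "Parsing file i/N" line per file. A caller-facing sketch; the GraphBuilder constructor and module path are not shown in this diff, so the builder is passed in rather than created here:

from pathlib import Path
from typing import List, Optional


def parse_sql(builder, file_paths: List[Path], dialect: Optional[str] = None):
    """Feed SQL files to an existing GraphBuilder-like object.

    `builder` is assumed to expose the add_files method shown above; its
    construction is outside the scope of this diff.
    """
    # With show_progress=True (the default) this prints
    # "Parsing file i/N: <name>" and advances a Rich progress bar per file.
    return builder.add_files(sorted(file_paths), dialect=dialect, show_progress=True)


# Quiet variant, e.g. when embedding the builder inside another tool's output:
# builder.add_files(file_paths, dialect="snowflake", show_progress=False)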