sql_glider-0.1.2-py3-none-any.whl
- sql_glider-0.1.2.dist-info/METADATA +721 -0
- sql_glider-0.1.2.dist-info/RECORD +26 -0
- sql_glider-0.1.2.dist-info/WHEEL +4 -0
- sql_glider-0.1.2.dist-info/entry_points.txt +6 -0
- sql_glider-0.1.2.dist-info/licenses/LICENSE +201 -0
- sqlglider/__init__.py +3 -0
- sqlglider/_version.py +34 -0
- sqlglider/cli.py +1137 -0
- sqlglider/global_models.py +17 -0
- sqlglider/graph/__init__.py +42 -0
- sqlglider/graph/builder.py +310 -0
- sqlglider/graph/merge.py +136 -0
- sqlglider/graph/models.py +289 -0
- sqlglider/graph/query.py +287 -0
- sqlglider/graph/serialization.py +107 -0
- sqlglider/lineage/__init__.py +10 -0
- sqlglider/lineage/analyzer.py +1183 -0
- sqlglider/lineage/formatters.py +335 -0
- sqlglider/templating/__init__.py +51 -0
- sqlglider/templating/base.py +103 -0
- sqlglider/templating/jinja.py +163 -0
- sqlglider/templating/registry.py +124 -0
- sqlglider/templating/variables.py +295 -0
- sqlglider/utils/__init__.py +11 -0
- sqlglider/utils/config.py +130 -0
- sqlglider/utils/file_utils.py +38 -0
sqlglider/cli.py
ADDED
@@ -0,0 +1,1137 @@
"""CLI entry point for SQL Glider."""

import json
import sys
from pathlib import Path
from typing import Callable, List, Optional

import typer
from rich.console import Console
from rich.table import Table
from sqlglot.errors import ParseError
from typing_extensions import Annotated

from sqlglider.global_models import AnalysisLevel, NodeFormat
from sqlglider.lineage.analyzer import LineageAnalyzer
from sqlglider.lineage.formatters import (
    CsvFormatter,
    JsonFormatter,
    OutputWriter,
    TableCsvFormatter,
    TableJsonFormatter,
    TableTextFormatter,
    TextFormatter,
)
from sqlglider.templating import (
    TemplaterError,
    get_templater,
    list_templaters,
    load_all_variables,
)
from sqlglider.utils.config import load_config

app = typer.Typer(
    name="sqlglider",
    help="SQL Utility Toolkit for better understanding, use, and governance of your queries.",
    invoke_without_command=False,
)
console = Console()
err_console = Console(stderr=True)


def _apply_templating(
    sql: str,
    templater_name: Optional[str],
    cli_vars: Optional[List[str]],
    vars_file: Optional[Path],
    config,
    source_path: Optional[Path] = None,
) -> str:
    """Apply templating to SQL if a templater is specified.

    Args:
        sql: The SQL string to template.
        templater_name: Name of the templater to use (e.g., "jinja").
            If None, returns sql unchanged.
        cli_vars: List of CLI variable strings in "key=value" format.
        vars_file: Path to a variables file (JSON or YAML).
        config: The loaded ConfigSettings object.
        source_path: Path to source file for resolving includes.

    Returns:
        The templated SQL string, or the original if no templater specified.
    """
    if not templater_name:
        return sql

    # Get variables from config
    config_vars_file = None
    config_vars = None
    if config.templating:
        if config.templating.variables_file and not vars_file:
            config_vars_file = Path(config.templating.variables_file)
            if not config_vars_file.exists():
                err_console.print(
                    f"[yellow]Warning:[/yellow] Variables file from config "
                    f"not found: {config_vars_file}"
                )
                config_vars_file = None
        config_vars = config.templating.variables

    # Load variables from all sources
    variables = load_all_variables(
        cli_vars=cli_vars,
        vars_file=vars_file or config_vars_file,
        config_vars=config_vars,
        use_env=True,
    )

    # Get templater instance and render
    templater_instance = get_templater(templater_name)
    return templater_instance.render(sql, variables=variables, source_path=source_path)


@app.callback()
def main():
    """SQL Glider - SQL Utility Toolkit."""
    pass


@app.command()
def lineage(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL file to analyze (reads from stdin if not provided)",
        ),
    ],
    level: Optional[str] = typer.Option(
        None,
        "--level",
        "-l",
        help="Analysis level: 'column' or 'table' (default: column, or from config)",
    ),
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark, or from config)",
    ),
    column: Optional[str] = typer.Option(
        None,
        "--column",
        "-c",
        help="Specific output column for forward lineage (default: all columns)",
    ),
    source_column: Optional[str] = typer.Option(
        None,
        "--source-column",
        "-s",
        help="Source column for reverse lineage (impact analysis)",
    ),
    table_filter: Optional[str] = typer.Option(
        None,
        "--table",
        help="Filter to only queries that reference this table (for multi-query files)",
    ),
    output_format: Optional[str] = typer.Option(
        None,
        "--output-format",
        "-f",
        help="Output format: 'text', 'json', or 'csv' (default: text, or from config)",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output-file",
        "-o",
        help="Write output to file instead of stdout",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
) -> None:
    """
    Analyze column or table lineage for a SQL file.

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # Forward lineage: Find sources for output column
        sqlglider lineage query.sql --column order_total

        # Reverse lineage: Find outputs affected by source column
        sqlglider lineage query.sql --source-column orders.customer_id

        # Analyze all columns (forward lineage)
        sqlglider lineage query.sql

        # Analyze table-level lineage
        sqlglider lineage query.sql --level table

        # Export to JSON
        sqlglider lineage query.sql --output-format json --output-file lineage.json

        # Use different SQL dialect
        sqlglider lineage query.sql --dialect postgres

        # Analyze templated SQL with Jinja2
        sqlglider lineage query.sql --templater jinja --var schema=analytics

        # Use variables file for templating
        sqlglider lineage query.sql --templater jinja --vars-file vars.json
    """
    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    dialect = dialect or config.dialect or "spark"
    level_str = level or config.level or "column"
    output_format = output_format or config.output_format or "text"
    templater = templater or config.templater  # None means no templating
    # Validate and convert level to enum
    try:
        analysis_level = AnalysisLevel(level_str)
    except ValueError:
        err_console.print(
            f"[red]Error:[/red] Invalid level '{level_str}'. Use 'column' or 'table'."
        )
        raise typer.Exit(1)

    # Validate output format
    if output_format not in ["text", "json", "csv"]:
        err_console.print(
            f"[red]Error:[/red] Invalid output format '{output_format}'. "
            "Use 'text', 'json', or 'csv'."
        )
        raise typer.Exit(1)

    # Validate mutual exclusivity of column and source_column
    if column and source_column:
        err_console.print(
            "[red]Error:[/red] Cannot specify both --column and --source-column. "
            "Use --column for forward lineage or --source-column for reverse lineage."
        )
        raise typer.Exit(1)

    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating if specified
        sql = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Create analyzer
        analyzer = LineageAnalyzer(sql, dialect=dialect)

        # Unified lineage analysis (handles both single and multi-query files)
        results = analyzer.analyze_queries(
            level=analysis_level,
            column=column,
            source_column=source_column,
            table_filter=table_filter,
        )

        # Print warnings for skipped queries
        for skipped in analyzer.skipped_queries:
            err_console.print(
                f"[yellow]Warning:[/yellow] Skipping query {skipped.query_index} "
                f"({skipped.statement_type}): {skipped.reason}"
            )

        # Format and output based on output format
        if output_format == "text":
            if output_file:
                # For file output, use a string-based console to capture output
                from io import StringIO

                from rich.console import Console as FileConsole

                string_buffer = StringIO()
                file_console = FileConsole(file=string_buffer, force_terminal=False)
                TextFormatter.format(results, file_console)
                output_file.write_text(string_buffer.getvalue(), encoding="utf-8")
                console.print(
                    f"[green]Success:[/green] Lineage written to {output_file}"
                )
            else:
                # Direct console output with Rich formatting
                TextFormatter.format(results, console)
        elif output_format == "json":
            formatted = JsonFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Lineage written to {output_file}"
                )
        else:  # csv
            formatted = CsvFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Lineage written to {output_file}"
                )

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


@app.command()
def tables(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL file to analyze (reads from stdin if not provided)",
        ),
    ],
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark, or from config)",
    ),
    table_filter: Optional[str] = typer.Option(
        None,
        "--table",
        help="Filter to only queries that reference this table (for multi-query files)",
    ),
    output_format: Optional[str] = typer.Option(
        None,
        "--output-format",
        "-f",
        help="Output format: 'text', 'json', or 'csv' (default: text, or from config)",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output-file",
        "-o",
        help="Write output to file instead of stdout",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
) -> None:
    """
    List all tables involved in a SQL file.

    Outputs table names with their usage type (INPUT, OUTPUT, or BOTH) and
    object type (TABLE, VIEW, CTE, or UNKNOWN).

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # List all tables in a SQL file
        sqlglider tables query.sql

        # Export to JSON
        sqlglider tables query.sql --output-format json

        # Export to CSV file
        sqlglider tables query.sql --output-format csv --output-file tables.csv

        # Use different SQL dialect
        sqlglider tables query.sql --dialect postgres

        # Filter to queries referencing a specific table
        sqlglider tables query.sql --table customers

        # Analyze templated SQL with Jinja2
        sqlglider tables query.sql --templater jinja --var schema=analytics
    """
    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    dialect = dialect or config.dialect or "spark"
    output_format = output_format or config.output_format or "text"
    templater = templater or config.templater  # None means no templating

    # Validate output format
    if output_format not in ["text", "json", "csv"]:
        err_console.print(
            f"[red]Error:[/red] Invalid output format '{output_format}'. "
            "Use 'text', 'json', or 'csv'."
        )
        raise typer.Exit(1)

    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating if specified
        sql = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Create analyzer
        analyzer = LineageAnalyzer(sql, dialect=dialect)

        # Analyze tables
        results = analyzer.analyze_tables(table_filter=table_filter)

        # Format and output based on output format
        if output_format == "text":
            if output_file:
                # For file output, use a string-based console to capture output
                from io import StringIO

                from rich.console import Console as FileConsole

                string_buffer = StringIO()
                file_console = FileConsole(file=string_buffer, force_terminal=False)
                TableTextFormatter.format(results, file_console)
                output_file.write_text(string_buffer.getvalue(), encoding="utf-8")
                console.print(
                    f"[green]Success:[/green] Tables written to {output_file}"
                )
            else:
                # Direct console output with Rich formatting
                TableTextFormatter.format(results, console)
        elif output_format == "json":
            formatted = TableJsonFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Tables written to {output_file}"
                )
        else:  # csv
            formatted = TableCsvFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Tables written to {output_file}"
                )

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


@app.command()
def template(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL template file to render (reads from stdin if not provided)",
        ),
    ],
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater to use (default: jinja, or from config)",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output-file",
        "-o",
        help="Write output to file instead of stdout",
    ),
    list_available: bool = typer.Option(
        False,
        "--list",
        "-l",
        help="List available templaters and exit",
    ),
) -> None:
    """
    Render a SQL template file with variable substitution.

    Uses the specified templater (default: jinja) to process the SQL file
    with template variables. Variables can be provided via CLI, file, or config.

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # Basic template rendering
        sqlglider template query.sql --var schema=analytics --var table=users

        # Using a variables file
        sqlglider template query.sql --vars-file vars.json

        # Output to file
        sqlglider template query.sql --var schema=prod -o rendered.sql

        # List available templaters
        sqlglider template query.sql --list

        # Use specific templater
        sqlglider template query.sql --templater jinja --var name=test
    """
    # Handle --list option
    if list_available:
        available = list_templaters()
        if available:
            console.print("[bold]Available templaters:[/bold]")
            for name in available:
                console.print(f" - {name}")
        else:
            console.print("[yellow]No templaters available[/yellow]")
        raise typer.Exit(0)

    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    # For template command, default to "jinja" (always apply templating)
    templater = templater or config.templater or "jinja"

    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating (always for template command)
        rendered = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Write output
        if output_file:
            output_file.write_text(rendered, encoding="utf-8")
            console.print(
                f"[green]Success:[/green] Rendered SQL written to {output_file}"
            )
        else:
            print(rendered)

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


# Graph command group
graph_app = typer.Typer(
    name="graph",
    help="Graph-based lineage analysis commands.",
)
app.add_typer(graph_app, name="graph")


@graph_app.command("build")
def graph_build(
    paths: List[Path] = typer.Argument(
        None,
        help="SQL file(s) or directory path to process",
    ),
    output: Path = typer.Option(
        ...,
        "--output",
        "-o",
        help="Output file path for serialized graph (required)",
    ),
    recursive: bool = typer.Option(
        False,
        "--recursive",
        "-r",
        help="Recursively search directories for SQL files",
    ),
    glob_pattern: str = typer.Option(
        "*.sql",
        "--glob",
        "-g",
        help="Glob pattern for matching SQL files in directories",
    ),
    manifest: Optional[Path] = typer.Option(
        None,
        "--manifest",
        "-m",
        exists=True,
        help="Path to manifest CSV file with file_path and optional dialect columns",
    ),
    node_format: str = typer.Option(
        "qualified",
        "--node-format",
        "-n",
        help="Node identifier format: 'qualified' or 'structured'",
    ),
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark, falls back if not in manifest)",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
) -> None:
    """
    Build a lineage graph from SQL files.

    Supports multiple input modes:
    - Single file: sqlglider graph build query.sql -o graph.json
    - Multiple files: sqlglider graph build query1.sql query2.sql -o graph.json
    - Directory: sqlglider graph build ./queries/ -r -o graph.json
    - Manifest: sqlglider graph build --manifest manifest.csv -o graph.json

    Examples:

        # Build from single file
        sqlglider graph build query.sql -o graph.json

        # Build from directory (recursive)
        sqlglider graph build ./queries/ -r -o graph.json

        # Build from manifest with custom dialect
        sqlglider graph build --manifest manifest.csv -o graph.json --dialect postgres

        # Build with structured node format
        sqlglider graph build query.sql -o graph.json --node-format structured

        # Build with Jinja2 templating
        sqlglider graph build ./queries/ -o graph.json --templater jinja --var schema=prod
    """
    from sqlglider.graph.builder import GraphBuilder
    from sqlglider.graph.serialization import save_graph

    # Load config for defaults
    config = load_config()
    dialect = dialect or config.dialect or "spark"
    templater = templater or config.templater  # None means no templating

    # Validate and convert node format to enum
    try:
        node_format_enum = NodeFormat(node_format)
    except ValueError:
        err_console.print(
            f"[red]Error:[/red] Invalid node format '{node_format}'. "
            "Use 'qualified' or 'structured'."
        )
        raise typer.Exit(1)

    # Validate inputs
    if not paths and not manifest:
        err_console.print(
            "[red]Error:[/red] Must provide either file/directory paths or --manifest option."
        )
        raise typer.Exit(1)

    # Create SQL preprocessor if templating is enabled
    sql_preprocessor: Optional[Callable[[str, Path], str]] = None
    if templater:
        # Load variables once for all files
        config_vars_file = None
        config_vars = None
        if config.templating:
            if config.templating.variables_file and not vars_file:
                config_vars_file = Path(config.templating.variables_file)
                if not config_vars_file.exists():
                    err_console.print(
                        f"[yellow]Warning:[/yellow] Variables file from config "
                        f"not found: {config_vars_file}"
                    )
                    config_vars_file = None
            config_vars = config.templating.variables

        variables = load_all_variables(
            cli_vars=var,
            vars_file=vars_file or config_vars_file,
            config_vars=config_vars,
            use_env=True,
        )

        templater_instance = get_templater(templater)

        def _preprocess(sql: str, file_path: Path) -> str:
            return templater_instance.render(
                sql, variables=variables, source_path=file_path
            )

        sql_preprocessor = _preprocess

    try:
        builder = GraphBuilder(
            node_format=node_format_enum,
            dialect=dialect,
            sql_preprocessor=sql_preprocessor,
        )

        # Process manifest if provided
        if manifest:
            builder.add_manifest(manifest, dialect=dialect)

        # Process paths
        if paths:
            for path in paths:
                if path.is_dir():
                    builder.add_directory(
                        path,
                        recursive=recursive,
                        glob_pattern=glob_pattern,
                        dialect=dialect,
                    )
                elif path.is_file():
                    builder.add_file(path, dialect=dialect)
                else:
                    err_console.print(f"[red]Error:[/red] Path not found: {path}")
                    raise typer.Exit(1)

        # Build and save graph
        graph = builder.build()
        save_graph(graph, output)

        console.print(
            f"[green]Success:[/green] Graph saved to {output} "
            f"({graph.metadata.total_nodes} nodes, {graph.metadata.total_edges} edges)"
        )

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


@graph_app.command("merge")
def graph_merge(
    inputs: List[Path] = typer.Argument(
        None,
        help="JSON graph files to merge",
    ),
    output: Path = typer.Option(
        ...,
        "--output",
        "-o",
        help="Output file path for merged graph (required)",
    ),
    glob_pattern: Optional[str] = typer.Option(
        None,
        "--glob",
        "-g",
        help="Glob pattern for matching graph JSON files (e.g., 'graphs/*.json')",
    ),
) -> None:
    """
    Merge multiple lineage graphs into one.

    Nodes are deduplicated by identifier. Edges are deduplicated by source-target pair.

    Examples:

        # Merge specific files
        sqlglider graph merge graph1.json graph2.json -o merged.json

        # Merge with glob pattern
        sqlglider graph merge --glob "graphs/*.json" -o merged.json

        # Combine both
        sqlglider graph merge extra.json --glob "graphs/*.json" -o merged.json
    """
    from sqlglider.graph.merge import GraphMerger
    from sqlglider.graph.serialization import save_graph

    # Validate inputs
    if not inputs and not glob_pattern:
        err_console.print(
            "[red]Error:[/red] Must provide either graph files or --glob option."
        )
        raise typer.Exit(1)

    try:
        merger = GraphMerger()

        # Process glob pattern if provided
        if glob_pattern:
            glob_files = sorted(Path(".").glob(glob_pattern))
            if not glob_files:
                err_console.print(
                    f"[yellow]Warning:[/yellow] No files matched pattern: {glob_pattern}"
                )
            for graph_file in glob_files:
                if graph_file.is_file():
                    merger.add_file(graph_file)

        # Process explicit inputs
        if inputs:
            for graph_file in inputs:
                if not graph_file.exists():
                    err_console.print(f"[red]Error:[/red] File not found: {graph_file}")
                    raise typer.Exit(1)
                merger.add_file(graph_file)

        # Merge and save
        merged_graph = merger.merge()
        save_graph(merged_graph, output)

        console.print(
            f"[green]Success:[/green] Merged graph saved to {output} "
            f"({merged_graph.metadata.total_nodes} nodes, {merged_graph.metadata.total_edges} edges, "
            f"{len(merged_graph.metadata.source_files)} source files)"
        )

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


@graph_app.command("query")
def graph_query(
    graph_file: Path = typer.Argument(
        ...,
        exists=True,
        help="Path to graph JSON file",
    ),
    upstream: Optional[str] = typer.Option(
        None,
        "--upstream",
        "-u",
        help="Find all source columns that contribute to this column",
    ),
    downstream: Optional[str] = typer.Option(
        None,
        "--downstream",
        "-d",
        help="Find all columns affected by this source column",
    ),
    output_format: str = typer.Option(
        "text",
        "--output-format",
        "-f",
        help="Output format: 'text', 'json', or 'csv'",
    ),
) -> None:
    """
    Query a lineage graph for upstream or downstream dependencies.

    Examples:

        # Find all source columns for a target
        sqlglider graph query graph.json --upstream orders.customer_id

        # Find all columns affected by a source
        sqlglider graph query graph.json --downstream customers.customer_id

        # JSON output
        sqlglider graph query graph.json --upstream orders.total -f json

        # CSV output
        sqlglider graph query graph.json --downstream orders.order_id -f csv
    """
    from sqlglider.graph.query import GraphQuerier

    # Validate options
    if not upstream and not downstream:
        err_console.print(
            "[red]Error:[/red] Must specify either --upstream or --downstream."
        )
        raise typer.Exit(1)

    if upstream and downstream:
        err_console.print(
            "[red]Error:[/red] Cannot specify both --upstream and --downstream. "
            "Choose one direction."
        )
        raise typer.Exit(1)

    if output_format not in ["text", "json", "csv"]:
        err_console.print(
            f"[red]Error:[/red] Invalid output format '{output_format}'. "
            "Use 'text', 'json', or 'csv'."
        )
        raise typer.Exit(1)

    try:
        querier = GraphQuerier.from_file(graph_file)

        if upstream:
            result = querier.find_upstream(upstream)
        else:
            assert downstream is not None  # Validated above
            result = querier.find_downstream(downstream)

        # Format and output
        if output_format == "text":
            _format_query_result_text(result)
        elif output_format == "json":
            _format_query_result_json(result)
        else:  # csv
            _format_query_result_csv(result)

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


def _format_query_result_text(result) -> None:
    """Format query result as text table."""
    direction_label = (
        "Sources" if result.direction == "upstream" else "Affected Columns"
    )

    table = Table(title=f"{direction_label} for '{result.query_column}'")
    table.add_column("Column", style="cyan")
    table.add_column("Table", style="green")
    table.add_column("Hops", style="yellow", justify="right")
    table.add_column("Root", style="magenta", justify="center")
    table.add_column("Leaf", style="magenta", justify="center")
    table.add_column("Paths", style="dim")
    table.add_column("File", style="dim")

    for node in result.related_columns:
        # Format paths as newline-separated arrow strings
        paths_str = (
            "\n".join(p.to_arrow_string() for p in node.paths) if node.paths else "-"
        )

        table.add_row(
            node.column or node.identifier,
            node.table or "",
            str(node.hops),
            "Y" if node.is_root else "N",
            "Y" if node.is_leaf else "N",
            paths_str,
            Path(node.file_path).name if node.file_path else "",
        )

    if len(result) == 0:
        console.print(
            f"[yellow]No {direction_label.lower()} found for '{result.query_column}'[/yellow]"
        )
    else:
        console.print(table)
        console.print(f"\n[dim]Total: {len(result)} column(s)[/dim]")


def _format_query_result_json(result) -> None:
    """Format query result as JSON."""
    columns = []
    for node in result.related_columns:
        node_data = node.model_dump()
        # Serialize paths as arrays of node identifiers for cleaner output
        node_data["paths"] = [p.nodes for p in node.paths]
        columns.append(node_data)

    output = {
        "query_column": result.query_column,
        "direction": result.direction,
        "count": len(result),
        "columns": columns,
    }
    print(json.dumps(output, indent=2))


def _format_query_result_csv(result) -> None:
    """Format query result as CSV."""
    print(
        "identifier,table,column,hops,output_column,is_root,is_leaf,paths,file_path,query_index"
    )
    for node in result.related_columns:
        file_path = node.file_path.replace('"', '""') if node.file_path else ""
        # Format paths as semicolon-separated arrow strings
        paths_str = (
            ";".join(p.to_arrow_string() for p in node.paths) if node.paths else ""
        )
        paths_str = paths_str.replace('"', '""')

        print(
            f'"{node.identifier}","{node.table or ""}","{node.column or ""}",'
            f'{node.hops},"{node.output_column}",'
            f"{'true' if node.is_root else 'false'},"
            f"{'true' if node.is_leaf else 'false'},"
            f'"{paths_str}","{file_path}",{node.query_index}'
        )


if __name__ == "__main__":
    app()
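Since the module exposes a plain typer application object (`app`), it can be exercised in-process with typer's Click-based test runner instead of the installed `sqlglider` console script. The sketch below is illustrative only: it assumes a `query.sql` file exists in the working directory, and its flags simply mirror the `lineage` command defined above.

    from typer.testing import CliRunner

    from sqlglider.cli import app

    runner = CliRunner()

    # Invoke the `lineage` command as the console script would, capturing
    # output and the exit code instead of writing to a live terminal.
    result = runner.invoke(
        app, ["lineage", "query.sql", "--level", "table", "--output-format", "json"]
    )
    print(result.exit_code)  # 0 on success; 1 on any of the error paths above
    print(result.output)     # JSON emitted via JsonFormatter / OutputWriter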