sql-glider 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlglider/cli.py ADDED
@@ -0,0 +1,1137 @@
1
+ """CLI entry point for SQL Glider."""
2
+
3
+ import json
4
+ import sys
5
+ from pathlib import Path
6
+ from typing import Callable, List, Optional
7
+
8
+ import typer
9
+ from rich.console import Console
10
+ from rich.table import Table
11
+ from sqlglot.errors import ParseError
12
+ from typing_extensions import Annotated
13
+
14
+ from sqlglider.global_models import AnalysisLevel, NodeFormat
15
+ from sqlglider.lineage.analyzer import LineageAnalyzer
16
+ from sqlglider.lineage.formatters import (
17
+ CsvFormatter,
18
+ JsonFormatter,
19
+ OutputWriter,
20
+ TableCsvFormatter,
21
+ TableJsonFormatter,
22
+ TableTextFormatter,
23
+ TextFormatter,
24
+ )
25
+ from sqlglider.templating import (
26
+ TemplaterError,
27
+ get_templater,
28
+ list_templaters,
29
+ load_all_variables,
30
+ )
31
+ from sqlglider.utils.config import load_config
32
+
33
# Top-level Typer application; subcommands below register themselves on it
# via decorators. invoke_without_command=False means a bare `sqlglider`
# invocation shows help instead of running.
app = typer.Typer(
    name="sqlglider",
    help="SQL Utility Toolkit for better understanding, use, and governance of your queries.",
    invoke_without_command=False,
)
# Rich consoles: normal results go to stdout, warnings/errors to stderr so
# piped command output stays machine-readable.
console = Console()
err_console = Console(stderr=True)
40
+
41
+
42
def _apply_templating(
    sql: str,
    templater_name: Optional[str],
    cli_vars: Optional[List[str]],
    vars_file: Optional[Path],
    config,
    source_path: Optional[Path] = None,
) -> str:
    """Render *sql* through the requested templater, if one is configured.

    Args:
        sql: Raw SQL text to render.
        templater_name: Templater identifier (e.g., "jinja"); when falsy the
            SQL is returned untouched.
        cli_vars: "key=value" variable strings supplied on the command line.
        vars_file: Optional path to a JSON or YAML variables file.
        config: The loaded ConfigSettings object.
        source_path: Originating SQL file, used to resolve template includes.

    Returns:
        The rendered SQL, or the original string when no templater is set.
    """
    if not templater_name:
        return sql

    # Config-provided variables act as fallbacks when the CLI equivalents
    # are absent.
    config_vars_file: Optional[Path] = None
    config_vars = None
    templating_cfg = config.templating
    if templating_cfg:
        if templating_cfg.variables_file and not vars_file:
            candidate = Path(templating_cfg.variables_file)
            if candidate.exists():
                config_vars_file = candidate
            else:
                # A stale config path is a warning, not a hard failure.
                err_console.print(
                    f"[yellow]Warning:[/yellow] Variables file from config "
                    f"not found: {candidate}"
                )
        config_vars = templating_cfg.variables

    # Merge variables from CLI, file, config, and environment sources.
    merged_variables = load_all_variables(
        cli_vars=cli_vars,
        vars_file=vars_file or config_vars_file,
        config_vars=config_vars,
        use_env=True,
    )

    return get_templater(templater_name).render(
        sql, variables=merged_variables, source_path=source_path
    )
92
+
93
+
94
@app.callback()
def main():
    """SQL Glider - SQL Utility Toolkit."""
    # Intentionally empty: registering a callback makes Typer treat
    # `sqlglider` as a command group that requires an explicit subcommand.
    pass
98
+
99
+
100
@app.command()
def lineage(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL file to analyze (reads from stdin if not provided)",
        ),
    ],
    level: Optional[str] = typer.Option(
        None,
        "--level",
        "-l",
        help="Analysis level: 'column' or 'table' (default: column, or from config)",
    ),
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark, or from config)",
    ),
    column: Optional[str] = typer.Option(
        None,
        "--column",
        "-c",
        help="Specific output column for forward lineage (default: all columns)",
    ),
    source_column: Optional[str] = typer.Option(
        None,
        "--source-column",
        "-s",
        help="Source column for reverse lineage (impact analysis)",
    ),
    table_filter: Optional[str] = typer.Option(
        None,
        "--table",
        help="Filter to only queries that reference this table (for multi-query files)",
    ),
    output_format: Optional[str] = typer.Option(
        None,
        "--output-format",
        "-f",
        help="Output format: 'text', 'json', or 'csv' (default: text, or from config)",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output-file",
        "-o",
        help="Write output to file instead of stdout",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
) -> None:
    """
    Analyze column or table lineage for a SQL file.

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # Forward lineage: Find sources for output column
        sqlglider lineage query.sql --column order_total

        # Reverse lineage: Find outputs affected by source column
        sqlglider lineage query.sql --source-column orders.customer_id

        # Analyze all columns (forward lineage)
        sqlglider lineage query.sql

        # Analyze table-level lineage
        sqlglider lineage query.sql --level table

        # Export to JSON
        sqlglider lineage query.sql --output-format json --output-file lineage.json

        # Use different SQL dialect
        sqlglider lineage query.sql --dialect postgres

        # Analyze templated SQL with Jinja2
        sqlglider lineage query.sql --templater jinja --var schema=analytics

        # Use variables file for templating
        sqlglider lineage query.sql --templater jinja --vars-file vars.json
    """
    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    dialect = dialect or config.dialect or "spark"
    level_str = level or config.level or "column"
    output_format = output_format or config.output_format or "text"
    templater = templater or config.templater  # None means no templating

    # Validate and convert level to enum
    try:
        analysis_level = AnalysisLevel(level_str)
    except ValueError:
        err_console.print(
            f"[red]Error:[/red] Invalid level '{level_str}'. Use 'column' or 'table'."
        )
        raise typer.Exit(1)

    # Validate output format
    if output_format not in ["text", "json", "csv"]:
        err_console.print(
            f"[red]Error:[/red] Invalid output format '{output_format}'. "
            "Use 'text', 'json', or 'csv'."
        )
        raise typer.Exit(1)

    # Validate mutual exclusivity of column and source_column
    if column and source_column:
        err_console.print(
            "[red]Error:[/red] Cannot specify both --column and --source-column. "
            "Use --column for forward lineage or --source-column for reverse lineage."
        )
        raise typer.Exit(1)

    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating if specified
        sql = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Create analyzer
        analyzer = LineageAnalyzer(sql, dialect=dialect)

        # Unified lineage analysis (handles both single and multi-query files)
        results = analyzer.analyze_queries(
            level=analysis_level,
            column=column,
            source_column=source_column,
            table_filter=table_filter,
        )

        # Print warnings for skipped queries
        for skipped in analyzer.skipped_queries:
            err_console.print(
                f"[yellow]Warning:[/yellow] Skipping query {skipped.query_index} "
                f"({skipped.statement_type}): {skipped.reason}"
            )

        # Format and output based on output format
        if output_format == "text":
            if output_file:
                # For file output, use a string-based console to capture output
                from io import StringIO

                from rich.console import Console as FileConsole

                string_buffer = StringIO()
                file_console = FileConsole(file=string_buffer, force_terminal=False)
                TextFormatter.format(results, file_console)
                output_file.write_text(string_buffer.getvalue(), encoding="utf-8")
                console.print(
                    f"[green]Success:[/green] Lineage written to {output_file}"
                )
            else:
                # Direct console output with Rich formatting
                TextFormatter.format(results, console)
        elif output_format == "json":
            formatted = JsonFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Lineage written to {output_file}"
                )
        else:  # csv
            formatted = CsvFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Lineage written to {output_file}"
                )

    except typer.Exit:
        # typer.Exit subclasses Exception (via click's RuntimeError-based
        # Exit), so it must be re-raised here; otherwise the generic handler
        # below would mislabel a deliberate exit as an unexpected error.
        raise

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)
333
+
334
+
335
@app.command()
def tables(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL file to analyze (reads from stdin if not provided)",
        ),
    ],
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark, or from config)",
    ),
    table_filter: Optional[str] = typer.Option(
        None,
        "--table",
        help="Filter to only queries that reference this table (for multi-query files)",
    ),
    output_format: Optional[str] = typer.Option(
        None,
        "--output-format",
        "-f",
        help="Output format: 'text', 'json', or 'csv' (default: text, or from config)",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output-file",
        "-o",
        help="Write output to file instead of stdout",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
) -> None:
    """
    List all tables involved in a SQL file.

    Outputs table names with their usage type (INPUT, OUTPUT, or BOTH) and
    object type (TABLE, VIEW, CTE, or UNKNOWN).

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # List all tables in a SQL file
        sqlglider tables query.sql

        # Export to JSON
        sqlglider tables query.sql --output-format json

        # Export to CSV file
        sqlglider tables query.sql --output-format csv --output-file tables.csv

        # Use different SQL dialect
        sqlglider tables query.sql --dialect postgres

        # Filter to queries referencing a specific table
        sqlglider tables query.sql --table customers

        # Analyze templated SQL with Jinja2
        sqlglider tables query.sql --templater jinja --var schema=analytics
    """
    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    dialect = dialect or config.dialect or "spark"
    output_format = output_format or config.output_format or "text"
    templater = templater or config.templater  # None means no templating

    # Validate output format
    if output_format not in ["text", "json", "csv"]:
        err_console.print(
            f"[red]Error:[/red] Invalid output format '{output_format}'. "
            "Use 'text', 'json', or 'csv'."
        )
        raise typer.Exit(1)

    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating if specified
        sql = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Create analyzer
        analyzer = LineageAnalyzer(sql, dialect=dialect)

        # Analyze tables
        results = analyzer.analyze_tables(table_filter=table_filter)

        # Format and output based on output format
        if output_format == "text":
            if output_file:
                # For file output, use a string-based console to capture output
                from io import StringIO

                from rich.console import Console as FileConsole

                string_buffer = StringIO()
                file_console = FileConsole(file=string_buffer, force_terminal=False)
                TableTextFormatter.format(results, file_console)
                output_file.write_text(string_buffer.getvalue(), encoding="utf-8")
                console.print(
                    f"[green]Success:[/green] Tables written to {output_file}"
                )
            else:
                # Direct console output with Rich formatting
                TableTextFormatter.format(results, console)
        elif output_format == "json":
            formatted = TableJsonFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Tables written to {output_file}"
                )
        else:  # csv
            formatted = TableCsvFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Tables written to {output_file}"
                )

    except typer.Exit:
        # typer.Exit subclasses Exception (via click's RuntimeError-based
        # Exit), so it must be re-raised here; otherwise the generic handler
        # below would mislabel a deliberate exit as an unexpected error.
        raise

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)
518
+
519
+
520
@app.command()
def template(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL template file to render (reads from stdin if not provided)",
        ),
    ],
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater to use (default: jinja, or from config)",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output-file",
        "-o",
        help="Write output to file instead of stdout",
    ),
    list_available: bool = typer.Option(
        False,
        "--list",
        "-l",
        help="List available templaters and exit",
    ),
) -> None:
    """
    Render a SQL template file with variable substitution.

    Uses the specified templater (default: jinja) to process the SQL file
    with template variables. Variables can be provided via CLI, file, or config.

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # Basic template rendering
        sqlglider template query.sql --var schema=analytics --var table=users

        # Using a variables file
        sqlglider template query.sql --vars-file vars.json

        # Output to file
        sqlglider template query.sql --var schema=prod -o rendered.sql

        # List available templaters
        sqlglider template query.sql --list

        # Use specific templater
        sqlglider template query.sql --templater jinja --var name=test
    """
    # Handle --list option
    if list_available:
        available = list_templaters()
        if available:
            console.print("[bold]Available templaters:[/bold]")
            for name in available:
                console.print(f"  - {name}")
        else:
            console.print("[yellow]No templaters available[/yellow]")
        raise typer.Exit(0)

    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    # For template command, default to "jinja" (always apply templating)
    templater = templater or config.templater or "jinja"

    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating (always for template command)
        rendered = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Write output
        if output_file:
            output_file.write_text(rendered, encoding="utf-8")
            console.print(
                f"[green]Success:[/green] Rendered SQL written to {output_file}"
            )
        else:
            print(rendered)

    except typer.Exit:
        # typer.Exit subclasses Exception (via click's RuntimeError-based
        # Exit), so it must be re-raised here; otherwise the generic handler
        # below would mislabel a deliberate exit as an unexpected error.
        raise

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)
657
+
658
+
659
# Graph command group: `sqlglider graph build|merge|query` subcommands are
# registered on this nested Typer app and attached to the main app below.
graph_app = typer.Typer(
    name="graph",
    help="Graph-based lineage analysis commands.",
)
app.add_typer(graph_app, name="graph")
665
+
666
+
667
@graph_app.command("build")
def graph_build(
    paths: List[Path] = typer.Argument(
        None,
        help="SQL file(s) or directory path to process",
    ),
    output: Path = typer.Option(
        ...,
        "--output",
        "-o",
        help="Output file path for serialized graph (required)",
    ),
    recursive: bool = typer.Option(
        False,
        "--recursive",
        "-r",
        help="Recursively search directories for SQL files",
    ),
    glob_pattern: str = typer.Option(
        "*.sql",
        "--glob",
        "-g",
        help="Glob pattern for matching SQL files in directories",
    ),
    manifest: Optional[Path] = typer.Option(
        None,
        "--manifest",
        "-m",
        exists=True,
        help="Path to manifest CSV file with file_path and optional dialect columns",
    ),
    node_format: str = typer.Option(
        "qualified",
        "--node-format",
        "-n",
        help="Node identifier format: 'qualified' or 'structured'",
    ),
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark, falls back if not in manifest)",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
) -> None:
    """
    Build a lineage graph from SQL files.

    Supports multiple input modes:
    - Single file: sqlglider graph build query.sql -o graph.json
    - Multiple files: sqlglider graph build query1.sql query2.sql -o graph.json
    - Directory: sqlglider graph build ./queries/ -r -o graph.json
    - Manifest: sqlglider graph build --manifest manifest.csv -o graph.json

    Examples:

        # Build from single file
        sqlglider graph build query.sql -o graph.json

        # Build from directory (recursive)
        sqlglider graph build ./queries/ -r -o graph.json

        # Build from manifest with custom dialect
        sqlglider graph build --manifest manifest.csv -o graph.json --dialect postgres

        # Build with structured node format
        sqlglider graph build query.sql -o graph.json --node-format structured

        # Build with Jinja2 templating
        sqlglider graph build ./queries/ -o graph.json --templater jinja --var schema=prod
    """
    from sqlglider.graph.builder import GraphBuilder
    from sqlglider.graph.serialization import save_graph

    # Load config for defaults
    config = load_config()
    dialect = dialect or config.dialect or "spark"
    templater = templater or config.templater  # None means no templating

    # Validate and convert node format to enum
    try:
        node_format_enum = NodeFormat(node_format)
    except ValueError:
        err_console.print(
            f"[red]Error:[/red] Invalid node format '{node_format}'. "
            "Use 'qualified' or 'structured'."
        )
        raise typer.Exit(1)

    # Validate inputs
    if not paths and not manifest:
        err_console.print(
            "[red]Error:[/red] Must provide either file/directory paths or --manifest option."
        )
        raise typer.Exit(1)

    # Create SQL preprocessor if templating is enabled
    sql_preprocessor: Optional[Callable[[str, Path], str]] = None
    if templater:
        # Load variables once for all files
        config_vars_file = None
        config_vars = None
        if config.templating:
            if config.templating.variables_file and not vars_file:
                config_vars_file = Path(config.templating.variables_file)
                if not config_vars_file.exists():
                    err_console.print(
                        f"[yellow]Warning:[/yellow] Variables file from config "
                        f"not found: {config_vars_file}"
                    )
                    config_vars_file = None
            config_vars = config.templating.variables

        variables = load_all_variables(
            cli_vars=var,
            vars_file=vars_file or config_vars_file,
            config_vars=config_vars,
            use_env=True,
        )

        templater_instance = get_templater(templater)

        def _preprocess(sql: str, file_path: Path) -> str:
            # Render each file's SQL with the shared variable set; the file
            # path lets the templater resolve relative includes.
            return templater_instance.render(
                sql, variables=variables, source_path=file_path
            )

        sql_preprocessor = _preprocess

    try:
        builder = GraphBuilder(
            node_format=node_format_enum,
            dialect=dialect,
            sql_preprocessor=sql_preprocessor,
        )

        # Process manifest if provided
        if manifest:
            builder.add_manifest(manifest, dialect=dialect)

        # Process paths
        if paths:
            for path in paths:
                if path.is_dir():
                    builder.add_directory(
                        path,
                        recursive=recursive,
                        glob_pattern=glob_pattern,
                        dialect=dialect,
                    )
                elif path.is_file():
                    builder.add_file(path, dialect=dialect)
                else:
                    err_console.print(f"[red]Error:[/red] Path not found: {path}")
                    raise typer.Exit(1)

        # Build and save graph
        graph = builder.build()
        save_graph(graph, output)

        console.print(
            f"[green]Success:[/green] Graph saved to {output} "
            f"({graph.metadata.total_nodes} nodes, {graph.metadata.total_edges} edges)"
        )

    except typer.Exit:
        # typer.Exit subclasses Exception (via click's RuntimeError-based
        # Exit), so it must be re-raised here; otherwise the generic handler
        # below would mislabel a deliberate exit as an unexpected error.
        raise

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)
868
+
869
+
870
@graph_app.command("merge")
def graph_merge(
    inputs: List[Path] = typer.Argument(
        None,
        help="JSON graph files to merge",
    ),
    output: Path = typer.Option(
        ...,
        "--output",
        "-o",
        help="Output file path for merged graph (required)",
    ),
    glob_pattern: Optional[str] = typer.Option(
        None,
        "--glob",
        "-g",
        help="Glob pattern for matching graph JSON files (e.g., 'graphs/*.json')",
    ),
) -> None:
    """
    Merge multiple lineage graphs into one.

    Nodes are deduplicated by identifier. Edges are deduplicated by source-target pair.

    Examples:

        # Merge specific files
        sqlglider graph merge graph1.json graph2.json -o merged.json

        # Merge with glob pattern
        sqlglider graph merge --glob "graphs/*.json" -o merged.json

        # Combine both
        sqlglider graph merge extra.json --glob "graphs/*.json" -o merged.json
    """
    from sqlglider.graph.merge import GraphMerger
    from sqlglider.graph.serialization import save_graph

    # Validate inputs
    if not inputs and not glob_pattern:
        err_console.print(
            "[red]Error:[/red] Must provide either graph files or --glob option."
        )
        raise typer.Exit(1)

    try:
        merger = GraphMerger()

        # Process glob pattern if provided
        if glob_pattern:
            glob_files = sorted(Path(".").glob(glob_pattern))
            if not glob_files:
                err_console.print(
                    f"[yellow]Warning:[/yellow] No files matched pattern: {glob_pattern}"
                )
            for graph_file in glob_files:
                if graph_file.is_file():
                    merger.add_file(graph_file)

        # Process explicit inputs
        if inputs:
            for graph_file in inputs:
                if not graph_file.exists():
                    err_console.print(f"[red]Error:[/red] File not found: {graph_file}")
                    raise typer.Exit(1)
                merger.add_file(graph_file)

        # Merge and save
        merged_graph = merger.merge()
        save_graph(merged_graph, output)

        console.print(
            f"[green]Success:[/green] Merged graph saved to {output} "
            f"({merged_graph.metadata.total_nodes} nodes, {merged_graph.metadata.total_edges} edges, "
            f"{len(merged_graph.metadata.source_files)} source files)"
        )

    except typer.Exit:
        # typer.Exit subclasses Exception (via click's RuntimeError-based
        # Exit), so it must be re-raised here; otherwise the generic handler
        # below would mislabel a deliberate exit as an unexpected error.
        raise

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)
958
+
959
+
960
@graph_app.command("query")
def graph_query(
    graph_file: Path = typer.Argument(
        ...,
        exists=True,
        help="Path to graph JSON file",
    ),
    upstream: Optional[str] = typer.Option(
        None,
        "--upstream",
        "-u",
        help="Find all source columns that contribute to this column",
    ),
    downstream: Optional[str] = typer.Option(
        None,
        "--downstream",
        "-d",
        help="Find all columns affected by this source column",
    ),
    output_format: str = typer.Option(
        "text",
        "--output-format",
        "-f",
        help="Output format: 'text', 'json', or 'csv'",
    ),
) -> None:
    """
    Query a lineage graph for upstream or downstream dependencies.

    Examples:

        # Find all source columns for a target
        sqlglider graph query graph.json --upstream orders.customer_id

        # Find all columns affected by a source
        sqlglider graph query graph.json --downstream customers.customer_id

        # JSON output
        sqlglider graph query graph.json --upstream orders.total -f json

        # CSV output
        sqlglider graph query graph.json --downstream orders.order_id -f csv
    """
    from sqlglider.graph.query import GraphQuerier

    # Exactly one traversal direction must be requested.
    if not upstream and not downstream:
        err_console.print(
            "[red]Error:[/red] Must specify either --upstream or --downstream."
        )
        raise typer.Exit(1)

    if upstream and downstream:
        err_console.print(
            "[red]Error:[/red] Cannot specify both --upstream and --downstream. "
            "Choose one direction."
        )
        raise typer.Exit(1)

    if output_format not in ["text", "json", "csv"]:
        err_console.print(
            f"[red]Error:[/red] Invalid output format '{output_format}'. "
            "Use 'text', 'json', or 'csv'."
        )
        raise typer.Exit(1)

    try:
        querier = GraphQuerier.from_file(graph_file)

        # Direction was validated above: exactly one of the two is set.
        result = (
            querier.find_upstream(upstream)
            if upstream
            else querier.find_downstream(downstream)
        )

        # Dispatch to the renderer matching the validated output format.
        renderers = {
            "text": _format_query_result_text,
            "json": _format_query_result_json,
            "csv": _format_query_result_csv,
        }
        renderers[output_format](result)

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)
1054
+
1055
+
1056
def _format_query_result_text(result) -> None:
    """Render a graph query result as a Rich table on the console."""
    label = "Sources" if result.direction == "upstream" else "Affected Columns"

    # Nothing to tabulate: emit the "not found" notice and stop.
    if len(result) == 0:
        console.print(
            f"[yellow]No {label.lower()} found for '{result.query_column}'[/yellow]"
        )
        return

    table = Table(title=f"{label} for '{result.query_column}'")
    for heading, opts in (
        ("Column", {"style": "cyan"}),
        ("Table", {"style": "green"}),
        ("Hops", {"style": "yellow", "justify": "right"}),
        ("Root", {"style": "magenta", "justify": "center"}),
        ("Leaf", {"style": "magenta", "justify": "center"}),
        ("Paths", {"style": "dim"}),
        ("File", {"style": "dim"}),
    ):
        table.add_column(heading, **opts)

    for node in result.related_columns:
        # Each lineage path becomes one arrow-string line within the cell.
        if node.paths:
            paths_cell = "\n".join(p.to_arrow_string() for p in node.paths)
        else:
            paths_cell = "-"

        table.add_row(
            node.column or node.identifier,
            node.table or "",
            str(node.hops),
            "Y" if node.is_root else "N",
            "Y" if node.is_leaf else "N",
            paths_cell,
            Path(node.file_path).name if node.file_path else "",
        )

    console.print(table)
    console.print(f"\n[dim]Total: {len(result)} column(s)[/dim]")
1095
+
1096
def _format_query_result_json(result) -> None:
    """Emit a graph query result as pretty-printed JSON on stdout."""
    serialized = []
    for node in result.related_columns:
        entry = node.model_dump()
        # Flatten path objects to plain lists of node identifiers so the
        # JSON stays clean and tool-friendly.
        entry["paths"] = [p.nodes for p in node.paths]
        serialized.append(entry)

    payload = {
        "query_column": result.query_column,
        "direction": result.direction,
        "count": len(result),
        "columns": serialized,
    }
    print(json.dumps(payload, indent=2))
1112
+
1113
+
1114
def _format_query_result_csv(result) -> None:
    """Format query result as CSV on stdout.

    Fix: previously only the paths and file_path fields escaped embedded
    double quotes; identifier, table, column, and output_column were quoted
    but unescaped, producing malformed CSV when a value contained '"'.
    All quoted fields are now escaped per RFC 4180.
    """

    def esc(value: str) -> str:
        # RFC 4180: double any embedded double-quote inside a quoted field.
        return value.replace('"', '""')

    print(
        "identifier,table,column,hops,output_column,is_root,is_leaf,paths,file_path,query_index"
    )
    for node in result.related_columns:
        file_path = esc(node.file_path) if node.file_path else ""
        # Format paths as semicolon-separated arrow strings
        paths_str = (
            ";".join(p.to_arrow_string() for p in node.paths) if node.paths else ""
        )
        paths_str = esc(paths_str)

        print(
            f'"{esc(node.identifier)}","{esc(node.table or "")}","{esc(node.column or "")}",'
            f'{node.hops},"{esc(node.output_column)}",'
            f"{'true' if node.is_root else 'false'},"
            f"{'true' if node.is_leaf else 'false'},"
            f'"{paths_str}","{file_path}",{node.query_index}'
        )
1134
+
1135
+
1136
if __name__ == "__main__":
    # Allow invoking the CLI directly (e.g. `python -m sqlglider.cli`).
    app()