sql-glider 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlglider/cli.py ADDED
@@ -0,0 +1,1589 @@
"""CLI entry point for SQL Glider."""

import json
import sys
from pathlib import Path
from typing import Callable, List, Optional

import typer
from rich.console import Console
from rich.table import Table
from sqlglot.errors import ParseError
from typing_extensions import Annotated

from sqlglider.global_models import AnalysisLevel, NodeFormat
from sqlglider.lineage.analyzer import LineageAnalyzer
from sqlglider.lineage.formatters import (
    CsvFormatter,
    JsonFormatter,
    OutputWriter,
    TableCsvFormatter,
    TableJsonFormatter,
    TableTextFormatter,
    TextFormatter,
)
from sqlglider.templating import (
    TemplaterError,
    get_templater,
    list_templaters,
    load_all_variables,
)
from sqlglider.utils.config import load_config

app = typer.Typer(
    name="sqlglider",
    help="SQL Utility Toolkit for better understanding, use, and governance of your queries.",
    invoke_without_command=False,
)
console = Console()
err_console = Console(stderr=True)


def _apply_templating(
    sql: str,
    templater_name: Optional[str],
    cli_vars: Optional[List[str]],
    vars_file: Optional[Path],
    config,
    source_path: Optional[Path] = None,
) -> str:
    """Apply templating to SQL if a templater is specified.

    Args:
        sql: The SQL string to template.
        templater_name: Name of the templater to use (e.g., "jinja").
            If None, returns sql unchanged.
        cli_vars: List of CLI variable strings in "key=value" format.
        vars_file: Path to a variables file (JSON or YAML).
        config: The loaded ConfigSettings object.
        source_path: Path to source file for resolving includes.

    Returns:
        The templated SQL string, or the original if no templater specified.
    """
    if not templater_name:
        return sql

    # Get variables from config
    config_vars_file = None
    config_vars = None
    if config.templating:
        if config.templating.variables_file and not vars_file:
            config_vars_file = Path(config.templating.variables_file)
            if not config_vars_file.exists():
                err_console.print(
                    f"[yellow]Warning:[/yellow] Variables file from config "
                    f"not found: {config_vars_file}"
                )
                config_vars_file = None
        config_vars = config.templating.variables

    # Load variables from all sources
    variables = load_all_variables(
        cli_vars=cli_vars,
        vars_file=vars_file or config_vars_file,
        config_vars=config_vars,
        use_env=True,
    )

    # Get templater instance and render
    templater_instance = get_templater(templater_name)
    return templater_instance.render(sql, variables=variables, source_path=source_path)


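# A minimal sketch (not part of the released file) of how _apply_templating
# behaves, assuming a config object whose `templating` attribute is unset;
# the calls mirror the signatures used above:
#
#     from types import SimpleNamespace
#
#     cfg = SimpleNamespace(templating=None)
#     rendered = _apply_templating(
#         "SELECT * FROM {{ schema }}.orders",
#         templater_name="jinja",
#         cli_vars=["schema=analytics"],
#         vars_file=None,
#         config=cfg,
#     )
#     # Expected (assuming standard Jinja2 substitution):
#     # "SELECT * FROM analytics.orders"

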
@app.callback()
def main():
    """SQL Glider - SQL Utility Toolkit."""
    pass


@app.command()
def lineage(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL file to analyze (reads from stdin if not provided)",
        ),
    ],
    level: Optional[str] = typer.Option(
        None,
        "--level",
        "-l",
        help="Analysis level: 'column' or 'table' (default: column, or from config)",
    ),
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark, or from config)",
    ),
    column: Optional[str] = typer.Option(
        None,
        "--column",
        "-c",
        help="Specific output column for forward lineage (default: all columns)",
    ),
    source_column: Optional[str] = typer.Option(
        None,
        "--source-column",
        "-s",
        help="Source column for reverse lineage (impact analysis)",
    ),
    table_filter: Optional[str] = typer.Option(
        None,
        "--table",
        help="Filter to only queries that reference this table (for multi-query files)",
    ),
    output_format: Optional[str] = typer.Option(
        None,
        "--output-format",
        "-f",
        help="Output format: 'text', 'json', or 'csv' (default: text, or from config)",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output-file",
        "-o",
        help="Write output to file instead of stdout",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
) -> None:
    """
    Analyze column or table lineage for a SQL file.

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # Forward lineage: Find sources for output column
        sqlglider lineage query.sql --column order_total

        # Reverse lineage: Find outputs affected by source column
        sqlglider lineage query.sql --source-column orders.customer_id

        # Analyze all columns (forward lineage)
        sqlglider lineage query.sql

        # Analyze table-level lineage
        sqlglider lineage query.sql --level table

        # Export to JSON
        sqlglider lineage query.sql --output-format json --output-file lineage.json

        # Use different SQL dialect
        sqlglider lineage query.sql --dialect postgres

        # Analyze templated SQL with Jinja2
        sqlglider lineage query.sql --templater jinja --var schema=analytics

        # Use variables file for templating
        sqlglider lineage query.sql --templater jinja --vars-file vars.json
    """
    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    dialect = dialect or config.dialect or "spark"
    level_str = level or config.level or "column"
    output_format = output_format or config.output_format or "text"
    templater = templater or config.templater  # None means no templating

    # Validate and convert level to enum
    try:
        analysis_level = AnalysisLevel(level_str)
    except ValueError:
        err_console.print(
            f"[red]Error:[/red] Invalid level '{level_str}'. Use 'column' or 'table'."
        )
        raise typer.Exit(1)

    # Validate output format
    if output_format not in ["text", "json", "csv"]:
        err_console.print(
            f"[red]Error:[/red] Invalid output format '{output_format}'. "
            "Use 'text', 'json', or 'csv'."
        )
        raise typer.Exit(1)

    # Validate mutual exclusivity of column and source_column
    if column and source_column:
        err_console.print(
            "[red]Error:[/red] Cannot specify both --column and --source-column. "
            "Use --column for forward lineage or --source-column for reverse lineage."
        )
        raise typer.Exit(1)

    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating if specified
        sql = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Create analyzer
        analyzer = LineageAnalyzer(sql, dialect=dialect)

        # Unified lineage analysis (handles both single and multi-query files)
        results = analyzer.analyze_queries(
            level=analysis_level,
            column=column,
            source_column=source_column,
            table_filter=table_filter,
        )

        # Print warnings for skipped queries
        for skipped in analyzer.skipped_queries:
            err_console.print(
                f"[yellow]Warning:[/yellow] Skipping query {skipped.query_index} "
                f"({skipped.statement_type}): {skipped.reason}"
            )

        # Format and output based on output format
        if output_format == "text":
            if output_file:
                # For file output, use a string-based console to capture output
                from io import StringIO

                from rich.console import Console as FileConsole

                string_buffer = StringIO()
                file_console = FileConsole(file=string_buffer, force_terminal=False)
                TextFormatter.format(results, file_console)
                output_file.write_text(string_buffer.getvalue(), encoding="utf-8")
                console.print(
                    f"[green]Success:[/green] Lineage written to {output_file}"
                )
            else:
                # Direct console output with Rich formatting
                TextFormatter.format(results, console)
        elif output_format == "json":
            formatted = JsonFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Lineage written to {output_file}"
                )
        else:  # csv
            formatted = CsvFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Lineage written to {output_file}"
                )

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


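# A sketch (not part of the released file) of driving the same analysis
# programmatically; it assumes only the LineageAnalyzer API exercised by the
# command above:
#
#     analyzer = LineageAnalyzer(
#         "SELECT o.total AS order_total FROM orders AS o", dialect="spark"
#     )
#     results = analyzer.analyze_queries(
#         level=AnalysisLevel("column"),
#         column="order_total",
#         source_column=None,
#         table_filter=None,
#     )
#     for skipped in analyzer.skipped_queries:
#         ...  # non-analyzable statements are reported as warnings, not errors

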
# Tables command group
tables_app = typer.Typer(
    name="tables",
    help="Table-related analysis commands.",
)
app.add_typer(tables_app, name="tables")


@tables_app.command("overview")
def tables_overview(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL file to analyze (reads from stdin if not provided)",
        ),
    ],
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark, or from config)",
    ),
    table_filter: Optional[str] = typer.Option(
        None,
        "--table",
        help="Filter to only queries that reference this table (for multi-query files)",
    ),
    output_format: Optional[str] = typer.Option(
        None,
        "--output-format",
        "-f",
        help="Output format: 'text', 'json', or 'csv' (default: text, or from config)",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output-file",
        "-o",
        help="Write output to file instead of stdout",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
) -> None:
    """
    List all tables involved in a SQL file.

    Outputs table names with their usage type (INPUT, OUTPUT, or BOTH) and
    object type (TABLE, VIEW, CTE, or UNKNOWN).

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # List all tables in a SQL file
        sqlglider tables overview query.sql

        # Export to JSON
        sqlglider tables overview query.sql --output-format json

        # Export to CSV file
        sqlglider tables overview query.sql --output-format csv --output-file tables.csv

        # Use different SQL dialect
        sqlglider tables overview query.sql --dialect postgres

        # Filter to queries referencing a specific table
        sqlglider tables overview query.sql --table customers

        # Analyze templated SQL with Jinja2
        sqlglider tables overview query.sql --templater jinja --var schema=analytics
    """
    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    dialect = dialect or config.dialect or "spark"
    output_format = output_format or config.output_format or "text"
    templater = templater or config.templater  # None means no templating

    # Validate output format
    if output_format not in ["text", "json", "csv"]:
        err_console.print(
            f"[red]Error:[/red] Invalid output format '{output_format}'. "
            "Use 'text', 'json', or 'csv'."
        )
        raise typer.Exit(1)

    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating if specified
        sql = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Create analyzer
        analyzer = LineageAnalyzer(sql, dialect=dialect)

        # Analyze tables
        results = analyzer.analyze_tables(table_filter=table_filter)

        # Format and output based on output format
        if output_format == "text":
            if output_file:
                # For file output, use a string-based console to capture output
                from io import StringIO

                from rich.console import Console as FileConsole

                string_buffer = StringIO()
                file_console = FileConsole(file=string_buffer, force_terminal=False)
                TableTextFormatter.format(results, file_console)
                output_file.write_text(string_buffer.getvalue(), encoding="utf-8")
                console.print(
                    f"[green]Success:[/green] Tables written to {output_file}"
                )
            else:
                # Direct console output with Rich formatting
                TableTextFormatter.format(results, console)
        elif output_format == "json":
            formatted = TableJsonFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Tables written to {output_file}"
                )
        else:  # csv
            formatted = TableCsvFormatter.format(results)
            OutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Tables written to {output_file}"
                )

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


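# A sketch (not part of the released file) of the underlying call; it assumes
# the analyzer API as used above, where each result exposes a `tables` list
# whose entries carry `name` and `object_type`:
#
#     analyzer = LineageAnalyzer(
#         "INSERT INTO mart.orders SELECT * FROM raw.orders", dialect="spark"
#     )
#     for result in analyzer.analyze_tables():
#         for table_info in result.tables:
#             print(table_info.name, table_info.object_type)

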
@tables_app.command("pull")
def tables_pull(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL file to analyze (reads from stdin if not provided)",
        ),
    ],
    catalog_type: Optional[str] = typer.Option(
        None,
        "--catalog-type",
        "-c",
        help="Catalog provider (e.g., 'databricks'). Required if not in config.",
    ),
    ddl_folder: Optional[Path] = typer.Option(
        None,
        "--ddl-folder",
        "-o",
        help="Output folder for DDL files. If not provided, outputs to stdout.",
    ),
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark, or from config)",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
    list_available: bool = typer.Option(
        False,
        "--list",
        "-l",
        help="List available catalog providers and exit",
    ),
) -> None:
    """
    Pull DDL definitions from a remote catalog for tables used in SQL.

    Analyzes the SQL file to find referenced tables, then fetches their DDL
    from the specified catalog provider (e.g., Databricks Unity Catalog).

    CTEs are automatically excluded since they don't exist in remote catalogs.

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # Pull DDL for tables in a SQL file (output to stdout)
        sqlglider tables pull query.sql --catalog-type databricks

        # Pull DDL to a folder (one file per table)
        sqlglider tables pull query.sql -c databricks -o ./ddl/

        # Use config file for catalog settings
        sqlglider tables pull query.sql

        # With templating
        sqlglider tables pull query.sql -c databricks --templater jinja --var schema=prod

        # List available catalog providers
        sqlglider tables pull --list
    """
    from sqlglider.catalog import CatalogError, get_catalog, list_catalogs
    from sqlglider.lineage.analyzer import ObjectType

    # Handle --list option
    if list_available:
        available = list_catalogs()
        if available:
            console.print("[bold]Available catalog providers:[/bold]")
            for name in available:
                console.print(f" - {name}")
        else:
            console.print(
                "[yellow]No catalog providers available.[/yellow]\n"
                "Install a provider with: pip install sql-glider[databricks]"
            )
        raise typer.Exit(0)

    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    dialect = dialect or config.dialect or "spark"
    templater = templater or config.templater  # None means no templating
    catalog_type = catalog_type or config.catalog_type
    if ddl_folder is None and config.ddl_folder:
        ddl_folder = Path(config.ddl_folder)

    # Validate catalog_type is provided
    if not catalog_type:
        err_console.print(
            "[red]Error:[/red] No catalog provider specified. "
            "Use --catalog-type or set catalog_type in sqlglider.toml."
        )
        raise typer.Exit(1)

    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating if specified
        sql = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Create analyzer and extract tables
        analyzer = LineageAnalyzer(sql, dialect=dialect)
        table_results = analyzer.analyze_tables()

        # Collect unique table names, excluding CTEs
        table_names: set[str] = set()
        for result in table_results:
            for table_info in result.tables:
                if table_info.object_type != ObjectType.CTE:
                    table_names.add(table_info.name)

        if not table_names:
            console.print("[yellow]No tables found in SQL (CTEs excluded).[/yellow]")
            raise typer.Exit(0)

        # Get catalog instance and configure it
        catalog = get_catalog(catalog_type)

        # Build catalog config from config file
        catalog_config: dict[str, str] = {}
        if (
            config.catalog
            and catalog_type == "databricks"
            and config.catalog.databricks
        ):
            db_config = config.catalog.databricks
            if db_config.warehouse_id:
                catalog_config["warehouse_id"] = db_config.warehouse_id
            if db_config.profile:
                catalog_config["profile"] = db_config.profile
            if db_config.host:
                catalog_config["host"] = db_config.host
            if db_config.token:
                catalog_config["token"] = db_config.token

        catalog.configure(catalog_config)

        # Fetch DDL for all tables
        console.print(
            f"[dim]Fetching DDL for {len(table_names)} table(s) from {catalog_type}...[/dim]"
        )
        ddl_results = catalog.get_ddl_batch(list(table_names))

        # Count successes and failures
        successes = 0
        failures = 0

        # Output DDL
        if ddl_folder:
            # Create output folder if it doesn't exist
            ddl_folder.mkdir(parents=True, exist_ok=True)

            for table_name, ddl in ddl_results.items():
                if ddl.startswith("ERROR:"):
                    err_console.print(f"[yellow]Warning:[/yellow] {table_name}: {ddl}")
                    failures += 1
                else:
                    # Write DDL to file named by table identifier
                    file_name = f"{table_name}.sql"
                    file_path = ddl_folder / file_name
                    file_path.write_text(ddl, encoding="utf-8")
                    successes += 1

            console.print(
                f"[green]Success:[/green] Wrote {successes} DDL file(s) to {ddl_folder}"
            )
            if failures > 0:
                console.print(
                    f"[yellow]Warning:[/yellow] {failures} table(s) failed to fetch"
                )
        else:
            # Output to stdout
            for table_name, ddl in ddl_results.items():
                if ddl.startswith("ERROR:"):
                    err_console.print(f"[yellow]Warning:[/yellow] {table_name}: {ddl}")
                    failures += 1
                else:
                    print(f"-- Table: {table_name}")
                    print(ddl)
                    print()
                    successes += 1

            if failures > 0:
                err_console.print(
                    f"\n[yellow]Warning:[/yellow] {failures} table(s) failed to fetch"
                )

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except CatalogError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


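# A sketch (not part of the released file) of the sqlglider.toml keys this
# command reads. The key names are inferred from the config attribute access
# above; the exact section layout is an assumption:
#
#     catalog_type = "databricks"
#     ddl_folder = "./ddl"
#
#     [catalog.databricks]
#     warehouse_id = "<warehouse-id>"
#     profile = "<databricks-cli-profile>"
#     # host and token may be set instead of profile

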
@app.command()
def template(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL template file to render (reads from stdin if not provided)",
        ),
    ],
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater to use (default: jinja, or from config)",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output-file",
        "-o",
        help="Write output to file instead of stdout",
    ),
    list_available: bool = typer.Option(
        False,
        "--list",
        "-l",
        help="List available templaters and exit",
    ),
) -> None:
    """
    Render a SQL template file with variable substitution.

    Uses the specified templater (default: jinja) to process the SQL file
    with template variables. Variables can be provided via CLI, file, or config.

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # Basic template rendering
        sqlglider template query.sql --var schema=analytics --var table=users

        # Using a variables file
        sqlglider template query.sql --vars-file vars.json

        # Output to file
        sqlglider template query.sql --var schema=prod -o rendered.sql

        # List available templaters
        sqlglider template --list

        # Use specific templater
        sqlglider template query.sql --templater jinja --var name=test
    """
    # Handle --list option
    if list_available:
        available = list_templaters()
        if available:
            console.print("[bold]Available templaters:[/bold]")
            for name in available:
                console.print(f" - {name}")
        else:
            console.print("[yellow]No templaters available[/yellow]")
        raise typer.Exit(0)

    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    # For template command, default to "jinja" (always apply templating)
    templater = templater or config.templater or "jinja"

    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating (always for template command)
        rendered = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Write output
        if output_file:
            output_file.write_text(rendered, encoding="utf-8")
            console.print(
                f"[green]Success:[/green] Rendered SQL written to {output_file}"
            )
        else:
            print(rendered)

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


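# A sketch (not part of the released file) of the render path this command
# wraps, using only the calls exercised in _apply_templating above:
#
#     variables = load_all_variables(
#         cli_vars=["schema=prod"], vars_file=None, config_vars=None, use_env=True
#     )
#     jinja = get_templater("jinja")
#     print(
#         jinja.render(
#             "SELECT * FROM {{ schema }}.users", variables=variables, source_path=None
#         )
#     )

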
# Graph command group
graph_app = typer.Typer(
    name="graph",
    help="Graph-based lineage analysis commands.",
)
app.add_typer(graph_app, name="graph")


@graph_app.command("build")
def graph_build(
    paths: List[Path] = typer.Argument(
        None,
        help="SQL file(s) or directory path to process",
    ),
    output: Path = typer.Option(
        ...,
        "--output",
        "-o",
        help="Output file path for serialized graph (required)",
    ),
    recursive: bool = typer.Option(
        False,
        "--recursive",
        "-r",
        help="Recursively search directories for SQL files",
    ),
    glob_pattern: str = typer.Option(
        "*.sql",
        "--glob",
        "-g",
        help="Glob pattern for matching SQL files in directories",
    ),
    manifest: Optional[Path] = typer.Option(
        None,
        "--manifest",
        "-m",
        exists=True,
        help="Path to manifest CSV file with file_path and optional dialect columns",
    ),
    node_format: str = typer.Option(
        "qualified",
        "--node-format",
        "-n",
        help="Node identifier format: 'qualified' or 'structured'",
    ),
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark; used when the manifest does not specify one)",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
) -> None:
    """
    Build a lineage graph from SQL files.

    Supports multiple input modes:
    - Single file: sqlglider graph build query.sql -o graph.json
    - Multiple files: sqlglider graph build query1.sql query2.sql -o graph.json
    - Directory: sqlglider graph build ./queries/ -r -o graph.json
    - Manifest: sqlglider graph build --manifest manifest.csv -o graph.json

    Examples:

        # Build from single file
        sqlglider graph build query.sql -o graph.json

        # Build from directory (recursive)
        sqlglider graph build ./queries/ -r -o graph.json

        # Build from manifest with custom dialect
        sqlglider graph build --manifest manifest.csv -o graph.json --dialect postgres

        # Build with structured node format
        sqlglider graph build query.sql -o graph.json --node-format structured

        # Build with Jinja2 templating
        sqlglider graph build ./queries/ -o graph.json --templater jinja --var schema=prod
    """
    from sqlglider.graph.builder import GraphBuilder
    from sqlglider.graph.serialization import save_graph

    # Load config for defaults
    config = load_config()
    dialect = dialect or config.dialect or "spark"
    templater = templater or config.templater  # None means no templating

    # Validate and convert node format to enum
    try:
        node_format_enum = NodeFormat(node_format)
    except ValueError:
        err_console.print(
            f"[red]Error:[/red] Invalid node format '{node_format}'. "
            "Use 'qualified' or 'structured'."
        )
        raise typer.Exit(1)

    # Validate inputs
    if not paths and not manifest:
        err_console.print(
            "[red]Error:[/red] Must provide either file/directory paths or --manifest option."
        )
        raise typer.Exit(1)

    # Create SQL preprocessor if templating is enabled
    sql_preprocessor: Optional[Callable[[str, Path], str]] = None
    if templater:
        # Load variables once for all files
        config_vars_file = None
        config_vars = None
        if config.templating:
            if config.templating.variables_file and not vars_file:
                config_vars_file = Path(config.templating.variables_file)
                if not config_vars_file.exists():
                    err_console.print(
                        f"[yellow]Warning:[/yellow] Variables file from config "
                        f"not found: {config_vars_file}"
                    )
                    config_vars_file = None
            config_vars = config.templating.variables

        variables = load_all_variables(
            cli_vars=var,
            vars_file=vars_file or config_vars_file,
            config_vars=config_vars,
            use_env=True,
        )

        templater_instance = get_templater(templater)

        def _preprocess(sql: str, file_path: Path) -> str:
            return templater_instance.render(
                sql, variables=variables, source_path=file_path
            )

        sql_preprocessor = _preprocess

    try:
        builder = GraphBuilder(
            node_format=node_format_enum,
            dialect=dialect,
            sql_preprocessor=sql_preprocessor,
        )

        # Process manifest if provided
        if manifest:
            builder.add_manifest(manifest, dialect=dialect)

        # Process paths - collect all files first for progress tracking
        if paths:
            all_files: list[Path] = []
            for path in paths:
                if path.is_dir():
                    pattern = f"**/{glob_pattern}" if recursive else glob_pattern
                    all_files.extend(
                        f for f in sorted(path.glob(pattern)) if f.is_file()
                    )
                elif path.is_file():
                    all_files.append(path)
                else:
                    err_console.print(f"[red]Error:[/red] Path not found: {path}")
                    raise typer.Exit(1)
            builder.add_files(all_files, dialect=dialect)

        # Build and save graph
        graph = builder.build()
        save_graph(graph, output)

        console.print(
            f"[green]Success:[/green] Graph saved to {output} "
            f"({graph.metadata.total_nodes} nodes, {graph.metadata.total_edges} edges)"
        )

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


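# A sketch (not part of the released file) of the builder API as exercised by
# the command above; the constructor and method signatures are assumptions
# based on that usage:
#
#     from sqlglider.graph.builder import GraphBuilder
#     from sqlglider.graph.serialization import save_graph
#
#     builder = GraphBuilder(
#         node_format=NodeFormat("qualified"), dialect="spark", sql_preprocessor=None
#     )
#     builder.add_files([Path("query1.sql"), Path("query2.sql")], dialect="spark")
#     graph = builder.build()
#     save_graph(graph, Path("graph.json"))
#     print(graph.metadata.total_nodes, graph.metadata.total_edges)

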
@graph_app.command("merge")
def graph_merge(
    inputs: List[Path] = typer.Argument(
        None,
        help="JSON graph files to merge",
    ),
    output: Path = typer.Option(
        ...,
        "--output",
        "-o",
        help="Output file path for merged graph (required)",
    ),
    glob_pattern: Optional[str] = typer.Option(
        None,
        "--glob",
        "-g",
        help="Glob pattern for matching graph JSON files (e.g., 'graphs/*.json')",
    ),
) -> None:
    """
    Merge multiple lineage graphs into one.

    Nodes are deduplicated by identifier. Edges are deduplicated by source-target pair.

    Examples:

        # Merge specific files
        sqlglider graph merge graph1.json graph2.json -o merged.json

        # Merge with glob pattern
        sqlglider graph merge --glob "graphs/*.json" -o merged.json

        # Combine both
        sqlglider graph merge extra.json --glob "graphs/*.json" -o merged.json
    """
    from sqlglider.graph.merge import GraphMerger
    from sqlglider.graph.serialization import save_graph

    # Validate inputs
    if not inputs and not glob_pattern:
        err_console.print(
            "[red]Error:[/red] Must provide either graph files or --glob option."
        )
        raise typer.Exit(1)

    try:
        merger = GraphMerger()

        # Process glob pattern if provided
        if glob_pattern:
            glob_files = sorted(Path(".").glob(glob_pattern))
            if not glob_files:
                err_console.print(
                    f"[yellow]Warning:[/yellow] No files matched pattern: {glob_pattern}"
                )
            for graph_file in glob_files:
                if graph_file.is_file():
                    merger.add_file(graph_file)

        # Process explicit inputs
        if inputs:
            for graph_file in inputs:
                if not graph_file.exists():
                    err_console.print(f"[red]Error:[/red] File not found: {graph_file}")
                    raise typer.Exit(1)
                merger.add_file(graph_file)

        # Merge and save
        merged_graph = merger.merge()
        save_graph(merged_graph, output)

        console.print(
            f"[green]Success:[/green] Merged graph saved to {output} "
            f"({merged_graph.metadata.total_nodes} nodes, {merged_graph.metadata.total_edges} edges, "
            f"{len(merged_graph.metadata.source_files)} source files)"
        )

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


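# A sketch (not part of the released file) of the merge flow; the calls mirror
# those above, and deduplication semantics are as stated in the docstring:
#
#     from sqlglider.graph.merge import GraphMerger
#     from sqlglider.graph.serialization import save_graph
#
#     merger = GraphMerger()
#     merger.add_file(Path("graph1.json"))
#     merger.add_file(Path("graph2.json"))
#     save_graph(merger.merge(), Path("merged.json"))

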
@graph_app.command("query")
def graph_query(
    graph_file: Path = typer.Argument(
        ...,
        exists=True,
        help="Path to graph JSON file",
    ),
    upstream: Optional[str] = typer.Option(
        None,
        "--upstream",
        "-u",
        help="Find all source columns that contribute to this column",
    ),
    downstream: Optional[str] = typer.Option(
        None,
        "--downstream",
        "-d",
        help="Find all columns affected by this source column",
    ),
    output_format: str = typer.Option(
        "text",
        "--output-format",
        "-f",
        help="Output format: 'text', 'json', or 'csv'",
    ),
) -> None:
    """
    Query a lineage graph for upstream or downstream dependencies.

    Examples:

        # Find all source columns for a target
        sqlglider graph query graph.json --upstream orders.customer_id

        # Find all columns affected by a source
        sqlglider graph query graph.json --downstream customers.customer_id

        # JSON output
        sqlglider graph query graph.json --upstream orders.total -f json

        # CSV output
        sqlglider graph query graph.json --downstream orders.order_id -f csv
    """
    from sqlglider.graph.query import GraphQuerier

    # Validate options
    if not upstream and not downstream:
        err_console.print(
            "[red]Error:[/red] Must specify either --upstream or --downstream."
        )
        raise typer.Exit(1)

    if upstream and downstream:
        err_console.print(
            "[red]Error:[/red] Cannot specify both --upstream and --downstream. "
            "Choose one direction."
        )
        raise typer.Exit(1)

    if output_format not in ["text", "json", "csv"]:
        err_console.print(
            f"[red]Error:[/red] Invalid output format '{output_format}'. "
            "Use 'text', 'json', or 'csv'."
        )
        raise typer.Exit(1)

    try:
        querier = GraphQuerier.from_file(graph_file)

        if upstream:
            result = querier.find_upstream(upstream)
        else:
            assert downstream is not None  # Validated above
            result = querier.find_downstream(downstream)

        # Format and output
        if output_format == "text":
            _format_query_result_text(result)
        elif output_format == "json":
            _format_query_result_json(result)
        else:  # csv
            _format_query_result_csv(result)

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


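# A sketch (not part of the released file) of the query API as used above:
#
#     from sqlglider.graph.query import GraphQuerier
#
#     querier = GraphQuerier.from_file(Path("graph.json"))
#     result = querier.find_upstream("orders.customer_id")
#     for node in result.related_columns:
#         print(node.identifier, node.hops, node.is_root, node.is_leaf)

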
def _format_query_result_text(result) -> None:
    """Format query result as text table."""
    direction_label = (
        "Sources" if result.direction == "upstream" else "Affected Columns"
    )

    table = Table(title=f"{direction_label} for '{result.query_column}'")
    table.add_column("Column", style="cyan")
    table.add_column("Table", style="green")
    table.add_column("Hops", style="yellow", justify="right")
    table.add_column("Root", style="magenta", justify="center")
    table.add_column("Leaf", style="magenta", justify="center")
    table.add_column("Paths", style="dim")
    table.add_column("File", style="dim")

    for node in result.related_columns:
        # Format paths as newline-separated arrow strings
        paths_str = (
            "\n".join(p.to_arrow_string() for p in node.paths) if node.paths else "-"
        )

        table.add_row(
            node.column or node.identifier,
            node.table or "",
            str(node.hops),
            "Y" if node.is_root else "N",
            "Y" if node.is_leaf else "N",
            paths_str,
            Path(node.file_path).name if node.file_path else "",
        )

    if len(result) == 0:
        console.print(
            f"[yellow]No {direction_label.lower()} found for '{result.query_column}'[/yellow]"
        )
    else:
        console.print(table)
        console.print(f"\n[dim]Total: {len(result)} column(s)[/dim]")


def _format_query_result_json(result) -> None:
    """Format query result as JSON."""
    columns = []
    for node in result.related_columns:
        node_data = node.model_dump()
        # Serialize paths as arrays of node identifiers for cleaner output
        node_data["paths"] = [p.nodes for p in node.paths]
        columns.append(node_data)

    output = {
        "query_column": result.query_column,
        "direction": result.direction,
        "count": len(result),
        "columns": columns,
    }
    print(json.dumps(output, indent=2))


def _format_query_result_csv(result) -> None:
    """Format query result as CSV."""
    print(
        "identifier,table,column,hops,output_column,is_root,is_leaf,paths,file_path,query_index"
    )
    for node in result.related_columns:
        file_path = node.file_path.replace('"', '""') if node.file_path else ""
        # Format paths as semicolon-separated arrow strings
        paths_str = (
            ";".join(p.to_arrow_string() for p in node.paths) if node.paths else ""
        )
        paths_str = paths_str.replace('"', '""')

        print(
            f'"{node.identifier}","{node.table or ""}","{node.column or ""}",'
            f'{node.hops},"{node.output_column}",'
            f"{'true' if node.is_root else 'false'},"
            f"{'true' if node.is_leaf else 'false'},"
            f'"{paths_str}","{file_path}",{node.query_index}'
        )


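# Illustration with hypothetical values of one row emitted above: string
# fields are double-quoted with embedded quotes doubled, booleans lowercased,
# and hops/query_index left bare:
#
#     "orders.total","orders","total",2,"order_total",true,false,"a.x -> orders.total","queries/q1.sql",0

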
@app.command()
def dissect(
    sql_file: Annotated[
        typer.FileText,
        typer.Argument(
            default_factory=lambda: sys.stdin,
            show_default="stdin",
            help="Path to SQL file to dissect (reads from stdin if not provided)",
        ),
    ],
    dialect: Optional[str] = typer.Option(
        None,
        "--dialect",
        "-d",
        help="SQL dialect (default: spark, or from config)",
    ),
    output_format: Optional[str] = typer.Option(
        None,
        "--output-format",
        "-f",
        help="Output format: 'text', 'json', or 'csv' (default: text, or from config)",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output-file",
        "-o",
        help="Write output to file instead of stdout",
    ),
    templater: Optional[str] = typer.Option(
        None,
        "--templater",
        "-t",
        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
    ),
    var: Optional[List[str]] = typer.Option(
        None,
        "--var",
        "-v",
        help="Template variable in key=value format (repeatable)",
    ),
    vars_file: Optional[Path] = typer.Option(
        None,
        "--vars-file",
        exists=True,
        help="Path to variables file (JSON or YAML)",
    ),
) -> None:
    """
    Dissect SQL queries into constituent components.

    Extracts CTEs, subqueries, main query, DML targets, source SELECTs,
    UNION branches, and scalar subqueries for analysis and unit testing.

    Configuration can be set in sqlglider.toml in the current directory.
    CLI arguments override configuration file values.

    Examples:

        # Dissect a SQL file
        sqlglider dissect query.sql

        # Export to JSON format
        sqlglider dissect query.sql --output-format json

        # Export to CSV file
        sqlglider dissect query.sql --output-format csv --output-file dissected.csv

        # Use different SQL dialect
        sqlglider dissect query.sql --dialect postgres

        # Dissect templated SQL with Jinja2
        sqlglider dissect query.sql --templater jinja --var schema=analytics
    """
    from sqlglider.dissection.analyzer import DissectionAnalyzer
    from sqlglider.dissection.formatters import (
        DissectionCsvFormatter,
        DissectionJsonFormatter,
        DissectionTextFormatter,
    )
    from sqlglider.dissection.formatters import (
        OutputWriter as DissectionOutputWriter,
    )

    # Load configuration from sqlglider.toml (if it exists)
    config = load_config()

    # Apply priority resolution: CLI args > config > defaults
    dialect = dialect or config.dialect or "spark"
    output_format = output_format or config.output_format or "text"
    templater = templater or config.templater  # None means no templating

    # Validate output format
    if output_format not in ["text", "json", "csv"]:
        err_console.print(
            f"[red]Error:[/red] Invalid output format '{output_format}'. "
            "Use 'text', 'json', or 'csv'."
        )
        raise typer.Exit(1)

    # Check if reading from stdin
    is_stdin = sql_file.name == "<stdin>"

    try:
        # Check if stdin is being used without input
        if is_stdin and sys.stdin.isatty():
            err_console.print(
                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
                "Provide a SQL file path or pipe SQL via stdin."
            )
            raise typer.Exit(1)

        # Read SQL from file or stdin
        sql = sql_file.read()

        # Determine source path for templating (None if stdin)
        source_path = None if is_stdin else Path(sql_file.name)

        # Apply templating if specified
        sql = _apply_templating(
            sql,
            templater_name=templater,
            cli_vars=var,
            vars_file=vars_file,
            config=config,
            source_path=source_path,
        )

        # Create analyzer
        analyzer = DissectionAnalyzer(sql, dialect=dialect)

        # Dissect queries
        results = analyzer.dissect_queries()

        # Format and output based on output format
        if output_format == "text":
            if output_file:
                # For file output, use a string-based console to capture output
                from io import StringIO

                from rich.console import Console as FileConsole

                string_buffer = StringIO()
                file_console = FileConsole(file=string_buffer, force_terminal=False)
                DissectionTextFormatter.format(results, file_console)
                output_file.write_text(string_buffer.getvalue(), encoding="utf-8")
                console.print(
                    f"[green]Success:[/green] Dissection written to {output_file}"
                )
            else:
                # Direct console output with Rich formatting
                DissectionTextFormatter.format(results, console)
        elif output_format == "json":
            formatted = DissectionJsonFormatter.format(results)
            DissectionOutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Dissection written to {output_file}"
                )
        else:  # csv
            formatted = DissectionCsvFormatter.format(results)
            DissectionOutputWriter.write(formatted, output_file)
            if output_file:
                console.print(
                    f"[green]Success:[/green] Dissection written to {output_file}"
                )

    except FileNotFoundError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ParseError as e:
        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
        raise typer.Exit(1)

    except TemplaterError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except ValueError as e:
        err_console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)

    except Exception as e:
        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
        raise typer.Exit(1)


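# A sketch (not part of the released file) of the dissection API as exercised
# above:
#
#     from sqlglider.dissection.analyzer import DissectionAnalyzer
#
#     analyzer = DissectionAnalyzer(
#         "WITH c AS (SELECT 1 AS x) SELECT x FROM c", dialect="spark"
#     )
#     results = analyzer.dissect_queries()  # CTEs, subqueries, main query, ...

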
if __name__ == "__main__":
    app()
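
# The app can also be exercised in-process, e.g. in tests, via typer's standard
# CliRunner (a sketch, not part of the released file; the command line comes
# from the docstrings above):
#
#     from typer.testing import CliRunner
#
#     runner = CliRunner()
#     result = runner.invoke(app, ["lineage", "query.sql", "--level", "table"])
#     print(result.exit_code, result.output)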