sql-code-graph 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. sql_code_graph-0.2.1.dist-info/METADATA +171 -0
  2. sql_code_graph-0.2.1.dist-info/RECORD +55 -0
  3. sql_code_graph-0.2.1.dist-info/WHEEL +4 -0
  4. sql_code_graph-0.2.1.dist-info/entry_points.txt +2 -0
  5. sqlcg/__init__.py +5 -0
  6. sqlcg/__main__.py +6 -0
  7. sqlcg/cli/__init__.py +1 -0
  8. sqlcg/cli/commands/__init__.py +1 -0
  9. sqlcg/cli/commands/analyze.py +93 -0
  10. sqlcg/cli/commands/db.py +83 -0
  11. sqlcg/cli/commands/find.py +63 -0
  12. sqlcg/cli/commands/gain.py +169 -0
  13. sqlcg/cli/commands/git.py +73 -0
  14. sqlcg/cli/commands/index.py +92 -0
  15. sqlcg/cli/commands/install.py +60 -0
  16. sqlcg/cli/commands/mcp.py +54 -0
  17. sqlcg/cli/commands/report.py +135 -0
  18. sqlcg/cli/commands/watch.py +57 -0
  19. sqlcg/cli/main.py +40 -0
  20. sqlcg/core/__init__.py +8 -0
  21. sqlcg/core/config.py +104 -0
  22. sqlcg/core/graph_db.py +179 -0
  23. sqlcg/core/jobs.py +105 -0
  24. sqlcg/core/kuzu_backend.py +269 -0
  25. sqlcg/core/neo4j_backend.py +195 -0
  26. sqlcg/core/queries.py +82 -0
  27. sqlcg/core/schema.cypher +104 -0
  28. sqlcg/core/schema.py +48 -0
  29. sqlcg/indexer/__init__.py +1 -0
  30. sqlcg/indexer/dbt_adapter.py +23 -0
  31. sqlcg/indexer/indexer.py +317 -0
  32. sqlcg/indexer/walker.py +55 -0
  33. sqlcg/indexer/watcher.py +195 -0
  34. sqlcg/lineage/__init__.py +1 -0
  35. sqlcg/lineage/aggregator.py +58 -0
  36. sqlcg/lineage/schema_resolver.py +198 -0
  37. sqlcg/metrics/__init__.py +5 -0
  38. sqlcg/metrics/store.py +273 -0
  39. sqlcg/parsers/__init__.py +30 -0
  40. sqlcg/parsers/ansi_parser.py +215 -0
  41. sqlcg/parsers/base.py +414 -0
  42. sqlcg/parsers/bigquery_parser.py +77 -0
  43. sqlcg/parsers/postgres_parser.py +27 -0
  44. sqlcg/parsers/registry.py +46 -0
  45. sqlcg/parsers/snowflake_parser.py +148 -0
  46. sqlcg/parsers/tsql_parser.py +27 -0
  47. sqlcg/server/__init__.py +1 -0
  48. sqlcg/server/exceptions.py +20 -0
  49. sqlcg/server/models.py +83 -0
  50. sqlcg/server/server.py +57 -0
  51. sqlcg/server/tools.py +663 -0
  52. sqlcg/utils/__init__.py +6 -0
  53. sqlcg/utils/hashing.py +18 -0
  54. sqlcg/utils/ignore.py +36 -0
  55. sqlcg/utils/logging.py +29 -0
@@ -0,0 +1,73 @@
1
+ """Git integration commands for sqlcg."""
2
+
3
+ from pathlib import Path
4
+
5
+ import typer
6
+ from rich.console import Console
7
+
8
+ console = Console()
9
+
10
+ app = typer.Typer(name="git", help="Git integration commands")
11
+
12
+
13
@app.command("install-hooks")
def install_hooks(
    repo: Path | None = typer.Option(  # noqa: B008
        None, "--repo", "-r", help="Path to git repository (default: current directory)"
    ),
) -> None:
    """Install git hooks for sqlcg integration.

    Writes a post-checkout hook that triggers graph resync after branch switches.
    Idempotent: running multiple times produces one hook entry. When a
    post-checkout hook that lacks the sqlcg sentinel already exists, it is left
    untouched and the snippet to append manually is printed instead.

    Args:
        repo: Root of the git working tree. Defaults to the current directory.

    Raises:
        typer.Exit: With exit code 1 when ``repo`` has no ``.git`` directory.
    """
    if repo is None:
        repo = Path.cwd()

    git_dir = repo / ".git"
    hooks_dir = git_dir / "hooks"

    if not git_dir.exists():
        console.print("[red]Error: not a git repository[/red]")
        raise typer.Exit(1)

    if not git_dir.is_dir():
        # Linked worktrees and submodules keep ".git" as a pointer file, so a
        # hooks directory cannot be created underneath it.
        console.print(
            "[red]Error: .git is not a directory (linked worktree or submodule?); "
            "run this command in the main repository[/red]"
        )
        raise typer.Exit(1)

    hooks_dir.mkdir(parents=True, exist_ok=True)

    hook_path = hooks_dir / "post-checkout"
    hook_sentinel = "# sqlcg post-checkout hook"

    # Hook script content
    hook_script = """#!/bin/sh
# sqlcg post-checkout hook — resync graph after branch switch
# $3 == 1 means branch checkout (not file checkout); skip file checkouts
[ "$3" = "1" ] || exit 0
sqlcg index "$(git rev-parse --show-toplevel)" --dialect auto --quiet || true
"""

    # Check if hook already exists
    if hook_path.exists():
        # Decode leniently: a foreign hook may not be valid UTF-8, and we only
        # need to look for the ASCII sentinel.
        existing_content = hook_path.read_text(encoding="utf-8", errors="replace")
        if hook_sentinel in existing_content:
            # Already installed, idempotent: skip silently
            return

        # Existing hook without sqlcg sentinel: never overwrite someone else's hook.
        console.print(
            "[yellow]Warning: existing post-checkout hook found that was not created "
            "by sqlcg.[/yellow]"
        )
        console.print(
            "[yellow]To integrate sqlcg, manually append the following to "
            ".git/hooks/post-checkout:[/yellow]"
        )
        console.print("")
        console.print("[cyan]" + hook_script.rstrip() + "[/cyan]")
        return

    # Write hook script. The script contains a non-ASCII dash, so the encoding
    # is pinned instead of relying on the locale; newline="\n" keeps the
    # shebang line free of a trailing carriage return on Windows.
    hook_path.write_text(hook_script, encoding="utf-8", newline="\n")

    # Make it executable
    hook_path.chmod(0o755)

    console.print("[green]Installed git hook:[/green] .git/hooks/post-checkout")
@@ -0,0 +1,92 @@
1
+ """Index command for scanning and indexing SQL files."""
2
+
3
+ from pathlib import Path
4
+
5
+ import typer
6
+ from rich.console import Console
7
+
8
+ from sqlcg.core.config import get_backend, get_db_path, get_dialect
9
+ from sqlcg.indexer.indexer import Indexer
10
+
11
+ console = Console()
12
+
13
+
14
def index_cmd(  # noqa: B008
    path: Path = typer.Argument(..., help="Directory to index"),  # noqa: B008
    dialect: str | None = typer.Option(  # noqa: B008
        None, "--dialect", "-d", help="SQL dialect (or 'auto' to read from .sqlcg.toml)"
    ),
    dbt_manifest: Path | None = typer.Option(  # noqa: B008
        None, "--dbt-manifest", help="Path to dbt manifest"
    ),
    timeout_per_file: int = typer.Option(  # noqa: B008
        30, "--timeout-per-file", help="Timeout per file in seconds"
    ),
    no_ddl: bool = typer.Option(  # noqa: B008
        False, "--no-ddl", help="Skip DDL statements (not yet fully implemented)"
    ),
    schema_from_info_schema: str | None = typer.Option(  # noqa: B008
        None, "--schema-from-info-schema", hidden=True, help="(Not yet implemented)"
    ),
    quiet: bool = typer.Option(  # noqa: B008
        False, "--quiet", "-q", help="Suppress summary console output"
    ),
) -> None:
    """Index SQL files in a directory.

    Creates (or updates) a Repo node for ``path``, runs the indexer over it,
    links every File node located under the repository to that Repo node, and
    prints a one-line summary unless ``quiet`` is set.

    Args:
        path: Directory to index.
        dialect: SQL dialect; the literal ``"auto"`` is resolved via ``get_dialect``.
        dbt_manifest: Optional dbt manifest path, forwarded to the indexer.
        timeout_per_file: Per-file timeout in seconds, forwarded to the indexer.
        no_ddl: Accepted but not yet wired through; only a notice is printed.
        schema_from_info_schema: Not implemented; any value aborts the command.
        quiet: Suppress the summary line.

    Raises:
        typer.Exit: With exit code 1 when ``--schema-from-info-schema`` is given.
    """
    import os

    from sqlcg.core.schema import NodeLabel, RelType

    if schema_from_info_schema:
        console.print("[red]--schema-from-info-schema is not yet implemented (v2)[/red]")
        raise typer.Exit(1)

    # TODO: wire no_ddl through to the indexer once it supports the parameter
    if no_ddl:
        console.print("[yellow]Note: --no-ddl is not yet fully implemented[/yellow]")

    # Resolve dialect: 'auto' reads from .sqlcg.toml, otherwise use provided value
    if dialect == "auto":
        dialect = get_dialect(path)

    db_path = get_db_path()
    db_path.parent.mkdir(parents=True, exist_ok=True)

    with get_backend() as backend:
        backend.init_schema()

        # Create Repo node for this repository
        abs_path = str(path.resolve())
        backend.upsert_node(
            NodeLabel.REPO,
            abs_path,
            {
                "path": abs_path,
                "name": path.name,
            },
        )

        # Index the repository
        indexer = Indexer()
        summary = indexer.index_repo(path, dialect, backend, dbt_manifest, timeout_per_file)

        # Connect files to repo. The prefix must end with a path separator;
        # otherwise indexing "/work/repo" would also claim files that live in
        # a sibling such as "/work/repo-old/".
        # NOTE(review): assumes File.path holds absolute paths that use the
        # OS separator, as the original prefix match already did — confirm.
        repo_prefix = abs_path if abs_path.endswith(os.sep) else abs_path + os.sep
        files_query = "MATCH (f:File) WHERE f.path STARTS WITH $repo_prefix RETURN f.path AS path"
        file_rows = backend.run_read(files_query, {"repo_prefix": repo_prefix})
        for row in file_rows:
            backend.upsert_edge(
                NodeLabel.FILE,
                row["path"],
                NodeLabel.REPO,
                abs_path,
                RelType.BELONGS_TO,
                {},
            )

        # Print summary unless --quiet is specified
        if not quiet:
            console.print(
                f"[green]Indexed[/green] {summary['files_parsed']} files — "
                f"{summary['tables_found']} tables, {summary['lineage_edges_created']} edges, "
                f"{summary['parse_errors']} errors"
            )
@@ -0,0 +1,60 @@
1
+ """Install sqlcg as an MCP server in Claude Code."""
2
+
3
+ import json
4
+ import os
5
+ import shutil
6
+ from pathlib import Path
7
+
8
+ import typer
9
+ from rich.console import Console
10
+
11
+ console = Console()
12
+
13
+ _SETTINGS_PATH = Path.home() / ".claude" / "settings.json"
14
+ _SERVER_KEY = "sql-code-graph"
15
+
16
+
17
def install_cmd(
    dry_run: bool = typer.Option(False, "--dry-run", help="Print config without writing"),
) -> None:
    """Register sqlcg as an MCP server in Claude Code (~/.claude/settings.json).

    The server entry launches through ``uvx`` when it is on PATH and through
    the ``sqlcg`` executable otherwise. Existing settings are preserved; only
    the ``mcpServers["sql-code-graph"]`` entry is added or replaced. The file
    is written via a temporary file plus ``os.replace``.

    If the existing file cannot be parsed as a JSON object, a warning is
    printed and the unreadable original is copied to ``settings.json.bak``
    before it is overwritten, so nothing is lost silently.

    Args:
        dry_run: Print the resulting settings instead of writing them.
    """
    if shutil.which("uvx"):
        entry: dict = {"command": "uvx", "args": ["sql-code-graph", "mcp", "start"]}
    else:
        entry = {"command": "sqlcg", "args": ["mcp", "start"]}

    settings_path = _SETTINGS_PATH
    settings: dict = {}
    needs_backup = False
    if settings_path.exists():
        try:
            # JSON files are UTF-8; do not depend on the locale encoding.
            loaded = json.loads(settings_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            loaded = None
        if isinstance(loaded, dict):
            settings = loaded
        else:
            # Covers both unparsable content and valid JSON that is not an object.
            console.print(
                f"[yellow]Warning:[/yellow] {settings_path} contains invalid JSON — "
                "mcpServers key will be added"
            )
            needs_backup = True

    # Tolerate a present-but-malformed "mcpServers" value (e.g. null or a list).
    mcp_servers = settings.get("mcpServers")
    if not isinstance(mcp_servers, dict):
        mcp_servers = {}
        settings["mcpServers"] = mcp_servers

    if mcp_servers.get(_SERVER_KEY) == entry:
        console.print(f"[green]Already configured:[/green] {_SERVER_KEY} → {settings_path}")
        return

    mcp_servers[_SERVER_KEY] = entry

    if dry_run:
        console.print("[dim]--dry-run: would write:[/dim]")
        console.print_json(json.dumps(settings, indent=2))
        return

    settings_path.parent.mkdir(parents=True, exist_ok=True)

    if needs_backup:
        # Keep the unreadable original so the user can recover it by hand.
        backup_path = settings_path.with_name(settings_path.name + ".bak")
        shutil.copy2(settings_path, backup_path)
        console.print(f"[dim]Previous contents saved to {backup_path}[/dim]")

    # Write to a sibling temp file first so a failed write cannot truncate
    # the real settings file.
    tmp = settings_path.with_suffix(".tmp")
    tmp.write_text(json.dumps(settings, indent=2) + "\n", encoding="utf-8")
    os.replace(tmp, settings_path)

    cmd_str = f"{entry['command']} {' '.join(entry['args'])}"
    console.print(f"[green]Configured:[/green] {_SERVER_KEY} → {cmd_str}")
    console.print(f"[dim]Written to {settings_path}[/dim]")
    console.print("\nRestart Claude Code to pick up the new MCP server.")
@@ -0,0 +1,54 @@
1
+ """MCP server commands."""
2
+
3
+ import json
4
+ import os
5
+ import shutil
6
+ from pathlib import Path
7
+
8
+ import typer
9
+ from rich.console import Console
10
+
11
+ app = typer.Typer(help="MCP server commands")
12
+ console = Console()
13
+
14
+ _SERVER_KEY = "sql-code-graph"
15
+
16
+
17
def _server_entry() -> dict:
    """Build the MCP launch entry, preferring ``uvx`` when it is on PATH.

    Returns:
        A dict with ``command`` and ``args`` keys describing how to start the
        sqlcg MCP server.
    """
    uvx_available = bool(shutil.which("uvx"))
    command = "uvx" if uvx_available else "sqlcg"
    # Through uvx the package name comes first; the sqlcg executable takes
    # the subcommand directly.
    leading_args = ["sql-code-graph"] if uvx_available else []
    return {"command": command, "args": [*leading_args, "mcp", "start"]}
21
+
22
+
23
@app.command("setup")
def mcp_setup(print_only: bool = typer.Option(True, "--print/--write")) -> None:
    """Print or write MCP server config JSON.

    With ``--print`` (the default) the ``mcpServers`` snippet is printed and
    nothing is touched. With ``--write`` the entry is merged into
    ``~/.claude/settings.json``, keeping all other settings.

    If the existing file cannot be parsed as a JSON object, a warning is
    printed and the unreadable original is copied to ``settings.json.bak``
    before it is overwritten.

    Args:
        print_only: Print the snippet instead of writing the settings file.
    """
    entry = _server_entry()
    if print_only:
        console.print_json(json.dumps({"mcpServers": {_SERVER_KEY: entry}}, indent=2))
        return

    config_path = Path.home() / ".claude" / "settings.json"
    settings: dict = {}
    needs_backup = False
    if config_path.exists():
        try:
            # JSON files are UTF-8; do not depend on the locale encoding.
            loaded = json.loads(config_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            loaded = None
        if isinstance(loaded, dict):
            settings = loaded
        else:
            # Covers both unparsable content and valid JSON that is not an object.
            console.print(
                f"[yellow]Warning:[/yellow] {config_path} contains invalid JSON — "
                "mcpServers key will be added"
            )
            needs_backup = True

    # Tolerate a present-but-malformed "mcpServers" value (e.g. null or a list).
    mcp_servers = settings.get("mcpServers")
    if not isinstance(mcp_servers, dict):
        mcp_servers = {}
        settings["mcpServers"] = mcp_servers
    mcp_servers[_SERVER_KEY] = entry

    config_path.parent.mkdir(parents=True, exist_ok=True)

    if needs_backup:
        # Keep the unreadable original so the user can recover it by hand.
        backup_path = config_path.with_name(config_path.name + ".bak")
        shutil.copy2(config_path, backup_path)
        console.print(f"[dim]Previous contents saved to {backup_path}[/dim]")

    # Write to a sibling temp file first so a failed write cannot truncate
    # the real settings file.
    tmp = config_path.with_suffix(".tmp")
    tmp.write_text(json.dumps(settings, indent=2) + "\n", encoding="utf-8")
    os.replace(tmp, config_path)
    console.print(f"[green]Configuration written to[/green] {config_path}")
47
+
48
+
49
@app.command("start")
def mcp_start() -> None:
    """Launch the sqlcg MCP server by calling its ``main`` entry point."""
    # Imported here rather than at module top, so the server module is only
    # loaded when this command actually runs.
    from sqlcg.server import server as server_module

    server_module.main()
@@ -0,0 +1,135 @@
1
+ """The sqlcg report command — generate metrics and feedback reports."""
2
+
3
+ import hashlib
4
+ import urllib.parse
5
+ from pathlib import Path
6
+
7
+ import typer
8
+ from rich.console import Console
9
+
10
+ from sqlcg.metrics.store import MetricsStore
11
+ from sqlcg.utils.logging import getLogger
12
+
13
+ logger = getLogger(__name__)
14
+ console = Console()
15
+
16
+
17
def report_cmd(
    stdout: bool = typer.Option(False, "--stdout", help="Print to stdout instead of file"),  # noqa: B008
    output: Path | None = typer.Option(  # noqa: B008
        None,
        "--output",
        "-o",
        help="Output file path",
    ),
) -> None:
    """Generate a metrics report with FP clusters and parse error patterns.

    Analyzes feedback and index run data to identify:
    - False positive clusters (queries with >50% FP rate, min 3 samples)
    - Parse error clusters (repos with at least 2 runs and parse errors)
    - Provides a pre-filled GitHub issue URL for reporting problems

    If no metrics database exists, prints a message and exits 0.

    Args:
        stdout: Print the report to stdout instead of writing a file.
        output: Destination file; defaults to ``docs/METRICS_REPORT.md``.

    Raises:
        typer.Exit: With exit code 1 when the report cannot be generated.
    """
    metrics_path = Path.home() / ".sqlcg" / "metrics.db"

    if not metrics_path.exists():
        console.print("No metrics collected yet.")
        return

    metrics = None
    try:
        metrics = MetricsStore(metrics_path)
        metrics.init_schema()

        # Section 1: FP clusters
        fp_clusters = metrics.execute_query(
            """
            SELECT query,
            SUM(CASE WHEN label = 'FP' THEN 1 ELSE 0 END) AS fp_count,
            COUNT(*) AS total,
            CAST(SUM(CASE WHEN label = 'FP' THEN 1 ELSE 0 END) AS REAL) / COUNT(*) AS fp_rate
            FROM feedback
            GROUP BY query
            HAVING total >= 3 AND fp_rate > 0.5
            ORDER BY fp_rate DESC
            """
        )

        # Section 2: Parse error clusters
        error_clusters = metrics.execute_query(
            """
            SELECT repo_path, COUNT(*) AS run_count,
            SUM(parse_errors) AS total_errors,
            CAST(SUM(parse_errors) AS REAL) / NULLIF(SUM(files_parsed), 0) AS error_rate
            FROM index_runs
            GROUP BY repo_path
            HAVING run_count >= 2 AND total_errors > 0
            ORDER BY total_errors DESC
            """
        )

        # Build report content
        report_lines = []

        # Section 1: FP clusters
        report_lines.append("## False Positive Clusters\n")
        if fp_clusters:
            report_lines.append("| Query | FP Count | Total | FP Rate |\n")
            report_lines.append("|---|---|---|---|\n")
            for query, fp_count, total, fp_rate in fp_clusters:
                rate_pct = fp_rate * 100 if fp_rate else 0
                report_lines.append(
                    f"| {_md_cell(query)} | {fp_count} | {total} | {rate_pct:.1f}% |\n"
                )
        else:
            report_lines.append("No FP clusters found (need ≥3 samples per query pattern).\n")
        report_lines.append("\n")

        # Section 2: Parse error clusters
        report_lines.append("## Parse Error Clusters\n")
        if error_clusters:
            report_lines.append("| Repo Path | Runs | Total Errors | Error Rate |\n")
            report_lines.append("|---|---|---|---|\n")
            for repo_path, run_count, total_errors, error_rate in error_clusters:
                # error_rate is NULL when no files were parsed (NULLIF in the query).
                rate_pct = error_rate * 100 if error_rate else 0
                report_lines.append(
                    f"| {_md_cell(repo_path)} | {run_count} | {total_errors} | {rate_pct:.1f}% |\n"
                )
        else:
            report_lines.append("No parse error clusters found.\n")
        report_lines.append("\n")

        report_content = "".join(report_lines)

        # Section 3: GitHub issue URL
        # The digest only labels the report; it is not a security measure, so
        # say so explicitly (keeps md5 usable on FIPS-restricted builds).
        report_hash = hashlib.md5(
            report_content.encode(), usedforsecurity=False
        ).hexdigest()[:8]
        issue_title = f"[sqlcg metrics] FP clusters report {report_hash}"
        # Truncate the body so the pre-filled URL stays a manageable length.
        issue_body_truncated = report_content[:2000]
        encoded_title = urllib.parse.quote(issue_title)
        encoded_body = urllib.parse.quote(issue_body_truncated)
        github_url = (
            f"https://github.com/Warhorze/sql-code-graph/issues/new?"
            f"title={encoded_title}&body={encoded_body}"
        )

        report_lines.append("## File an Issue\n")
        report_lines.append(f"[Report metrics issues on GitHub]({github_url})\n")

        final_report = "".join(report_lines)

        # Output
        if stdout:
            # Emit the Markdown verbatim: no Rich markup parsing, no syntax
            # highlighting, and no hard wrapping of table rows or the long URL.
            console.print(final_report, markup=False, highlight=False, soft_wrap=True)
        else:
            output_path = output or Path("docs/METRICS_REPORT.md")
            output_path.parent.mkdir(parents=True, exist_ok=True)
            # The report contains non-ASCII characters; pin the encoding.
            output_path.write_text(final_report, encoding="utf-8")
            console.print(f"Report written to {output_path}")

    except Exception as exc:
        logger.error(f"Failed to generate report: {exc}")
        console.print(f"[red]Error: {exc}[/red]")
        # Signal the failure to the shell instead of exiting 0.
        raise typer.Exit(1) from exc
    finally:
        # Release the metrics store on both the success and the failure path.
        if metrics is not None:
            metrics.close()


def _md_cell(value: object) -> str:
    """Render ``value`` so it cannot break out of a Markdown table cell."""
    return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")
@@ -0,0 +1,57 @@
1
+ """Watch command for monitoring file changes."""
2
+
3
+ import time
4
+ from pathlib import Path
5
+
6
+ import typer
7
+ from rich.console import Console
8
+ from watchdog.observers import Observer
9
+
10
+ from sqlcg.core.config import get_backend, get_db_path
11
+ from sqlcg.core.jobs import WatchJobManager
12
+ from sqlcg.indexer.indexer import Indexer
13
+ from sqlcg.indexer.watcher import SqlFileEventHandler
14
+ from sqlcg.utils.ignore import load_ignore_spec
15
+
16
+ console = Console()
17
+
18
+
19
def watch_cmd(  # noqa: B008
    path: Path = typer.Argument(..., help="Directory to watch"),  # noqa: B008
    dialect: str | None = typer.Option(  # noqa: B008
        None, "--dialect", "-d", help="SQL dialect"
    ),
) -> None:
    """Index ``path`` once, then keep re-indexing as its SQL files change.

    Blocks until the observer thread dies or the user interrupts the process,
    then stops the observer, cancels pending jobs, and stops the handler's
    branch monitor if it has one.

    Args:
        path: Directory to watch (recursively).
        dialect: SQL dialect forwarded to the indexer and the job manager.
    """
    graph_file = get_db_path()
    graph_file.parent.mkdir(parents=True, exist_ok=True)

    with get_backend() as backend:
        backend.init_schema()

        indexer = Indexer()

        # Bring the graph up to date before watching for changes.
        console.print(f"Indexing {path}...")
        indexer.index_repo(path, dialect, backend)

        ignore_spec = load_ignore_spec(path)
        jobs = WatchJobManager(indexer, backend, dialect)
        event_handler = SqlFileEventHandler(jobs, backend, ignore_spec, path, indexer=indexer)

        fs_observer = Observer()
        fs_observer.schedule(event_handler, str(path), recursive=True)
        fs_observer.start()
        console.print(f"[green]Watching[/green] {path} — press Ctrl+C to stop")

        try:
            # Poll once per second; the loop ends when the observer thread does.
            while fs_observer.is_alive():
                time.sleep(1)
        except (KeyboardInterrupt, SystemExit):
            # Interruption is the normal way to stop watching.
            pass
        finally:
            fs_observer.stop()
            fs_observer.join(timeout=5)
            jobs.cancel_all()
            branch_monitor = event_handler._branch_monitor
            if branch_monitor is not None:
                branch_monitor.stop()
                branch_monitor.join(timeout=5)
            console.print("Stopped.")
sqlcg/cli/main.py ADDED
@@ -0,0 +1,40 @@
1
+ """Main CLI entry point for sqlcg."""
2
+
3
+ import typer
4
+ from dotenv import load_dotenv
5
+
6
+ from sqlcg.cli.commands import analyze, db, find, gain, git, index, install, mcp, report, watch
7
+
8
app = typer.Typer(name="sqlcg", help="SQL code graph analyzer")

# Sub-apps, mounted in the order they should be registered.
for _group_name, _group_module in (
    ("db", db),
    ("find", find),
    ("analyze", analyze),
    ("mcp", mcp),
    ("git", git),
):
    app.add_typer(_group_module.app, name=_group_name)

# Stand-alone commands, registered in the same order as before.
for _command_name, _command_func in (
    ("index", index.index_cmd),
    ("watch", watch.watch_cmd),
    ("gain", gain.gain_cmd),
    ("report", report.report_cmd),
    ("install", install.install_cmd),
):
    app.command(_command_name)(_command_func)
23
+
24
+
25
@app.command()
def version() -> None:
    """Print the installed sqlcg version."""
    # Imported inside the command, so the lookup happens only when it runs.
    import sqlcg

    typer.echo(f"sqlcg version {sqlcg.__version__}")
31
+
32
+
33
def main() -> None:
    """Run the sqlcg command-line interface.

    Calls ``load_dotenv()`` first and then hands control to the Typer
    application.
    """
    # Environment must be populated before any command reads configuration.
    load_dotenv()
    app()
37
+
38
+
39
+ if __name__ == "__main__":
40
+ main()
sqlcg/core/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """Core database and schema modules."""
2
+
3
+ from sqlcg.core import schema
4
+ from sqlcg.core.graph_db import GraphBackend
5
+ from sqlcg.core.kuzu_backend import KuzuBackend
6
+ from sqlcg.core.neo4j_backend import Neo4jBackend
7
+
8
+ __all__ = ["GraphBackend", "KuzuBackend", "Neo4jBackend", "schema"]
sqlcg/core/config.py ADDED
@@ -0,0 +1,104 @@
1
+ """Configuration management for sqlcg."""
2
+
3
+ import os
4
+ import tomllib
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+ if TYPE_CHECKING:
11
+ from sqlcg.core.graph_db import GraphBackend
12
+
13
+
14
class KuzuConfig(BaseModel):
    """Configuration for KùzuDB backend."""

    # Location of the graph database; defaults to ~/.sqlcg/graph.db.
    db_path: Path = Field(default_factory=lambda: Path.home() / ".sqlcg" / "graph.db")

    @classmethod
    def from_env(cls) -> "KuzuConfig":
        """Load KùzuDB config from environment variables.

        Reads ``SQLCG_DB_PATH``. An unset or empty value selects the default
        path. A leading ``~`` is expanded here, because values that do not
        pass through a shell (for example ones loaded from a ``.env`` file)
        would otherwise be used literally.

        Returns:
            KuzuConfig instance with environment-overridden values if present.
        """
        env_path = os.getenv("SQLCG_DB_PATH")
        if not env_path:
            return cls()
        return cls(db_path=Path(env_path).expanduser())
28
+
29
+
30
class Neo4jConfig(BaseModel):
    """Connection settings for the Neo4j backend."""

    uri: str = Field(default="bolt://localhost:7687")
    user: str = Field(default="neo4j")
    password: str = Field(default="password")

    @classmethod
    def from_env(cls) -> "Neo4jConfig":
        """Build the config from ``NEO4J_URI``, ``NEO4J_USER`` and ``NEO4J_PASSWORD``.

        Returns:
            Neo4jConfig where each unset variable falls back to its default.
        """
        # field name -> (environment variable, fallback when unset)
        env_sources = {
            "uri": ("NEO4J_URI", "bolt://localhost:7687"),
            "user": ("NEO4J_USER", "neo4j"),
            "password": ("NEO4J_PASSWORD", "password"),
        }
        values = {
            field_name: os.environ.get(variable, fallback)
            for field_name, (variable, fallback) in env_sources.items()
        }
        return cls(**values)
49
+
50
+
51
def get_db_path() -> Path:
    """Return the ``db_path`` of the environment-derived KùzuDB config.

    Returns:
        The ``db_path`` value of ``KuzuConfig.from_env()``.
    """
    kuzu_cfg = KuzuConfig.from_env()
    return kuzu_cfg.db_path
58
+
59
+
60
+ def get_dialect(path: Path) -> str:
61
+ """Get the SQL dialect from .sqlcg.toml or fall back to snowflake.
62
+
63
+ Args:
64
+ path: Root directory to search for .sqlcg.toml
65
+
66
+ Returns:
67
+ SQL dialect string (e.g., "snowflake", "bigquery", "postgres")
68
+ """
69
+ config_file = Path(path) / ".sqlcg.toml"
70
+ if config_file.exists():
71
+ try:
72
+ with open(config_file, "rb") as f:
73
+ config = tomllib.load(f)
74
+ dialect = config.get("sqlcg", {}).get("dialect")
75
+ if dialect:
76
+ return dialect
77
+ except Exception:
78
+ pass
79
+ return "snowflake"
80
+
81
+
82
def get_backend() -> "GraphBackend":
    """Get a graph backend instance respecting the SQLCG_BACKEND env var.

    The value is matched case-insensitively and ignoring surrounding
    whitespace; an unset or empty variable selects the default (``kuzu``).
    Backend modules are imported only for the backend actually selected.

    Returns:
        A GraphBackend instance (KuzuBackend by default, or Neo4jBackend)

    Raises:
        ValueError: If backend type is not recognized
    """
    raw_backend = os.getenv("SQLCG_BACKEND", "kuzu")
    backend_type = raw_backend.strip().lower() or "kuzu"

    if backend_type == "kuzu":
        from sqlcg.core.kuzu_backend import KuzuBackend

        kuzu_cfg = KuzuConfig.from_env()
        return KuzuBackend(str(kuzu_cfg.db_path))

    if backend_type == "neo4j":
        from sqlcg.core.neo4j_backend import Neo4jBackend

        neo4j_cfg = Neo4jConfig.from_env()
        return Neo4jBackend(neo4j_cfg.uri, neo4j_cfg.user, neo4j_cfg.password)

    # Report the value exactly as the user supplied it.
    raise ValueError(f"Unknown backend type: {raw_backend}")