PyPI - sql-code-graph - Versions diffs - 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

sql-code-graph 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

{sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/METADATA +2 -4
{sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/RECORD +29 -29
sqlcg/__init__.py +1 -1
sqlcg/cli/commands/analyze.py +138 -127
sqlcg/cli/commands/db.py +31 -56
sqlcg/cli/commands/find.py +5 -9
sqlcg/cli/commands/gain.py +14 -16
sqlcg/cli/commands/index.py +6 -17
sqlcg/cli/commands/reindex.py +1 -1
sqlcg/cli/commands/uninstall.py +9 -20
sqlcg/core/__init__.py +1 -3
sqlcg/core/config.py +25 -81
sqlcg/core/duckdb_backend.py +764 -0
sqlcg/core/freshness.py +1 -1
sqlcg/core/graph_db.py +20 -4
sqlcg/core/queries.py +26 -7
sqlcg/core/queries.sql +249 -0
sqlcg/core/schema.py +1 -1
sqlcg/indexer/indexer.py +27 -36
sqlcg/metrics/store.py +1 -1
sqlcg/server/control.py +1 -1
sqlcg/server/noise_filter.py +1 -1
sqlcg/server/read_client.py +2 -2
sqlcg/server/server.py +26 -23
sqlcg/server/skill.py +2 -2
sqlcg/server/tools.py +43 -106
sqlcg/server/writer.py +43 -218
sqlcg/core/kuzu_backend.py +0 -449
sqlcg/core/neo4j_backend.py +0 -233
{sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/WHEEL +0 -0
{sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/entry_points.txt +0 -0

sqlcg/cli/commands/db.py CHANGED Viewed

@@ -1,6 +1,5 @@
 """Database management commands."""
-import os
 import shutil
 from pathlib import Path
@@ -20,18 +19,8 @@ console = Console()
 @app.command("init")
-def db_init(
-    buffer_pool_size: int = typer.Option(
-        0,
-        "--buffer-pool-size",
-        help="KuzuDB buffer pool size in MB (0 = default). "
-        "Set to 256-512 on memory-constrained machines.",
-    ),
-) -> None:
+def db_init() -> None:
     """Initialise the graph database (idempotent)."""
-    if buffer_pool_size > 0:
-        os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
     db_path = get_db_path()
     db_path.parent.mkdir(parents=True, exist_ok=True)
     with get_backend() as backend:
@@ -49,38 +38,36 @@ def db_reset(  # noqa: B008
     from sqlcg.server.control import sock_path
-    # Step 3.4 (OD-3 / W2): refuse cleanly when a server is live — both the
-    # full reset and the --repo partial reset open the RW backend directly and
-    # would fight the server's lock.  Guard runs BEFORE either destructive branch.
+    # Refuse cleanly when a server is live.
     sp = sock_path()
     if sp.exists():
         try:
             with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
                 s.settimeout(1)
                 s.connect(str(sp))
-            # Connection succeeded — a server is live.
             console.print(
                 "[red]A server is running on this database; stop it first "
                 "('sqlcg mcp stop') before resetting the database.[/red]"
             )
             raise typer.Exit(1)
         except (FileNotFoundError, ConnectionRefusedError, OSError):
-            # No live server — fall through to destructive action.
             pass
     if repo:
-        # Delete all nodes for this repo (use run_write for mutation)
+        # Delete all nodes for this repo: delete File nodes (cascades to all
+        # related nodes via delete_nodes_for_file) and the Repo node itself.
         with get_backend() as backend:
-            backend.run_write(
-                "MATCH (r:Repo {path: $p}) DETACH DELETE r",
-                {"p": repo},
+            # Get all files for this repo
+            file_rows = backend.run_read(
+                'SELECT path FROM "File" WHERE repo_path = ?',
+                {"repo_path": repo},
             )
+            for fr in file_rows:
+                backend.delete_nodes_for_file(fr["path"])
+            backend.run_write('DELETE FROM "Repo" WHERE path = ?', {"p": repo})
         console.print(f"[yellow]Reset repo[/yellow] {repo}")
     else:
-        # Full reset — delete the DB. Kuzu may store it as a single file (current,
-        # e.g. 0.11.x) or a directory (older versions); also drop the .wal sidecar.
-        # shutil.rmtree silently no-ops on a regular file (NotADirectoryError +
-        # ignore_errors), so dispatch on the actual filesystem type.
+        # Full reset — delete the DuckDB file (single file, not a directory).
         db_path = get_db_path()
         removed = False
         for target in (db_path, db_path.with_name(db_path.name + ".wal")):
@@ -99,56 +86,46 @@ def db_reset(  # noqa: B008
 @app.command("info")
 def db_info() -> None:
     """Show database stats."""
-    # db info is a read-only command.  All Cypher reads route through the live
-    # server (run_read_routed) to avoid "Database is locked" while the MCP server
-    # holds the write lock.  get_schema_version / get_indexed_sha are inlined as
-    # run_read_routed calls using their known Cypher so they too route through the
-    # socket when a server is live; this avoids a direct-open that would hit the lock.
+    # db info routes through the live server (run_read_routed) to avoid holding
+    # the DuckDB file lock when the MCP server is running.
     # Schema version
-    schema_rows = run_read_routed("MATCH (v:SchemaVersion) RETURN v.version AS version LIMIT 1", {})
+    schema_rows = run_read_routed('SELECT version FROM "SchemaVersion" LIMIT 1', {})
     version = (schema_rows[0]["version"] if schema_rows else None) or "unknown"
     console.print(f"Schema version: {version}")
-    # Freshness block — only shown when the DB has been indexed from a git repo
+    # Freshness block
     try:
-        sha_rows = run_read_routed(
-            "MATCH (v:SchemaVersion) RETURN v.indexed_sha AS sha LIMIT 1", {}
-        )
+        sha_rows = run_read_routed('SELECT indexed_sha AS sha FROM "SchemaVersion" LIMIT 1', {})
         indexed_sha = sha_rows[0]["sha"] if sha_rows else None
-        repo_rows = run_read_routed("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
+        repo_rows = run_read_routed('SELECT path FROM "Repo" LIMIT 1', {})
         if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
             repo_root = Path(repo_rows[0]["path"])
             f = compute_freshness(repo_root, indexed_sha)
             console.print(render_freshness_line(f))
-    except NotImplementedError:
-        # Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
-        pass
     except Exception as e:
-        # Any unexpected error in the freshness block must not crash db info
         logger.debug(f"Freshness check skipped: {e}")
-    # Show node counts for all labels
+    # Node counts
     for label in NodeLabel:
         try:
-            result = run_read_routed(f"MATCH (n:{label}) RETURN COUNT(*) AS count", {})
+            result = run_read_routed(f'SELECT count(*) AS count FROM "{label}"', {})
             count = result[0]["count"] if result else 0
             console.print(f"  {label}: {count}")
         except Exception as e:
-            # Log unexpected exceptions instead of silently skipping
             logger.error(f"Error getting count for {label}: {e}")
             console.print(f"  [red]{label}: error[/red]")
-    # Health check section
-    repo_count_result = run_read_routed("MATCH (n:Repo) RETURN COUNT(n) AS count", {})
+    # Health check
+    repo_count_result = run_read_routed('SELECT count(*) AS count FROM "Repo"', {})
     repo_count = repo_count_result[0]["count"] if repo_count_result else 0
     if repo_count == 0:
-        console.print(  # noqa: E501
+        console.print(
             "[red]Database is empty. Run 'sqlcg db init' and 'sqlcg index <path>' first.[/red]"
         )
     else:
-        query_count_result = run_read_routed("MATCH (n:SqlQuery) RETURN COUNT(n) AS count", {})
+        query_count_result = run_read_routed('SELECT count(*) AS count FROM "SqlQuery"', {})
         query_count = query_count_result[0]["count"] if query_count_result else 0
         if query_count == 0:
@@ -157,7 +134,7 @@ def db_info() -> None:
                 "the graph.[/yellow]"
             )
         else:
-            col_count_result = run_read_routed("MATCH (n:SqlColumn) RETURN COUNT(n) AS count", {})
+            col_count_result = run_read_routed('SELECT count(*) AS count FROM "SqlColumn"', {})
             col_count = col_count_result[0]["count"] if col_count_result else 0
             if col_count == 0:
@@ -167,12 +144,10 @@ def db_info() -> None:
                     "will return empty results.[/yellow]"
                 )
-    # Print COLUMN_LINEAGE edges count
-    edges_result = run_read_routed("MATCH ()-[r:COLUMN_LINEAGE]->() RETURN COUNT(r) AS count", {})
+    edges_result = run_read_routed('SELECT count(*) AS count FROM "COLUMN_LINEAGE"', {})
     edges_count = edges_result[0]["count"] if edges_result else 0
     console.print(f"  COLUMN_LINEAGE edges: {edges_count}")
-    # Print star resolution metrics (T-07)
     from sqlcg.core.queries import COUNT_STAR_EXPANSIONS_QUERY, COUNT_STAR_SOURCES_QUERY
     star_source_result = run_read_routed(COUNT_STAR_SOURCES_QUERY, {})
@@ -183,11 +158,11 @@ def db_info() -> None:
     star_expansion_count = star_expansion_result[0]["n"] if star_expansion_result else 0
     console.print(f"  STAR_EXPANSION lineage edges: {star_expansion_count}")
-    # Print parsing mode distribution
-    mode_query = (
-        "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode, COUNT(q) AS cnt ORDER BY cnt DESC"
+    mode_rows = run_read_routed(
+        'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery"'
+        " GROUP BY parsing_mode ORDER BY cnt DESC",
+        {},
     )
-    mode_rows = run_read_routed(mode_query, {})
     if mode_rows and "mode" in mode_rows[0]:
         console.print("\n  Parsing mode distribution:")
         for row in mode_rows:
@@ -197,7 +172,7 @@ def db_info() -> None:
 @app.command("list-repos")
 def list_repos() -> None:
     """List all indexed repositories."""
-    result = run_read_routed("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})
+    result = run_read_routed('SELECT path, name FROM "Repo"', {})
     if not result:
         console.print("[yellow]No repositories indexed[/yellow]")

sqlcg/cli/commands/find.py CHANGED Viewed

@@ -4,7 +4,6 @@ import typer
 from rich.console import Console
 from rich.table import Table
-from sqlcg.core.schema import NodeLabel
 from sqlcg.server.read_client import run_read_routed
 app = typer.Typer(help="Search the graph")
@@ -19,14 +18,13 @@ def find_table(  # noqa: B008
     """Find a table by name."""
     name = name.lower()  # graph keys are lowercased at index time (C2 normalization)
     results = run_read_routed(
-        f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
-        "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
+        "SELECT qualified, kind FROM \"SqlTable\" WHERE qualified LIKE '%' || ? || '%' LIMIT 50",
         {"name": name},
     )
     if not raw:
         from sqlcg.server.noise_filter import NoiseFilter
-        nf = NoiseFilter.from_config()  # repo_root=None → falls back to Path.cwd()
+        nf = NoiseFilter.from_config()
         ids = [r["qualified"] for r in results]
         kept, _ = nf.filter_nodes(ids)
         kept_set = set(kept)
@@ -42,14 +40,13 @@ def find_column(  # noqa: B008
     """Find a column by table.column reference."""
     ref = ref.lower()  # graph keys are lowercased at index time (C2 normalization)
     results = run_read_routed(
-        f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
+        "SELECT id FROM \"SqlColumn\" WHERE id LIKE '%' || ? || '%' LIMIT 50",
         {"ref": ref},
     )
     if not raw:
         from sqlcg.server.noise_filter import NoiseFilter
-        nf = NoiseFilter.from_config()  # repo_root=None → falls back to Path.cwd()
-        # Filter on the schema.table portion of each column id (schema.table.column)
+        nf = NoiseFilter.from_config()
         results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
     _print_table(results, ["id"])
@@ -60,8 +57,7 @@ def find_pattern(  # noqa: B008
 ) -> None:
     """Find queries containing a SQL pattern."""
     results = run_read_routed(
-        f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
-        "RETURN q.id AS id, q.kind AS kind LIMIT 50",
+        "SELECT id, kind FROM \"SqlQuery\" WHERE sql LIKE '%' || ? || '%' LIMIT 50",
         {"pattern": pattern},
     )
     _print_table(results, ["id", "kind"])

sqlcg/cli/commands/gain.py CHANGED Viewed

@@ -112,13 +112,11 @@ def gain_cmd(
             """
         )
-        # Section E: execute_cypher ratio
-        cypher_query = "SELECT COUNT(*) as count FROM tool_calls WHERE tool_name = 'execute_cypher'"
-        execute_cypher_count_result = metrics.execute_query(cypher_query)
-        execute_cypher_count = (
-            execute_cypher_count_result[0][0] if execute_cypher_count_result else 0
-        )
-        execute_cypher_ratio = execute_cypher_count / total_calls if total_calls > 0 else 0
+        # Section E: execute_sql ratio
+        sql_query = "SELECT COUNT(*) as count FROM tool_calls WHERE tool_name = 'execute_sql'"
+        execute_sql_count_result = metrics.execute_query(sql_query)
+        execute_sql_count = execute_sql_count_result[0][0] if execute_sql_count_result else 0
+        execute_sql_ratio = execute_sql_count / total_calls if total_calls > 0 else 0
         # Section F: parse quality from graph.
         # run_read_routed raises typer.Exit (Exception-derived, NOT SystemExit) on
@@ -127,8 +125,8 @@ def gain_cmd(
         parse_quality: dict[str, int] | None = None
         try:
             mode_rows = run_read_routed(
-                "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
-                " COUNT(q) AS cnt ORDER BY cnt DESC",
+                'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery"'
+                " GROUP BY parsing_mode ORDER BY cnt DESC",
                 {},
             )
             if mode_rows and "mode" in mode_rows[0]:
@@ -144,7 +142,7 @@ def gain_cmd(
                 "feedback_tp": tp_count,
                 "feedback_total": fb_total,
                 "top_tools": [{"name": row[0], "count": row[1]} for row in top_tools],
-                "execute_cypher_ratio": round(execute_cypher_ratio, 2),
+                "execute_sql_ratio": round(execute_sql_ratio, 2),
             }
             if parse_quality is not None:
                 payload["parse_quality"] = parse_quality
@@ -191,14 +189,14 @@ def gain_cmd(
                     console.print(f"  {i}. {name}: {count}")
             console.print()
-            # Section E: execute_cypher ratio
-            console.print("[bold cyan]E. Raw Cypher Usage[/bold cyan]")
-            ratio_pct = execute_cypher_ratio * 100
-            if execute_cypher_ratio > 0.3:
-                msg = f"  [yellow]execute_cypher: {ratio_pct:.1f}% (high raw-Cypher usage)[/yellow]"
+            # Section E: execute_sql ratio
+            console.print("[bold cyan]E. Raw SQL Usage[/bold cyan]")
+            ratio_pct = execute_sql_ratio * 100
+            if execute_sql_ratio > 0.3:
+                msg = f"  [yellow]execute_sql: {ratio_pct:.1f}% (high raw-SQL usage)[/yellow]"
                 console.print(msg)
             else:
-                console.print(f"  execute_cypher: {ratio_pct:.1f}%")
+                console.print(f"  execute_sql: {ratio_pct:.1f}%")
             console.print()
             # Section F: parse quality from graph

sqlcg/cli/commands/index.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """Index command for scanning and indexing SQL files."""
 import json
-import os
 import socket as _socket
 from pathlib import Path
@@ -16,7 +15,7 @@ from rich.progress import (
     TimeRemainingColumn,
 )
-from sqlcg.core.config import KuzuConfig, config_file_present, get_backend, get_db_path, get_dialect
+from sqlcg.core.config import DbConfig, config_file_present, get_backend, get_db_path, get_dialect
 from sqlcg.indexer.indexer import Indexer
 console = Console()
@@ -37,17 +36,11 @@ def index_cmd(  # noqa: B008
     timeout_per_file: int = typer.Option(  # noqa: B008
         10, "--timeout-per-file", help="Timeout per file in seconds"
     ),
-    buffer_pool_size: int = typer.Option(  # noqa: B008
-        0,
-        "--buffer-pool-size",
-        help="KuzuDB buffer pool size in MB (0 = default). "
-        "Set to 256-512 on memory-constrained machines.",
-    ),
     batch_size: int = typer.Option(  # noqa: B008
         50,
         "--batch-size",
         help=(
-            "Files per KuzuDB transaction in the upsert pass. "
+            "Files per DuckDB transaction in the upsert pass. "
             "Default 50 balances commit-overhead reduction (vs. legacy per-file commits) "
             "against per-batch memory cost. Lower values are safer for memory-constrained "
             "machines; higher values give marginal speedup at the cost of larger working sets. "
@@ -148,16 +141,12 @@ def index_cmd(  # noqa: B008
         sqlcg_log.addHandler(_warn_handler)
         _warn_log_path = None
     else:
-        _warn_log_path = KuzuConfig.from_env().log_path
+        _warn_log_path = DbConfig.from_env().log_path
         _warn_log_path.parent.mkdir(parents=True, exist_ok=True)
         _warn_handler = logging.FileHandler(_warn_log_path)
         _warn_handler.setLevel(logging.WARNING)
         sqlcg_log.addHandler(_warn_handler)
-    # Set buffer pool size via env var if specified
-    if buffer_pool_size > 0:
-        os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
     if not quiet and not config_file_present(path):
         console.print(
             f"[yellow]No .sqlcg.toml found at {path}/.sqlcg.toml — "
@@ -181,7 +170,7 @@ def index_cmd(  # noqa: B008
         )
     except KeyboardInterrupt:
         # The backend context manager (inside _run_index) has already closed the
-        # KuzuDB connection and released the lock by the time we get here.
+        # DuckDB connection and released the lock by the time we get here.
         console.print("\n[yellow]Interrupted — no partial graph written. Re-run to index.[/yellow]")
         raise typer.Exit(130) from None
     finally:
@@ -400,10 +389,10 @@ def _run_index(
             )
         # Connect files to repo
+        from sqlcg.core.queries import INDEX_REPO_FILES_QUERY
         from sqlcg.core.schema import RelType
-        files_query = "MATCH (f:File) WHERE f.path STARTS WITH $repo_prefix RETURN f.path AS path"
-        file_rows = backend.run_read(files_query, {"repo_prefix": abs_path})
+        file_rows = backend.run_read(INDEX_REPO_FILES_QUERY, {"repo_prefix": abs_path})
         for row in file_rows:
             backend.upsert_edge(
                 NodeLabel.FILE,

sqlcg/cli/commands/reindex.py CHANGED Viewed

@@ -21,7 +21,7 @@ console = Console()
 # Client-side socket timeout for the --notify control-socket path.
 # A real DWH server-side resync_changed measured ~89 s (41 changed files + closure);
 # 300 s covers that with headroom while keeping the wait bounded on a wedged server.
-# This is a CLI transport bound, NOT a KuzuConfig/indexer constant.
+# This is a CLI transport bound, NOT a DbConfig/indexer constant.
 _NOTIFY_SOCKET_TIMEOUT_S = 300

sqlcg/cli/commands/uninstall.py CHANGED Viewed

@@ -26,7 +26,7 @@ def uninstall_cmd(  # noqa: B008
     """Uninstall sqlcg from Claude Code and optionally clean up resources.
     Step 1: Remove MCP registration from ~/.claude/settings.json
-    Step 2: Optionally delete the KùzuDB graph database
+    Step 2: Optionally delete the DuckDB graph database
     Step 3: Remove git hook sentinel block from .git/hooks/post-checkout
     Step 4: Remove sqlcg skill directory from ~/.claude/skills/sqlcg/ and
             <repo>/.claude/skills/sqlcg/
@@ -34,7 +34,7 @@ def uninstall_cmd(  # noqa: B008
     # Step 1: Remove MCP entry from settings.json
     _step1_remove_mcp_entry()
-    # Step 2: Offer to delete the KùzuDB (unless --keep-db flag is set)
+    # Step 2: Offer to delete the database (unless --keep-db flag is set)
     if not keep_db:
         _step2_delete_database(force)
     else:
@@ -84,7 +84,7 @@ def _step1_remove_mcp_entry() -> None:
 def _step2_delete_database(force: bool) -> None:
-    """Offer to delete the KùzuDB graph database."""
+    """Offer to delete the DuckDB graph database."""
     db_path = _get_db_path()
     if not db_path:
@@ -93,13 +93,6 @@ def _step2_delete_database(force: bool) -> None:
     db_path_obj = Path(db_path)
-    # Check if it's a kuzu backend (not Neo4j)
-    # If db_path is a directory or ends with standard kuzu patterns, it's likely kuzu
-    # For now, we'll assume anything in .sqlcg/kuzu is kuzu
-    if not _is_kuzu_backend(db_path):
-        console.print("[dim]Database is not KùzuDB — skipping deletion[/dim]")
-        return
     if not db_path_obj.exists():
         console.print(f"[dim]Database not found at {db_path}[/dim]")
         return
@@ -117,9 +110,11 @@ def _step2_delete_database(force: bool) -> None:
         console.print("[dim]Keeping database[/dim]")
         return
-    # Delete the database directory
+    # DuckDB is a single file (+ optional .wal sibling); delete both.
     try:
-        shutil.rmtree(db_path_obj, ignore_errors=True)
+        for target in (db_path_obj, db_path_obj.with_name(db_path_obj.name + ".wal")):
+            if target.exists():
+                target.unlink()
         console.print(f"[green]Deleted graph database at {db_path}[/green]")
     except Exception as e:
         console.print(f"[yellow]Warning:[/yellow] Failed to delete database: {e}")
@@ -222,18 +217,12 @@ def _step3_remove_git_hook(repo_path: Path) -> None:
 def _get_db_path() -> str | None:
     """Get the configured database path from environment or default."""
-    from sqlcg.core.config import KuzuConfig
+    from sqlcg.core.config import DbConfig
-    db_path = str(KuzuConfig.from_env().db_path)
+    db_path = str(DbConfig.from_env().db_path)
     return db_path if Path(db_path).exists() else None
-def _is_kuzu_backend(db_path: str) -> bool:
-    """Check if the database is a KùzuDB backend (not Neo4j)."""
-    backend = os.getenv("SQLCG_BACKEND", "kuzu").lower()
-    return backend in ("kuzu", "")  # Default to kuzu if unset
 # Candidate skill directory locations to remove (global first, then project-relative)
 # Each entry is a callable(repo_path) -> Path resolving to the sqlcg skill dir.
 _SKILL_DIR_TARGETS = [

sqlcg/core/__init__.py CHANGED Viewed

@@ -2,7 +2,5 @@
 from sqlcg.core import schema
 from sqlcg.core.graph_db import GraphBackend
-from sqlcg.core.kuzu_backend import KuzuBackend
-from sqlcg.core.neo4j_backend import Neo4jBackend
-__all__ = ["GraphBackend", "KuzuBackend", "Neo4jBackend", "schema"]
+__all__ = ["GraphBackend", "schema"]

sqlcg/core/config.py CHANGED Viewed

@@ -11,64 +11,37 @@ if TYPE_CHECKING:
     from sqlcg.core.graph_db import GraphBackend
-class KuzuConfig(BaseModel):
-    """Configuration for KùzuDB backend."""
+class DbConfig(BaseModel):
+    """Configuration for the DuckDB backend."""
     db_path: Path = Field(default_factory=lambda: Path.home() / ".sqlcg" / "graph.db")
-    buffer_pool_size_mb: int = Field(
-        default=0,
-        description="KuzuDB buffer pool size in MB (0 = use KuzuDB default)",
-    )
     log_path: Path = Field(
         default_factory=lambda: Path.home() / ".sqlcg" / "index.log",
         description="Path for parse-warning log file written during indexing",
     )
     @classmethod
-    def from_env(cls) -> "KuzuConfig":
-        """Load KùzuDB config from environment variables.
+    def from_env(cls) -> "DbConfig":
+        """Load database config from environment variables.
         Returns:
-            KuzuConfig instance with environment-overridden values if present.
+            DbConfig instance with environment-overridden values if present.
         """
         env_path = os.getenv("SQLCG_DB_PATH")
-        env_buf = os.getenv("SQLCG_BUFFER_POOL_MB")
         env_log = os.getenv("SQLCG_LOG_PATH")
         return cls(
             db_path=Path(env_path) if env_path else Path.home() / ".sqlcg" / "graph.db",
-            buffer_pool_size_mb=int(env_buf) if env_buf else 0,
             log_path=Path(env_log) if env_log else Path.home() / ".sqlcg" / "index.log",
         )
-class Neo4jConfig(BaseModel):
-    """Configuration for Neo4j backend."""
-    uri: str = Field(default="bolt://localhost:7687")
-    user: str = Field(default="neo4j")
-    password: str = Field(default="password")
-    @classmethod
-    def from_env(cls) -> "Neo4jConfig":
-        """Load Neo4j config from environment variables.
-        Returns:
-            Neo4jConfig instance with environment-overridden values if present.
-        """
-        return cls(
-            uri=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
-            user=os.getenv("NEO4J_USER", "neo4j"),
-            password=os.getenv("NEO4J_PASSWORD", "password"),
-        )
 def get_db_path() -> Path:
     """Get the database path from environment or use default.
     Returns:
-        Path to the KùzuDB database file
+        Path to the DuckDB database file
     """
-    return KuzuConfig.from_env().db_path
+    return DbConfig.from_env().db_path
 def config_file_present(path: Path) -> bool:
@@ -347,58 +320,29 @@ def get_external_consumers(path: Path) -> list[ExternalConsumerSpec]:
 def get_backend(read_only: bool = False) -> "GraphBackend":
-    """Get a graph backend instance respecting the SQLCG_BACKEND env var.
+    """Get a DuckDBBackend instance.
+    The ``read_only`` parameter is accepted for API compatibility but is
+    ignored — DuckDB uses a single R/W handle for the process lifetime.
+    Concurrent read safety is provided by DuckDB's MVCC (readers see a
+    consistent snapshot during an in-flight write transaction).
+    Cross-process access: whichever process opens the DuckDB file first holds
+    an exclusive lock; other processes cannot open it at all (even read-only).
+    CLI read commands therefore route through the live MCP server via
+    ``read_client.run_read_routed`` (v1.2.0) when a server is live, and open
+    the file directly only when no server is running.
     Args:
-        read_only: Open the database in read-only mode. For KuzuBackend this
-            enables multiple concurrent read-only opens (reader/reader
-            concurrency), but does NOT allow reads while a read-write writer
-            holds the exclusive process lock — that requires routing through the
-            live MCP server via ``read_client.run_read_routed`` (v1.2.0).
-            Ignored for Neo4jBackend (Neo4j has no single-writer process lock;
-            the flag is a no-op and the normal connection is opened).
-            All writer call sites (index, reindex, db init/reset, server
-            init_backend) use the default ``False``.
+        read_only: Ignored for DuckDB. Accepted for API compatibility.
     Returns:
-        A GraphBackend instance (KuzuBackend by default, or Neo4jBackend)
+        A DuckDBBackend instance.
     Raises:
-        ValueError: If backend type is not recognized
-    Note:
-        CLI read commands (find, analyze, db info, gain) route through a live
-        MCP server via ``read_client.run_read_routed`` (v1.2.0) when a server
-        is live, falling back to ``get_backend(read_only=True)`` when no server
-        is present. The fallback path still contends for the process lock under
-        an active writer (Windows / no-server fallback only).
+        duckdb.IOException: If the file is locked by another process.
     """
-    backend_type = os.getenv("SQLCG_BACKEND", "kuzu")
-    if backend_type == "kuzu":
-        from sqlcg.core.kuzu_backend import KuzuBackend
+    from sqlcg.core.duckdb_backend import DuckDBBackend
-        kuzu_cfg = KuzuConfig.from_env()
-        try:
-            return KuzuBackend(
-                str(kuzu_cfg.db_path),
-                buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
-                read_only=read_only,
-            )
-        except RuntimeError as exc:
-            if read_only and "READ ONLY" in str(exc):
-                # KùzuDB refuses to open a non-existent or empty DB in read-only
-                # mode ("Cannot create an empty database under READ ONLY mode").
-                # Surface the same empty-DB guidance the user sees from `db info`.
-                raise RuntimeError(
-                    "Database not initialised — run 'sqlcg db init' and 'sqlcg index <path>' first."
-                ) from exc
-            raise
-    elif backend_type == "neo4j":
-        from sqlcg.core.neo4j_backend import Neo4jBackend
-        neo4j_cfg = Neo4jConfig.from_env()
-        # read_only is ignored for Neo4j — no single-writer process lock.
-        return Neo4jBackend(neo4j_cfg.uri, neo4j_cfg.user, neo4j_cfg.password)
-    else:
-        raise ValueError(f"Unknown backend type: {backend_type}")
+    cfg = DbConfig.from_env()
+    return DuckDBBackend(str(cfg.db_path))

sql-code-graph 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

sql-code-graph 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl