PyPI - sql-code-graph - Versions diffs - 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

{sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/METADATA +2 -4
{sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/RECORD +31 -30
sqlcg/__init__.py +1 -1
sqlcg/cli/commands/analyze.py +138 -127
sqlcg/cli/commands/db.py +49 -51
sqlcg/cli/commands/find.py +5 -9
sqlcg/cli/commands/gain.py +14 -16
sqlcg/cli/commands/git.py +11 -4
sqlcg/cli/commands/index.py +173 -21
sqlcg/cli/commands/mcp.py +70 -3
sqlcg/cli/commands/reindex.py +147 -77
sqlcg/cli/commands/uninstall.py +9 -20
sqlcg/core/__init__.py +1 -3
sqlcg/core/config.py +25 -81
sqlcg/core/duckdb_backend.py +764 -0
sqlcg/core/freshness.py +1 -1
sqlcg/core/graph_db.py +20 -4
sqlcg/core/queries.py +26 -7
sqlcg/core/queries.sql +249 -0
sqlcg/core/schema.py +1 -1
sqlcg/indexer/indexer.py +27 -36
sqlcg/metrics/store.py +49 -1
sqlcg/server/control.py +1 -1
sqlcg/server/noise_filter.py +1 -1
sqlcg/server/read_client.py +2 -2
sqlcg/server/server.py +184 -86
sqlcg/server/skill.py +2 -2
sqlcg/server/tools.py +119 -41
sqlcg/server/writer.py +459 -0
sqlcg/core/kuzu_backend.py +0 -445
sqlcg/core/neo4j_backend.py +0 -233
{sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/WHEEL +0 -0
{sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/entry_points.txt +0 -0

sqlcg/server/tools.py CHANGED Viewed

@@ -6,11 +6,15 @@ import time
 from collections import deque
 from contextlib import contextmanager
 from pathlib import Path
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    import anyio
 from sqlcg.core.config import get_db_path, get_presentation_prefixes
+from sqlcg.core.duckdb_backend import DuckDBBackend
 from sqlcg.core.freshness import compute_freshness
 from sqlcg.core.graph_db import GraphBackend
-from sqlcg.core.kuzu_backend import KuzuBackend
 from sqlcg.core.queries import (
     ANALYZE_UNUSED_TABLES_QUERY,
     FIND_DEFINITION_QUERY,
@@ -94,32 +98,74 @@ from sqlcg.utils.logging import getLogger  # noqa: E402
 logger = getLogger(__name__)
-# Module-level singleton backend (KùzuDB single-writer model)
+# Module-level singleton backend (DuckDB single R/W handle for the process lifetime)
 _backend: GraphBackend | None = None
 # Module-level metrics store singleton
 _metrics: MetricsStore | None = None
+# Module-level backend lock — injected by server.py _run_with_control so that
+# MCP write tools (index_repo) share the same lock as the drain loop.
+# None when no server event-loop is running (unit tests, direct DB access).
+_backend_lock: "anyio.Lock | None" = None
+# The path that init_backend() actually opened.  Captured at init time so
+# MCP write tools use this path, not get_db_path() which returns the default
+# ~/.sqlcg/graph.db regardless of what was passed to init_backend.
+_init_db_path: str | None = None
+def _set_backend_lock(lock: "anyio.Lock | None") -> None:
+    """Register the backend lock from the server's task group.
+    Called by server.py _run_with_control so MCP write tools use the same
+    lock as the drain loop — ensuring no concurrent RW access.
+    Args:
+        lock: The lock to store in the module-level ``_backend_lock``, or
+            None to clear it.
+    """
+    # Only rebinds the module-level reference; the lock is not acquired here.
+    global _backend_lock
+    _backend_lock = lock
 def init_backend(db_path: str | None = None) -> None:
     """Initialize the module-level backend singleton.
+    Startup sequence:
+      1. Open a single read-write DuckDB handle and create the schema if
+         absent (init_schema is idempotent — it does NOT migrate).
+      2. Run the schema-version gate (Step 1.4): raise RuntimeError if the
+         stored version differs from the current build's SCHEMA_VERSION.
+      3. Store that same read-write handle as the serving singleton; it is
+         used for both reads and writes for the process lifetime.
+    Note: the earlier design (OD-2 — measured on kuzu 0.11.3) closed the RW
+    backend and reopened read-only so that ``init_schema()`` — which issues
+    DDL — never ran on an RO connection (``Cannot create an empty database
+    under READ ONLY mode.``). The DuckDB backend does no RO/RW escalation.
     Args:
-        db_path: Path to KùzuDB database. If None, uses get_db_path().
+        db_path: Path to DuckDB database. If None, uses get_db_path().
     Raises:
-        RuntimeError: If backend initialization fails
+        RuntimeError: If backend initialization fails or schema version
+            is stale (the caller must not swallow this — server must exit).
     """
-    global _backend, _metrics
+    global _backend, _metrics, _init_db_path
     path = db_path or str(get_db_path())
-    backend = KuzuBackend(path)
+    _init_db_path = path
+    # DuckDB: single R/W handle for the process lifetime — no RO/RW escalation.
+    # init_schema is idempotent; transaction() wraps the DDL in one commit.
+    rw_backend = DuckDBBackend(path)
     try:
-        backend.init_schema()
+        rw_backend.init_schema()
     except Exception as exc:
-        backend.close()
+        rw_backend.close()
         raise RuntimeError(f"Backend initialization failed: {exc}") from exc
-    _backend = backend
-    logger.debug(f"Backend initialized: {path}")
+    # Step 2 — schema-version gate (Step 1.4).
+    _assert_schema_current(rw_backend, path)
+    # DuckDB: the same handle is used for reads and writes (MVCC).
+    _backend = rw_backend
+    logger.debug(f"Backend initialized (DuckDB R/W): {path}")
     # Initialize metrics store (best-effort, failures are logged as WARNING)
     try:
@@ -136,7 +182,7 @@ def shutdown_backend() -> None:
     Closes the database connection and clears the global reference.
     Safe to call multiple times.
     """
-    global _backend, _metrics
+    global _backend, _metrics, _init_db_path
     if _backend is not None:
         _backend.close()
         _backend = None
@@ -144,6 +190,7 @@ def shutdown_backend() -> None:
     if _metrics is not None:
         _metrics.close()
         _metrics = None
+    _init_db_path = None
 def _get_backend() -> GraphBackend:
@@ -157,6 +204,31 @@ def _get_backend() -> GraphBackend:
     return _backend
+def _assert_schema_current(backend: GraphBackend, path: str) -> None:
+    """Refuse to start when the stored schema version differs from the current build.
+    Called by init_backend (Step 1.4) after init_schema() has run the
+    create-if-absent step. A stored version of None is let through; only a
+    non-None version that differs from SCHEMA_VERSION is rejected.
+    Args:
+        backend: An open (RW) backend to query.
+        path: The db_path string. NOTE(review): not currently interpolated
+            into the error message — confirm whether it should be.
+    Raises:
+        RuntimeError: Stored version present and != current SCHEMA_VERSION.
+            Message names both versions and the sqlcg db reset remedy.
+    """
+    from sqlcg.core.schema import SCHEMA_VERSION
+    stored = backend.get_schema_version()
+    # A None result skips the gate; only a differing non-None version raises.
+    if stored is not None and stored != SCHEMA_VERSION:
+        msg = (
+            f"Database schema is v{stored}, but this build expects v{SCHEMA_VERSION} — "
+            f"run 'sqlcg db reset && sqlcg index <path>' to re-index."
+        )
+        raise RuntimeError(msg)
 @contextmanager
 def _open_backend():
     """Context manager to get the initialized backend.
@@ -183,11 +255,11 @@ def _assert_indexed(db: GraphBackend) -> None:
     Raises:
         NotIndexedError: If no repos or files have been indexed
     """
-    rows = db.run_read("MATCH (r:Repo) RETURN count(r) AS n", {})
+    rows = db.run_read('SELECT count(*) AS n FROM "Repo"', {})
     if rows and rows[0]["n"] > 0:
         return
     # Fallback: accept a graph with File nodes but no Repo (test-only or partial state).
-    file_rows = db.run_read("MATCH (f:File) RETURN count(f) AS n", {})
+    file_rows = db.run_read('SELECT count(*) AS n FROM "File"', {})
     if file_rows and file_rows[0]["n"] > 0:
         logger.debug(
             "File nodes present but no Repo node — accepting as test-only/partial graph; "
@@ -214,7 +286,7 @@ def _indexed_root(db: GraphBackend) -> Path | None:
         Absolute Path of the indexed root, or None if unavailable.
     """
     try:
-        rows = db.run_read("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
+        rows = db.run_read('SELECT path FROM "Repo" LIMIT 1', {})
         if rows and rows[0].get("path"):
             return Path(rows[0]["path"])
     except Exception:
@@ -364,7 +436,10 @@ def _kahn_topological_sort(affected_tables: list[str], db: GraphBackend) -> tupl
     indegree: dict[str, int] = {t: 0 for t in affected_tables}
     for table in affected_tables:
-        rows = db.run_read(GET_TABLE_DIRECT_UPSTREAMS_QUERY, {"table_qualified": table})
+        rows = db.run_read(
+            GET_TABLE_DIRECT_UPSTREAMS_QUERY,
+            {"table_qualified": table, "table_qualified2": table},
+        )
         for row in rows:
             src = row["upstream_table"]
             if src in table_set and src != table and table not in successors[src]:
@@ -462,19 +537,21 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
     success = True
     try:
-        db = _get_backend()
-        indexer = Indexer()
         path = Path(repo_path).resolve()
         if not path.exists():
             raise ValueError(f"Repository path does not exist: {repo_path}")
         if not path.is_dir():
             raise ValueError(f"Repository path is not a directory: {repo_path}")
+        # DuckDB: single R/W handle for the process lifetime — use directly.
+        rw_db = _get_backend()
+        indexer = Indexer()
         # Ensure the Repo node exists for this repository
         from sqlcg.core.schema import NodeLabel, RelType
         abs_path = str(path)
-        db.upsert_node(
+        rw_db.upsert_node(
             NodeLabel.REPO,
             abs_path,
             {
@@ -484,14 +561,14 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
         )
         # Index the repository (with absolute path)
-        result = indexer.index_repo(path, dialect, db)
+        result = indexer.index_repo(path, dialect, rw_db)
         # Create BELONGS_TO relationships from File nodes to Repo node
         # Query for all File nodes in this repo and link them to the Repo
         repo_prefix = abs_path.rstrip("/") + "/"
-        file_rows = db.run_read(INDEX_REPO_FILES_QUERY, {"repo_prefix": repo_prefix})
+        file_rows = rw_db.run_read(INDEX_REPO_FILES_QUERY, {"repo_prefix": repo_prefix})
         for row in file_rows:
-            db.upsert_edge(
+            rw_db.upsert_edge(
                 NodeLabel.FILE,
                 row["path"],
                 NodeLabel.REPO,
@@ -499,7 +576,6 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
                 RelType.BELONGS_TO,
                 {},
             )
         logger.info(f"Indexed {result['files_parsed']} files with {result['tables_found']} tables")
         # Record metrics
@@ -831,7 +907,10 @@ def get_change_scope(table_qualified: str) -> ChangeScopeResult:
         def_rows = db.run_read(GET_TABLE_DEFINING_FILES_QUERY, {"table_qualified": target})
         defining_files = _dedup_preserve_order([r["file_path"] for r in def_rows])
-        up_rows = db.run_read(GET_TABLE_DIRECT_UPSTREAMS_QUERY, {"table_qualified": target})
+        up_rows = db.run_read(
+            GET_TABLE_DIRECT_UPSTREAMS_QUERY,
+            {"table_qualified": target, "table_qualified2": target},
+        )
         upstream_raw = _dedup_preserve_order(
             [r["upstream_table"] for r in up_rows if r["upstream_table"]]
         )
@@ -1464,14 +1543,15 @@ def db_info() -> DbInfoResult:
     node_counts: dict[str, int] = {}
     for label in NodeLabel:
-        result = db.run_read(f"MATCH (n:{label}) RETURN COUNT(*) AS count", {})
+        result = db.run_read(f'SELECT count(*) AS count FROM "{label}"', {})
         node_counts[str(label)] = result[0]["count"] if result else 0
-    edges_result = db.run_read("MATCH ()-[r:COLUMN_LINEAGE]->() RETURN COUNT(r) AS count", {})
+    edges_result = db.run_read('SELECT count(*) AS count FROM "COLUMN_LINEAGE"', {})
     column_lineage_edges = edges_result[0]["count"] if edges_result else 0
     mode_rows = db.run_read(
-        "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode, COUNT(q) AS cnt ORDER BY cnt DESC",
+        'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery" '
+        "GROUP BY parsing_mode ORDER BY cnt DESC",
         {},
     )
     parse_quality: dict[str, int] = {}
@@ -1503,7 +1583,7 @@ def db_info() -> DbInfoResult:
     _freshness_kwargs: dict = {}
     try:
         _indexed_sha = db.get_indexed_sha()
-        _repo_rows = db.run_read("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
+        _repo_rows = db.run_read('SELECT path FROM "Repo" LIMIT 1', {})
         if _repo_rows and _indexed_sha is not None and _repo_rows[0].get("path"):
             _root = Path(_repo_rows[0]["path"])
             _f = compute_freshness(_root, _indexed_sha)
@@ -1533,11 +1613,11 @@ def db_info() -> DbInfoResult:
 @mcp.tool()
-@_timed_tool("execute_cypher")
-def execute_cypher(query: str) -> list[dict]:
-    """Execute a read-only Cypher query against the graph.
+@_timed_tool("execute_sql")
+def execute_sql(query: str) -> list[dict]:
+    """Execute a read-only SQL query against the graph (DuckDB).
-    This tool allows direct Cypher queries for advanced users. It enforces
+    This tool allows direct SQL queries for advanced users. It enforces
     read-only mode by stripping quoted literals and checking for write
     operation keywords. A LIMIT clause is automatically appended if missing.
@@ -1548,31 +1628,29 @@ def execute_cypher(query: str) -> list[dict]:
     that contains such keywords.
     Args:
-        query: Cypher query string (read-only)
+        query: DuckDB SQL query string (read-only SELECT only)
     Returns:
         List of result dictionaries from the query
     Raises:
-        ValueError: If the query contains write operations (CREATE, MERGE,
-                   DELETE, SET, REMOVE, DROP, TRUNCATE)
+        ValueError: If the query contains write operations (INSERT, UPDATE,
+                   DELETE, CREATE, DROP, TRUNCATE, MERGE)
     """
     db = _get_backend()
     # Strip quoted string literals before blocklist check
-    # This prevents mutation commands hiding inside strings from triggering the blocker
-    # Handle escaped quotes: '' in single quotes, "" in double quotes
     stripped = re.sub(r"'(?:''|[^'])*'", "", query)
     stripped = re.sub(r'"(?:""|[^"])*"', "", stripped)
     # Check for write operations (case-insensitive)
     if re.search(
-        r"\b(CREATE|MERGE|DELETE|SET|REMOVE|DROP|TRUNCATE)\b",
+        r"\b(INSERT|UPDATE|DELETE|CREATE|MERGE|DROP|TRUNCATE)\b",
         stripped,
         re.IGNORECASE,
     ):
         raise ValueError(
-            "Write operations are not permitted via execute_cypher. "
+            "Write operations are not permitted via execute_sql. "
             "Use the CLI or dedicated tools instead."
         )
@@ -1580,13 +1658,13 @@ def execute_cypher(query: str) -> list[dict]:
     q = query.rstrip()
     if q.endswith(";"):
         q = q[:-1].rstrip()
-    if "limit" not in stripped.lower():  # use stripped, not q.lower()
+    if "limit" not in stripped.lower():
         q = q + " LIMIT 500"
     try:
         return db.run_read(q, {})
     except Exception as e:
-        logger.error(f"Cypher execution failed: {e}")
+        logger.error(f"SQL execution failed: {e}")
         raise
@@ -1677,7 +1755,7 @@ def analyze_unused() -> UnusedTablesResult:
         # Single aggregation — no Python per-row graph traversal.
         unused_rows = db.run_read(ANALYZE_UNUSED_TABLES_QUERY, {})
-        total_rows = db.run_read("MATCH (t:SqlTable) RETURN count(t) AS n", {})
+        total_rows = db.run_read('SELECT count(*) AS n FROM "SqlTable"', {})
         total_tables_scanned = total_rows[0]["n"] if total_rows else 0
         prefixes = get_presentation_prefixes(root)

sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl