sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlcg/server/tools.py CHANGED
@@ -6,11 +6,15 @@ import time
6
6
  from collections import deque
7
7
  from contextlib import contextmanager
8
8
  from pathlib import Path
9
+ from typing import TYPE_CHECKING
10
+
11
+ if TYPE_CHECKING:
12
+ import anyio
9
13
 
10
14
  from sqlcg.core.config import get_db_path, get_presentation_prefixes
15
+ from sqlcg.core.duckdb_backend import DuckDBBackend
11
16
  from sqlcg.core.freshness import compute_freshness
12
17
  from sqlcg.core.graph_db import GraphBackend
13
- from sqlcg.core.kuzu_backend import KuzuBackend
14
18
  from sqlcg.core.queries import (
15
19
  ANALYZE_UNUSED_TABLES_QUERY,
16
20
  FIND_DEFINITION_QUERY,
@@ -94,32 +98,74 @@ from sqlcg.utils.logging import getLogger # noqa: E402
94
98
 
95
99
  logger = getLogger(__name__)
96
100
 
97
- # Module-level singleton backend (KùzuDB single-writer model)
101
+ # Module-level singleton backend (DuckDB single R/W handle for the process lifetime)
98
102
  _backend: GraphBackend | None = None
99
103
 
100
104
  # Module-level metrics store singleton
101
105
  _metrics: MetricsStore | None = None
102
106
 
107
+ # Module-level backend lock — injected by server.py _run_with_control so that
108
+ # MCP write tools (index_repo) share the same lock as the drain loop.
109
+ # None when no server event-loop is running (unit tests, direct DB access).
110
+ _backend_lock: "anyio.Lock | None" = None
111
+
112
+ # The path that init_backend() actually opened. Captured at init time so
113
+ # MCP write tools use this path, not get_db_path() which returns the default
114
+ # ~/.sqlcg/graph.db regardless of what was passed to init_backend.
115
+ _init_db_path: str | None = None
116
+
117
+
118
+ def _set_backend_lock(lock: "anyio.Lock | None") -> None:
119
+ """Register the backend lock from the server's task group.
120
+
121
+ Called by server.py _run_with_control so MCP write tools use the same
122
+ lock as the drain loop — ensuring no concurrent RW access.
123
+ """
124
+ global _backend_lock
125
+ _backend_lock = lock
126
+
103
127
 
104
128
  def init_backend(db_path: str | None = None) -> None:
105
129
  """Initialize the module-level backend singleton.
106
130
 
131
+ Startup sequence (DuckDB — one R/W handle, no read-only reopen):
132
+ 1. Open read-write → create schema if absent (init_schema is a no-op on
133
+ an already-initialized DB — it does NOT migrate).
134
+ 2. Run the schema-version gate (Step 1.4): refuse to start (RuntimeError) if
135
+ the stored version differs from the current build's SCHEMA_VERSION.
136
+ 3. Keep the RW backend open — it is not closed or reopened.
137
+ 4. Store that same handle as the serving singleton (reads and writes).
138
+
139
+ Steps 3–4 of the older OD-2 sequence (measured on kuzu 0.11.3: close RW, then
140
+ reopen read-only so ``init_schema()`` DDL never ran on an RO connection —
141
+ ``Cannot create an empty database under READ ONLY mode.``) do not apply here.
142
+
107
143
  Args:
108
- db_path: Path to KùzuDB database. If None, uses get_db_path().
144
+ db_path: Path to DuckDB database. If None, uses get_db_path().
109
145
 
110
146
  Raises:
111
- RuntimeError: If backend initialization fails
147
+ RuntimeError: If backend initialization fails or schema version
148
+ is stale (the caller must not swallow this — server must exit).
112
149
  """
113
- global _backend, _metrics
150
+ global _backend, _metrics, _init_db_path
114
151
  path = db_path or str(get_db_path())
115
- backend = KuzuBackend(path)
152
+ _init_db_path = path
153
+
154
+ # DuckDB: single R/W handle for the process lifetime — no RO/RW escalation.
155
+ # init_schema is idempotent; transaction() wraps the DDL in one commit.
156
+ rw_backend = DuckDBBackend(path)
116
157
  try:
117
- backend.init_schema()
158
+ rw_backend.init_schema()
118
159
  except Exception as exc:
119
- backend.close()
160
+ rw_backend.close()
120
161
  raise RuntimeError(f"Backend initialization failed: {exc}") from exc
121
- _backend = backend
122
- logger.debug(f"Backend initialized: {path}")
162
+
163
+ # Step 2 — schema-version gate (Step 1.4).
164
+ _assert_schema_current(rw_backend, path)
165
+
166
+ # DuckDB: the same handle is used for reads and writes (MVCC).
167
+ _backend = rw_backend
168
+ logger.debug(f"Backend initialized (DuckDB R/W): {path}")
123
169
 
124
170
  # Initialize metrics store (best-effort, failures are logged as WARNING)
125
171
  try:
@@ -136,7 +182,7 @@ def shutdown_backend() -> None:
136
182
  Closes the database connection and clears the global reference.
137
183
  Safe to call multiple times.
138
184
  """
139
- global _backend, _metrics
185
+ global _backend, _metrics, _init_db_path
140
186
  if _backend is not None:
141
187
  _backend.close()
142
188
  _backend = None
@@ -144,6 +190,7 @@ def shutdown_backend() -> None:
144
190
  if _metrics is not None:
145
191
  _metrics.close()
146
192
  _metrics = None
193
+ _init_db_path = None
147
194
 
148
195
 
149
196
  def _get_backend() -> GraphBackend:
@@ -157,6 +204,31 @@ def _get_backend() -> GraphBackend:
157
204
  return _backend
158
205
 
159
206
 
207
+ def _assert_schema_current(backend: GraphBackend, path: str) -> None:
208
+ """Refuse to start when the stored schema version differs from the current build.
209
+
210
+ Called by init_backend (Step 1.4) on the still-open R/W handle, after
211
+ init_schema() has run the create-if-absent step.
212
+
213
+ Args:
214
+ backend: An open (RW) backend to query.
215
+ path: The db_path string — accepted for context; the current message does not interpolate it.
216
+
217
+ Raises:
218
+ RuntimeError: Stored version present and != current SCHEMA_VERSION.
219
+ Message names both versions and the sqlcg db reset remedy.
220
+ """
221
+ from sqlcg.core.schema import SCHEMA_VERSION
222
+
223
+ stored = backend.get_schema_version()
224
+ if stored is not None and stored != SCHEMA_VERSION:
225
+ msg = (
226
+ f"Database schema is v{stored}, but this build expects v{SCHEMA_VERSION} — "
227
+ f"run 'sqlcg db reset && sqlcg index <path>' to re-index."
228
+ )
229
+ raise RuntimeError(msg)
230
+
231
+
160
232
  @contextmanager
161
233
  def _open_backend():
162
234
  """Context manager to get the initialized backend.
@@ -183,11 +255,11 @@ def _assert_indexed(db: GraphBackend) -> None:
183
255
  Raises:
184
256
  NotIndexedError: If no repos or files have been indexed
185
257
  """
186
- rows = db.run_read("MATCH (r:Repo) RETURN count(r) AS n", {})
258
+ rows = db.run_read('SELECT count(*) AS n FROM "Repo"', {})
187
259
  if rows and rows[0]["n"] > 0:
188
260
  return
189
261
  # Fallback: accept a graph with File nodes but no Repo (test-only or partial state).
190
- file_rows = db.run_read("MATCH (f:File) RETURN count(f) AS n", {})
262
+ file_rows = db.run_read('SELECT count(*) AS n FROM "File"', {})
191
263
  if file_rows and file_rows[0]["n"] > 0:
192
264
  logger.debug(
193
265
  "File nodes present but no Repo node — accepting as test-only/partial graph; "
@@ -214,7 +286,7 @@ def _indexed_root(db: GraphBackend) -> Path | None:
214
286
  Absolute Path of the indexed root, or None if unavailable.
215
287
  """
216
288
  try:
217
- rows = db.run_read("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
289
+ rows = db.run_read('SELECT path FROM "Repo" LIMIT 1', {})
218
290
  if rows and rows[0].get("path"):
219
291
  return Path(rows[0]["path"])
220
292
  except Exception:
@@ -364,7 +436,10 @@ def _kahn_topological_sort(affected_tables: list[str], db: GraphBackend) -> tupl
364
436
  indegree: dict[str, int] = {t: 0 for t in affected_tables}
365
437
 
366
438
  for table in affected_tables:
367
- rows = db.run_read(GET_TABLE_DIRECT_UPSTREAMS_QUERY, {"table_qualified": table})
439
+ rows = db.run_read(
440
+ GET_TABLE_DIRECT_UPSTREAMS_QUERY,
441
+ {"table_qualified": table, "table_qualified2": table},
442
+ )
368
443
  for row in rows:
369
444
  src = row["upstream_table"]
370
445
  if src in table_set and src != table and table not in successors[src]:
@@ -462,19 +537,21 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
462
537
  success = True
463
538
 
464
539
  try:
465
- db = _get_backend()
466
- indexer = Indexer()
467
540
  path = Path(repo_path).resolve()
468
541
  if not path.exists():
469
542
  raise ValueError(f"Repository path does not exist: {repo_path}")
470
543
  if not path.is_dir():
471
544
  raise ValueError(f"Repository path is not a directory: {repo_path}")
472
545
 
546
+ # DuckDB: single R/W handle for the process lifetime — use directly.
547
+ rw_db = _get_backend()
548
+
549
+ indexer = Indexer()
473
550
  # Ensure the Repo node exists for this repository
474
551
  from sqlcg.core.schema import NodeLabel, RelType
475
552
 
476
553
  abs_path = str(path)
477
- db.upsert_node(
554
+ rw_db.upsert_node(
478
555
  NodeLabel.REPO,
479
556
  abs_path,
480
557
  {
@@ -484,14 +561,14 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
484
561
  )
485
562
 
486
563
  # Index the repository (with absolute path)
487
- result = indexer.index_repo(path, dialect, db)
564
+ result = indexer.index_repo(path, dialect, rw_db)
488
565
 
489
566
  # Create BELONGS_TO relationships from File nodes to Repo node
490
567
  # Query for all File nodes in this repo and link them to the Repo
491
568
  repo_prefix = abs_path.rstrip("/") + "/"
492
- file_rows = db.run_read(INDEX_REPO_FILES_QUERY, {"repo_prefix": repo_prefix})
569
+ file_rows = rw_db.run_read(INDEX_REPO_FILES_QUERY, {"repo_prefix": repo_prefix})
493
570
  for row in file_rows:
494
- db.upsert_edge(
571
+ rw_db.upsert_edge(
495
572
  NodeLabel.FILE,
496
573
  row["path"],
497
574
  NodeLabel.REPO,
@@ -499,7 +576,6 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
499
576
  RelType.BELONGS_TO,
500
577
  {},
501
578
  )
502
-
503
579
  logger.info(f"Indexed {result['files_parsed']} files with {result['tables_found']} tables")
504
580
 
505
581
  # Record metrics
@@ -831,7 +907,10 @@ def get_change_scope(table_qualified: str) -> ChangeScopeResult:
831
907
  def_rows = db.run_read(GET_TABLE_DEFINING_FILES_QUERY, {"table_qualified": target})
832
908
  defining_files = _dedup_preserve_order([r["file_path"] for r in def_rows])
833
909
 
834
- up_rows = db.run_read(GET_TABLE_DIRECT_UPSTREAMS_QUERY, {"table_qualified": target})
910
+ up_rows = db.run_read(
911
+ GET_TABLE_DIRECT_UPSTREAMS_QUERY,
912
+ {"table_qualified": target, "table_qualified2": target},
913
+ )
835
914
  upstream_raw = _dedup_preserve_order(
836
915
  [r["upstream_table"] for r in up_rows if r["upstream_table"]]
837
916
  )
@@ -1464,14 +1543,15 @@ def db_info() -> DbInfoResult:
1464
1543
 
1465
1544
  node_counts: dict[str, int] = {}
1466
1545
  for label in NodeLabel:
1467
- result = db.run_read(f"MATCH (n:{label}) RETURN COUNT(*) AS count", {})
1546
+ result = db.run_read(f'SELECT count(*) AS count FROM "{label}"', {})
1468
1547
  node_counts[str(label)] = result[0]["count"] if result else 0
1469
1548
 
1470
- edges_result = db.run_read("MATCH ()-[r:COLUMN_LINEAGE]->() RETURN COUNT(r) AS count", {})
1549
+ edges_result = db.run_read('SELECT count(*) AS count FROM "COLUMN_LINEAGE"', {})
1471
1550
  column_lineage_edges = edges_result[0]["count"] if edges_result else 0
1472
1551
 
1473
1552
  mode_rows = db.run_read(
1474
- "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode, COUNT(q) AS cnt ORDER BY cnt DESC",
1553
+ 'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery" '
1554
+ "GROUP BY parsing_mode ORDER BY cnt DESC",
1475
1555
  {},
1476
1556
  )
1477
1557
  parse_quality: dict[str, int] = {}
@@ -1503,7 +1583,7 @@ def db_info() -> DbInfoResult:
1503
1583
  _freshness_kwargs: dict = {}
1504
1584
  try:
1505
1585
  _indexed_sha = db.get_indexed_sha()
1506
- _repo_rows = db.run_read("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
1586
+ _repo_rows = db.run_read('SELECT path FROM "Repo" LIMIT 1', {})
1507
1587
  if _repo_rows and _indexed_sha is not None and _repo_rows[0].get("path"):
1508
1588
  _root = Path(_repo_rows[0]["path"])
1509
1589
  _f = compute_freshness(_root, _indexed_sha)
@@ -1533,11 +1613,11 @@ def db_info() -> DbInfoResult:
1533
1613
 
1534
1614
 
1535
1615
  @mcp.tool()
1536
- @_timed_tool("execute_cypher")
1537
- def execute_cypher(query: str) -> list[dict]:
1538
- """Execute a read-only Cypher query against the graph.
1616
+ @_timed_tool("execute_sql")
1617
+ def execute_sql(query: str) -> list[dict]:
1618
+ """Execute a read-only SQL query against the graph (DuckDB).
1539
1619
 
1540
- This tool allows direct Cypher queries for advanced users. It enforces
1620
+ This tool allows direct SQL queries for advanced users. It enforces
1541
1621
  read-only mode by stripping quoted literals and checking for write
1542
1622
  operation keywords. A LIMIT clause is automatically appended if missing.
1543
1623
 
@@ -1548,31 +1628,29 @@ def execute_cypher(query: str) -> list[dict]:
1548
1628
  that contains such keywords.
1549
1629
 
1550
1630
  Args:
1551
- query: Cypher query string (read-only)
1631
+ query: DuckDB SQL query string (read-only SELECT only)
1552
1632
 
1553
1633
  Returns:
1554
1634
  List of result dictionaries from the query
1555
1635
 
1556
1636
  Raises:
1557
- ValueError: If the query contains write operations (CREATE, MERGE,
1558
- DELETE, SET, REMOVE, DROP, TRUNCATE)
1637
+ ValueError: If the query contains write operations (INSERT, UPDATE,
1638
+ DELETE, CREATE, DROP, TRUNCATE, MERGE)
1559
1639
  """
1560
1640
  db = _get_backend()
1561
1641
 
1562
1642
  # Strip quoted string literals before blocklist check
1563
- # This prevents mutation commands hiding inside strings from triggering the blocker
1564
- # Handle escaped quotes: '' in single quotes, "" in double quotes
1565
1643
  stripped = re.sub(r"'(?:''|[^'])*'", "", query)
1566
1644
  stripped = re.sub(r'"(?:""|[^"])*"', "", stripped)
1567
1645
 
1568
1646
  # Check for write operations (case-insensitive)
1569
1647
  if re.search(
1570
- r"\b(CREATE|MERGE|DELETE|SET|REMOVE|DROP|TRUNCATE)\b",
1648
+ r"\b(INSERT|UPDATE|DELETE|CREATE|MERGE|DROP|TRUNCATE)\b",
1571
1649
  stripped,
1572
1650
  re.IGNORECASE,
1573
1651
  ):
1574
1652
  raise ValueError(
1575
- "Write operations are not permitted via execute_cypher. "
1653
+ "Write operations are not permitted via execute_sql. "
1576
1654
  "Use the CLI or dedicated tools instead."
1577
1655
  )
1578
1656
 
@@ -1580,13 +1658,13 @@ def execute_cypher(query: str) -> list[dict]:
1580
1658
  q = query.rstrip()
1581
1659
  if q.endswith(";"):
1582
1660
  q = q[:-1].rstrip()
1583
- if "limit" not in stripped.lower(): # use stripped, not q.lower()
1661
+ if "limit" not in stripped.lower():
1584
1662
  q = q + " LIMIT 500"
1585
1663
 
1586
1664
  try:
1587
1665
  return db.run_read(q, {})
1588
1666
  except Exception as e:
1589
- logger.error(f"Cypher execution failed: {e}")
1667
+ logger.error(f"SQL execution failed: {e}")
1590
1668
  raise
1591
1669
 
1592
1670
 
@@ -1677,7 +1755,7 @@ def analyze_unused() -> UnusedTablesResult:
1677
1755
 
1678
1756
  # Single aggregation — no Python per-row graph traversal.
1679
1757
  unused_rows = db.run_read(ANALYZE_UNUSED_TABLES_QUERY, {})
1680
- total_rows = db.run_read("MATCH (t:SqlTable) RETURN count(t) AS n", {})
1758
+ total_rows = db.run_read('SELECT count(*) AS n FROM "SqlTable"', {})
1681
1759
  total_tables_scanned = total_rows[0]["n"] if total_rows else 0
1682
1760
 
1683
1761
  prefixes = get_presentation_prefixes(root)