sql-code-graph 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.3.0.dist-info}/METADATA +1 -1
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.3.0.dist-info}/RECORD +15 -14
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/db.py +23 -0
- sqlcg/cli/commands/git.py +11 -4
- sqlcg/cli/commands/index.py +167 -4
- sqlcg/cli/commands/mcp.py +70 -3
- sqlcg/cli/commands/reindex.py +146 -76
- sqlcg/core/kuzu_backend.py +10 -6
- sqlcg/metrics/store.py +48 -0
- sqlcg/server/server.py +165 -70
- sqlcg/server/tools.py +155 -14
- sqlcg/server/writer.py +634 -0
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.3.0.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.3.0.dist-info}/entry_points.txt +0 -0
sqlcg/server/server.py
CHANGED
|
@@ -79,34 +79,46 @@ async def _control_socket_task(
|
|
|
79
79
|
stop_event: "anyio.Event",
|
|
80
80
|
backend_lock: "anyio.Lock",
|
|
81
81
|
start_time: float,
|
|
82
|
+
writer_queue: "WriterQueue",
|
|
82
83
|
) -> None:
|
|
83
84
|
"""Accept control connections on ``<db>.sock`` and dispatch ops.
|
|
84
85
|
|
|
85
86
|
Supported ops:
|
|
86
87
|
|
|
87
|
-
- ``{"op": "status"}`` → running state, pid, db_path, freshness, uptime
|
|
88
|
-
|
|
88
|
+
- ``{"op": "status"}`` → running state, pid, db_path, freshness, uptime,
|
|
89
|
+
writer_queue block. **Length-prefixed framing** (v1.3.0, B3): the
|
|
90
|
+
response uses ``<decimal-byte-length>\\n<json-body>`` so large queue
|
|
91
|
+
payloads are read in full by the recv-exactly client.
|
|
89
92
|
- ``{"op": "stop"}`` → sends ``{"ok": true}`` then signals stop via
|
|
90
|
-
*stop_event*. Unframed.
|
|
91
|
-
- ``{"op": "
|
|
92
|
-
|
|
93
|
-
``
|
|
94
|
-
|
|
93
|
+
*stop_event*. Unframed (mcp_stop uses s.recv(128) — do NOT change).
|
|
94
|
+
- ``{"op": "index", "root", "dialect", "wait"}`` → enqueues a full index
|
|
95
|
+
onto *writer_queue* (rule 1 — supersedes all pending). Supports
|
|
96
|
+
``wait=true`` (stream progress frames + terminal ``done:true``) and
|
|
97
|
+
``wait=false`` (immediate ``{ok, queued, position}``).
|
|
98
|
+
- ``{"op": "reindex", "root", "from", "to", "dialect", "wait"}`` →
|
|
99
|
+
enqueues an incremental resync (coalescing rules 2–3). ``from`` may be
|
|
100
|
+
``null``/omitted to resolve at drain start (W3). Same ``wait`` semantics
|
|
101
|
+
as ``index``. The handler enqueues only — it never touches the backend
|
|
102
|
+
(B1 invariant: only the drain task resolves a backend, under backend_lock).
|
|
95
103
|
- ``{"op": "query", "cypher": ..., "params": ...}`` → executes a
|
|
96
104
|
read-only Cypher query on the single backend connection, serialised
|
|
97
105
|
behind *backend_lock*. **Length-prefixed framing** (v1.2.0):
|
|
98
106
|
``<decimal-byte-length>\\n<json-body>`` on both request and response.
|
|
99
107
|
|
|
100
|
-
Framing protocol
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
108
|
+
Framing protocol:
|
|
109
|
+
Requests: a bare decimal integer on the first line → framed. Unframed
|
|
110
|
+
JSON always starts with ``{``, so the sniff is unambiguous.
|
|
111
|
+
Responses: framed (``<len>\\n<body>``) for ``query`` and ``status``;
|
|
112
|
+
unframed for ``stop``/``reindex``/``index`` (unless ``wait=true`` which
|
|
113
|
+
uses the multi-frame streaming protocol).
|
|
114
|
+
|
|
115
|
+
Multi-frame streaming protocol (``index``/``reindex`` with ``wait=true``):
|
|
116
|
+
The server sends a sequence of length-prefixed frames on the same
|
|
117
|
+
connection. Progress frames carry ``{done: false, files_done, files_total}``.
|
|
118
|
+
The terminal frame carries ``{ok: true, done: true, summary: {...}}`` on
|
|
119
|
+
success or ``{ok: false, done: true, error: ...}`` on failure (W7).
|
|
120
|
+
The client reads frames in a loop and stops when it sees ``done == true``
|
|
121
|
+
— it does NOT rely on EOF as the terminator.
|
|
110
122
|
|
|
111
123
|
R2 (single connection): all backend operations go through ``backend_lock``
|
|
112
124
|
so concurrent calls never touch the single Kuzu connection simultaneously.
|
|
@@ -126,6 +138,7 @@ async def _control_socket_task(
|
|
|
126
138
|
from anyio.streams.buffered import BufferedByteReceiveStream
|
|
127
139
|
|
|
128
140
|
from sqlcg.core.config import get_db_path as _get_db_path
|
|
141
|
+
from sqlcg.server.writer import WriterRequest
|
|
129
142
|
|
|
130
143
|
# Read-only keyword allow-list for the ``query`` op. Only these leading
|
|
131
144
|
# keywords are permitted — anything that starts with a write keyword is
|
|
@@ -211,7 +224,13 @@ async def _control_socket_task(
|
|
|
211
224
|
"stale_by_commits": stale,
|
|
212
225
|
"connected_clients": 1, # stdio transport = 1 by design
|
|
213
226
|
"uptime": time.time() - start_time,
|
|
227
|
+
"writer_queue": writer_queue.coalesce_view(),
|
|
214
228
|
}
|
|
229
|
+
# status response is framed (B3, v1.3.0) — same framing as query
|
|
230
|
+
# so recv-exactly clients read it in full regardless of payload size.
|
|
231
|
+
resp_bytes = json.dumps(resp).encode()
|
|
232
|
+
await stream.send(f"{len(resp_bytes)}\n".encode() + resp_bytes)
|
|
233
|
+
return
|
|
215
234
|
|
|
216
235
|
elif op == "stop":
|
|
217
236
|
resp = {"ok": True}
|
|
@@ -222,35 +241,91 @@ async def _control_socket_task(
|
|
|
222
241
|
stop_event.set()
|
|
223
242
|
return
|
|
224
243
|
|
|
244
|
+
elif op == "index":
|
|
245
|
+
# Step 3.1 — enqueue a full index; never touches the backend here (B1).
|
|
246
|
+
root = req.get("root")
|
|
247
|
+
dialect = req.get("dialect")
|
|
248
|
+
wait = req.get("wait", False)
|
|
249
|
+
requested_by = req.get("requested_by", "cli")
|
|
250
|
+
if not root:
|
|
251
|
+
resp = {"error": "index op requires root"}
|
|
252
|
+
await stream.send(json.dumps(resp).encode() + b"\n")
|
|
253
|
+
return
|
|
254
|
+
|
|
255
|
+
writer_req = WriterRequest(
|
|
256
|
+
op="index",
|
|
257
|
+
root=root,
|
|
258
|
+
dialect=dialect,
|
|
259
|
+
from_sha=None,
|
|
260
|
+
to_sha=None,
|
|
261
|
+
requested_by=requested_by,
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
if wait:
|
|
265
|
+
# Attach-and-wait: register a memory channel then stream frames.
|
|
266
|
+
send_ch, recv_ch = anyio.create_memory_object_stream(max_buffer_size=64)
|
|
267
|
+
writer_req._waiters.append(send_ch)
|
|
268
|
+
position = await writer_queue.enqueue(writer_req)
|
|
269
|
+
# Send the queued acknowledgement frame first.
|
|
270
|
+
queued_frame = json.dumps(
|
|
271
|
+
{"ok": True, "done": False, "queued": True, "position": position}
|
|
272
|
+
).encode()
|
|
273
|
+
await stream.send(f"{len(queued_frame)}\n".encode() + queued_frame)
|
|
274
|
+
# Stream progress frames until done:true terminal frame.
|
|
275
|
+
async with recv_ch:
|
|
276
|
+
async for terminal in recv_ch:
|
|
277
|
+
frame_bytes = json.dumps(terminal).encode()
|
|
278
|
+
await stream.send(f"{len(frame_bytes)}\n".encode() + frame_bytes)
|
|
279
|
+
if terminal.get("done"):
|
|
280
|
+
break
|
|
281
|
+
else:
|
|
282
|
+
position = await writer_queue.enqueue(writer_req)
|
|
283
|
+
resp = {"ok": True, "queued": True, "position": position}
|
|
284
|
+
await stream.send(json.dumps(resp).encode() + b"\n")
|
|
285
|
+
return
|
|
286
|
+
|
|
225
287
|
elif op == "reindex":
|
|
288
|
+
# Step 2.3 (B1) — enqueue; the drain is the only backend consumer.
|
|
289
|
+
# The handler NEVER calls backend_ref() (B1 invariant).
|
|
226
290
|
root = req.get("root")
|
|
227
|
-
from_sha = req.get("from")
|
|
291
|
+
from_sha = req.get("from") # may be None (W3 — server resolves at drain)
|
|
228
292
|
to_sha = req.get("to")
|
|
229
293
|
dialect = req.get("dialect")
|
|
230
|
-
|
|
231
|
-
|
|
294
|
+
wait = req.get("wait", False)
|
|
295
|
+
requested_by = req.get("requested_by", "cli")
|
|
296
|
+
if not root:
|
|
297
|
+
resp = {"error": "reindex op requires root"}
|
|
298
|
+
await stream.send(json.dumps(resp).encode() + b"\n")
|
|
299
|
+
return
|
|
300
|
+
|
|
301
|
+
writer_req = WriterRequest(
|
|
302
|
+
op="reindex",
|
|
303
|
+
root=root,
|
|
304
|
+
dialect=dialect,
|
|
305
|
+
from_sha=from_sha,
|
|
306
|
+
to_sha=to_sha,
|
|
307
|
+
requested_by=requested_by,
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
if wait:
|
|
311
|
+
send_ch, recv_ch = anyio.create_memory_object_stream(max_buffer_size=64)
|
|
312
|
+
writer_req._waiters.append(send_ch)
|
|
313
|
+
position = await writer_queue.enqueue(writer_req)
|
|
314
|
+
queued_frame = json.dumps(
|
|
315
|
+
{"ok": True, "done": False, "queued": True, "position": position}
|
|
316
|
+
).encode()
|
|
317
|
+
await stream.send(f"{len(queued_frame)}\n".encode() + queued_frame)
|
|
318
|
+
async with recv_ch:
|
|
319
|
+
async for terminal in recv_ch:
|
|
320
|
+
frame_bytes = json.dumps(terminal).encode()
|
|
321
|
+
await stream.send(f"{len(frame_bytes)}\n".encode() + frame_bytes)
|
|
322
|
+
if terminal.get("done"):
|
|
323
|
+
break
|
|
232
324
|
else:
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
resp = {"error": "backend not available"}
|
|
238
|
-
else:
|
|
239
|
-
indexer = Indexer()
|
|
240
|
-
|
|
241
|
-
def _do_reindex() -> dict:
|
|
242
|
-
return indexer.resync_changed(
|
|
243
|
-
_Path(root),
|
|
244
|
-
from_sha,
|
|
245
|
-
to_sha,
|
|
246
|
-
db,
|
|
247
|
-
dialect,
|
|
248
|
-
)
|
|
249
|
-
|
|
250
|
-
async with backend_lock:
|
|
251
|
-
# R1: run off event-loop thread; R2: lock serialises
|
|
252
|
-
summary = await _to_thread.run_sync(_do_reindex)
|
|
253
|
-
resp = {"ok": True, "summary": summary}
|
|
325
|
+
position = await writer_queue.enqueue(writer_req)
|
|
326
|
+
resp = {"ok": True, "queued": True, "position": position}
|
|
327
|
+
await stream.send(json.dumps(resp).encode() + b"\n")
|
|
328
|
+
return
|
|
254
329
|
|
|
255
330
|
elif op == "query":
|
|
256
331
|
# Framed op (v1.2.0): read-only Cypher query over the socket.
|
|
@@ -297,36 +372,37 @@ async def _control_socket_task(
|
|
|
297
372
|
async def _stop_watcher(
|
|
298
373
|
stop_event: "anyio.Event",
|
|
299
374
|
db_path: "Path",
|
|
375
|
+
backend_lock: "anyio.Lock",
|
|
376
|
+
shutdown_requested: "anyio.Event",
|
|
300
377
|
) -> None:
|
|
301
378
|
"""Wait for stop_event then perform graceful shutdown.
|
|
302
379
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
R8 ordering: backend is shut down HERE (before os._exit), not in main().
|
|
318
|
-
The ``finally`` block in ``main()`` will also try to shutdown/cleanup but
|
|
319
|
-
``os._exit`` prevents it from running — so we do it explicitly here.
|
|
380
|
+
B2 shutdown ordering:
|
|
381
|
+
1. Set shutdown_requested so the drain loop exits cleanly and
|
|
382
|
+
de_escalate_to_ro skips the RO reopen.
|
|
383
|
+
2. Acquire backend_lock — waits until any active drain has fully
|
|
384
|
+
de-escalated (so the in-flight RW write is committed, not torn).
|
|
385
|
+
3. Call shutdown_backend() under the lock.
|
|
386
|
+
4. Release backend_lock.
|
|
387
|
+
5. Remove control files.
|
|
388
|
+
6. Call os._exit(0).
|
|
389
|
+
|
|
390
|
+
We use ``os._exit(0)`` because the MCP ``stdio_server`` blocks on a pipe
|
|
391
|
+
read (``anyio.to_thread.run_sync`` with ``abandon_on_cancel=False``).
|
|
392
|
+
We cannot interrupt it without killing the process.
|
|
320
393
|
"""
|
|
321
394
|
import sqlcg.server.tools as _tools
|
|
322
395
|
from sqlcg.server.control import cleanup_control_files
|
|
323
396
|
|
|
324
397
|
await stop_event.wait()
|
|
325
|
-
#
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
398
|
+
# B2(b): signal de_escalate_to_ro to skip the RO reopen.
|
|
399
|
+
shutdown_requested.set()
|
|
400
|
+
# B2(a): wait for any active drain to finish (acquires backend_lock).
|
|
401
|
+
async with backend_lock:
|
|
402
|
+
try:
|
|
403
|
+
_tools.shutdown_backend()
|
|
404
|
+
except Exception:
|
|
405
|
+
pass
|
|
330
406
|
try:
|
|
331
407
|
cleanup_control_files(db_path)
|
|
332
408
|
except Exception:
|
|
@@ -355,9 +431,10 @@ async def _sigterm_watcher(
|
|
|
355
431
|
async def _run_with_control(db_path: "Path", start_time: float) -> None:
|
|
356
432
|
"""Run the stdio MCP loop and the control-socket task in a shared TaskGroup.
|
|
357
433
|
|
|
358
|
-
Stop mechanism (
|
|
434
|
+
Stop mechanism (B2 teardown ordering):
|
|
359
435
|
- Control socket ``stop`` op → ``stop_event.set()`` → ``_stop_watcher``
|
|
360
|
-
|
|
436
|
+
sets shutdown_requested, acquires backend_lock (waits for active drain),
|
|
437
|
+
shuts down backend, removes control files, calls ``os._exit(0)``.
|
|
361
438
|
- External SIGTERM → ``_sigterm_watcher`` → same path via ``stop_event``.
|
|
362
439
|
- Normal EOF on stdin (editor closes connection) → stdio loop returns →
|
|
363
440
|
``tg.cancel_scope.cancel()`` → tasks cancelled → ``main()`` finally
|
|
@@ -368,19 +445,35 @@ async def _run_with_control(db_path: "Path", start_time: float) -> None:
|
|
|
368
445
|
with ``abandon_on_cancel=False``). We cannot interrupt it without
|
|
369
446
|
killing the process; ``_stop_watcher`` does cleanup first.
|
|
370
447
|
|
|
371
|
-
``backend_lock`` is created once here and passed into
|
|
372
|
-
``_control_socket_task``
|
|
373
|
-
|
|
448
|
+
``backend_lock`` is created once here and passed into both
|
|
449
|
+
``_control_socket_task`` and the ``drain_loop`` task so that:
|
|
450
|
+
- concurrent control ops (reindex, query) are serialised (R2), and
|
|
451
|
+
- _stop_watcher can acquire the lock to wait for an active drain (B2).
|
|
374
452
|
"""
|
|
375
453
|
import anyio
|
|
376
454
|
|
|
377
455
|
import sqlcg.server.tools as _tools
|
|
456
|
+
from sqlcg.server.writer import WriterQueue, drain_loop
|
|
378
457
|
|
|
379
458
|
stop_event = anyio.Event()
|
|
380
|
-
|
|
459
|
+
shutdown_requested = anyio.Event()
|
|
460
|
+
backend_lock = anyio.Lock() # R2 + B2: serialise all backend ops
|
|
461
|
+
|
|
462
|
+
# Inject metrics into the queue so coalesce/drain events are persisted.
|
|
463
|
+
writer_queue = WriterQueue(metrics=_tools._metrics)
|
|
464
|
+
|
|
465
|
+
db_path_str = str(db_path)
|
|
381
466
|
|
|
382
467
|
async with anyio.create_task_group() as tg:
|
|
383
468
|
if sys.platform != "win32":
|
|
469
|
+
# Drain task: consumes WriterQueue; sole backend consumer (B1).
|
|
470
|
+
tg.start_soon(
|
|
471
|
+
drain_loop,
|
|
472
|
+
writer_queue,
|
|
473
|
+
db_path_str,
|
|
474
|
+
backend_lock,
|
|
475
|
+
shutdown_requested,
|
|
476
|
+
)
|
|
384
477
|
# Spawn control socket alongside the stdio loop.
|
|
385
478
|
tg.start_soon(
|
|
386
479
|
_control_socket_task,
|
|
@@ -389,9 +482,10 @@ async def _run_with_control(db_path: "Path", start_time: float) -> None:
|
|
|
389
482
|
stop_event,
|
|
390
483
|
backend_lock,
|
|
391
484
|
start_time,
|
|
485
|
+
writer_queue,
|
|
392
486
|
)
|
|
393
487
|
# Watch stop_event; shuts down and calls os._exit(0).
|
|
394
|
-
tg.start_soon(_stop_watcher, stop_event, db_path)
|
|
488
|
+
tg.start_soon(_stop_watcher, stop_event, db_path, backend_lock, shutdown_requested)
|
|
395
489
|
# Watch for SIGTERM; fires stop_event for same clean path.
|
|
396
490
|
tg.start_soon(_sigterm_watcher, stop_event)
|
|
397
491
|
|
|
@@ -457,5 +551,6 @@ if TYPE_CHECKING:
|
|
|
457
551
|
import anyio
|
|
458
552
|
|
|
459
553
|
from sqlcg.core.graph_db import GraphBackend
|
|
554
|
+
from sqlcg.server.writer import WriterQueue
|
|
460
555
|
|
|
461
556
|
from sqlcg.server.control import sock_path # noqa: E402 (used in _control_socket_task)
|
sqlcg/server/tools.py
CHANGED
|
@@ -6,6 +6,10 @@ import time
|
|
|
6
6
|
from collections import deque
|
|
7
7
|
from contextlib import contextmanager
|
|
8
8
|
from pathlib import Path
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
import anyio
|
|
9
13
|
|
|
10
14
|
from sqlcg.core.config import get_db_path, get_presentation_prefixes
|
|
11
15
|
from sqlcg.core.freshness import compute_freshness
|
|
@@ -100,26 +104,77 @@ _backend: GraphBackend | None = None
|
|
|
100
104
|
# Module-level metrics store singleton
|
|
101
105
|
_metrics: MetricsStore | None = None
|
|
102
106
|
|
|
107
|
+
# Module-level backend lock — injected by server.py _run_with_control so that
|
|
108
|
+
# MCP write tools (index_repo) share the same lock as the drain loop.
|
|
109
|
+
# None when no server event-loop is running (unit tests, direct DB access).
|
|
110
|
+
_backend_lock: "anyio.Lock | None" = None
|
|
111
|
+
|
|
112
|
+
# True when init_backend has opened the backend in RO serving mode.
|
|
113
|
+
# Checked by MCP write tools to decide whether to escalate.
|
|
114
|
+
_serving_ro: bool = False
|
|
115
|
+
|
|
116
|
+
# The path that init_backend() actually opened. Captured at init time so
|
|
117
|
+
# MCP write tools (index_repo escalation) use this path, not get_db_path()
|
|
118
|
+
# which returns the default ~/.sqlcg/graph.db regardless of what was passed
|
|
119
|
+
# to init_backend. Bug B fix: without this, every escalation opened the real
|
|
120
|
+
# live DB instead of the path the server (or test) configured.
|
|
121
|
+
_init_db_path: str | None = None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _set_backend_lock(lock: "anyio.Lock | None") -> None:
|
|
125
|
+
"""Register the backend lock from the server's task group.
|
|
126
|
+
|
|
127
|
+
Called by server.py _run_with_control so MCP write tools use the same
|
|
128
|
+
lock as the drain loop — ensuring no concurrent RW access.
|
|
129
|
+
"""
|
|
130
|
+
global _backend_lock
|
|
131
|
+
_backend_lock = lock
|
|
132
|
+
|
|
103
133
|
|
|
104
134
|
def init_backend(db_path: str | None = None) -> None:
|
|
105
135
|
"""Initialize the module-level backend singleton.
|
|
106
136
|
|
|
137
|
+
Startup sequence (OD-2 — measured on kuzu 0.11.3):
|
|
138
|
+
1. Open read-write → create schema if absent (init_schema is a no-op on
|
|
139
|
+
an already-initialized DB — it does NOT migrate).
|
|
140
|
+
2. Run the schema-version gate (Step 1.4): raise RuntimeError if the stored
|
|
141
|
+
version differs from the current build's SCHEMA_VERSION.
|
|
142
|
+
3. Close the RW backend.
|
|
143
|
+
4. Reopen read-only and store as the serving singleton.
|
|
144
|
+
|
|
145
|
+
This ensures ``init_schema()`` — which issues DDL — never runs on the RO
|
|
146
|
+
connection (DDL raises on RO; ``Cannot create an empty database under READ
|
|
147
|
+
ONLY mode.`` is raised on a non-existent DB opened RO).
|
|
148
|
+
|
|
107
149
|
Args:
|
|
108
150
|
db_path: Path to KùzuDB database. If None, uses get_db_path().
|
|
109
151
|
|
|
110
152
|
Raises:
|
|
111
|
-
RuntimeError: If backend initialization fails
|
|
153
|
+
RuntimeError: If backend initialization fails or schema version
|
|
154
|
+
is stale (the caller must not swallow this — server must exit).
|
|
112
155
|
"""
|
|
113
|
-
global _backend, _metrics
|
|
156
|
+
global _backend, _metrics, _serving_ro, _init_db_path
|
|
114
157
|
path = db_path or str(get_db_path())
|
|
115
|
-
|
|
158
|
+
_init_db_path = path
|
|
159
|
+
|
|
160
|
+
# Step 1 — RW open + create schema if absent.
|
|
161
|
+
rw_backend = KuzuBackend(path, read_only=False)
|
|
116
162
|
try:
|
|
117
|
-
|
|
163
|
+
rw_backend.init_schema()
|
|
118
164
|
except Exception as exc:
|
|
119
|
-
|
|
165
|
+
rw_backend.close()
|
|
120
166
|
raise RuntimeError(f"Backend initialization failed: {exc}") from exc
|
|
121
|
-
|
|
122
|
-
|
|
167
|
+
|
|
168
|
+
# Step 2 — schema-version gate (Step 1.4).
|
|
169
|
+
_assert_schema_current(rw_backend, path)
|
|
170
|
+
|
|
171
|
+
# Step 3 — close RW.
|
|
172
|
+
rw_backend.close()
|
|
173
|
+
|
|
174
|
+
# Step 4 — reopen RO as the serving singleton.
|
|
175
|
+
_backend = KuzuBackend(path, read_only=True)
|
|
176
|
+
_serving_ro = True
|
|
177
|
+
logger.debug(f"Backend initialized (RO serving): {path}")
|
|
123
178
|
|
|
124
179
|
# Initialize metrics store (best-effort, failures are logged as WARNING)
|
|
125
180
|
try:
|
|
@@ -136,7 +191,7 @@ def shutdown_backend() -> None:
|
|
|
136
191
|
Closes the database connection and clears the global reference.
|
|
137
192
|
Safe to call multiple times.
|
|
138
193
|
"""
|
|
139
|
-
global _backend, _metrics
|
|
194
|
+
global _backend, _metrics, _serving_ro, _init_db_path
|
|
140
195
|
if _backend is not None:
|
|
141
196
|
_backend.close()
|
|
142
197
|
_backend = None
|
|
@@ -144,6 +199,8 @@ def shutdown_backend() -> None:
|
|
|
144
199
|
if _metrics is not None:
|
|
145
200
|
_metrics.close()
|
|
146
201
|
_metrics = None
|
|
202
|
+
_serving_ro = False
|
|
203
|
+
_init_db_path = None
|
|
147
204
|
|
|
148
205
|
|
|
149
206
|
def _get_backend() -> GraphBackend:
|
|
@@ -157,6 +214,80 @@ def _get_backend() -> GraphBackend:
|
|
|
157
214
|
return _backend
|
|
158
215
|
|
|
159
216
|
|
|
217
|
+
def _get_or_escalate_rw(db_path_str: str, should_escalate: bool) -> GraphBackend:
|
|
218
|
+
"""Return the current backend for write use.
|
|
219
|
+
|
|
220
|
+
When the server has opened the backend RO (should_escalate=True), escalates
|
|
221
|
+
to RW using escalate_to_rw so the MCP write tool can write. When the
|
|
222
|
+
backend is already RW (direct call in tests / non-server context),
|
|
223
|
+
returns it directly.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
db_path_str: Path string for escalation.
|
|
227
|
+
should_escalate: True when the server has opened the backend RO.
|
|
228
|
+
"""
|
|
229
|
+
if not should_escalate:
|
|
230
|
+
return _get_backend()
|
|
231
|
+
from sqlcg.server.writer import escalate_to_rw
|
|
232
|
+
|
|
233
|
+
return escalate_to_rw(db_path_str, current=_backend)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _de_escalate_to_ro_from_tool(db_path_str: str) -> None:
|
|
237
|
+
"""De-escalate from RW back to RO after an MCP write tool finishes.
|
|
238
|
+
|
|
239
|
+
Best-effort: logs on failure but does not raise.
|
|
240
|
+
"""
|
|
241
|
+
from sqlcg.server.writer import de_escalate_to_ro
|
|
242
|
+
|
|
243
|
+
try:
|
|
244
|
+
de_escalate_to_ro(db_path_str)
|
|
245
|
+
except Exception as exc:
|
|
246
|
+
logger.warning(f"de_escalate_to_ro failed in MCP write tool: {exc}")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _escalation_db_path() -> str:
|
|
250
|
+
"""Return the DB path to use for RW escalation in MCP write tools.
|
|
251
|
+
|
|
252
|
+
When init_backend() has been called, returns the path it resolved (the explicit argument, or the default when none was passed).
|
|
253
|
+
Falls back to get_db_path() only when init_backend has not been called
|
|
254
|
+
(e.g. direct invocation in tests that set up a backend themselves).
|
|
255
|
+
|
|
256
|
+
Bug B fix: index_repo previously called str(get_db_path()) directly,
|
|
257
|
+
which always returns the default ~/.sqlcg/graph.db regardless of the path
|
|
258
|
+
init_backend() was given. This caused escalation to open the real live DB
|
|
259
|
+
even when init_backend was called with a tmp_path in tests.
|
|
260
|
+
"""
|
|
261
|
+
if _init_db_path is not None:
|
|
262
|
+
return _init_db_path
|
|
263
|
+
return str(get_db_path())
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _assert_schema_current(backend: GraphBackend, path: str) -> None:
|
|
267
|
+
"""Refuse to start when the stored schema version differs from the current build.
|
|
268
|
+
|
|
269
|
+
Called inside the RW-ensure window of init_backend (Step 1.4) after
|
|
270
|
+
init_schema() has run the create-if-absent step.
|
|
271
|
+
|
|
272
|
+
Args:
|
|
273
|
+
backend: An open (RW) backend to query.
|
|
274
|
+
path: The db_path string — included in the error message for context.
|
|
275
|
+
|
|
276
|
+
Raises:
|
|
277
|
+
RuntimeError: Stored version present and != current SCHEMA_VERSION.
|
|
278
|
+
Message names both versions and the sqlcg db reset remedy.
|
|
279
|
+
"""
|
|
280
|
+
from sqlcg.core.schema import SCHEMA_VERSION
|
|
281
|
+
|
|
282
|
+
stored = backend.get_schema_version()
|
|
283
|
+
if stored is not None and stored != SCHEMA_VERSION:
|
|
284
|
+
msg = (
|
|
285
|
+
f"Database schema is v{stored}, but this build expects v{SCHEMA_VERSION} — "
|
|
286
|
+
f"run 'sqlcg db reset && sqlcg index <path>' to re-index."
|
|
287
|
+
)
|
|
288
|
+
raise RuntimeError(msg)
|
|
289
|
+
|
|
290
|
+
|
|
160
291
|
@contextmanager
|
|
161
292
|
def _open_backend():
|
|
162
293
|
"""Context manager to get the initialized backend.
|
|
@@ -462,19 +593,27 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
|
|
|
462
593
|
success = True
|
|
463
594
|
|
|
464
595
|
try:
|
|
465
|
-
db = _get_backend()
|
|
466
|
-
indexer = Indexer()
|
|
467
596
|
path = Path(repo_path).resolve()
|
|
468
597
|
if not path.exists():
|
|
469
598
|
raise ValueError(f"Repository path does not exist: {repo_path}")
|
|
470
599
|
if not path.is_dir():
|
|
471
600
|
raise ValueError(f"Repository path is not a directory: {repo_path}")
|
|
472
601
|
|
|
602
|
+
# If the backend is in RO serving mode (set by init_backend), escalate
|
|
603
|
+
# to RW for the duration of this write op, then de-escalate after.
|
|
604
|
+
# Bug B fix: use _escalation_db_path() instead of str(get_db_path()) so
|
|
605
|
+
# that escalation targets the DB init_backend() actually opened, not the
|
|
606
|
+
# default ~/.sqlcg/graph.db.
|
|
607
|
+
db_path_str = _escalation_db_path()
|
|
608
|
+
is_ro = _serving_ro
|
|
609
|
+
rw_db = _get_or_escalate_rw(db_path_str, is_ro)
|
|
610
|
+
|
|
611
|
+
indexer = Indexer()
|
|
473
612
|
# Ensure the Repo node exists for this repository
|
|
474
613
|
from sqlcg.core.schema import NodeLabel, RelType
|
|
475
614
|
|
|
476
615
|
abs_path = str(path)
|
|
477
|
-
|
|
616
|
+
rw_db.upsert_node(
|
|
478
617
|
NodeLabel.REPO,
|
|
479
618
|
abs_path,
|
|
480
619
|
{
|
|
@@ -484,14 +623,14 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
|
|
|
484
623
|
)
|
|
485
624
|
|
|
486
625
|
# Index the repository (with absolute path)
|
|
487
|
-
result = indexer.index_repo(path, dialect,
|
|
626
|
+
result = indexer.index_repo(path, dialect, rw_db)
|
|
488
627
|
|
|
489
628
|
# Create BELONGS_TO relationships from File nodes to Repo node
|
|
490
629
|
# Query for all File nodes in this repo and link them to the Repo
|
|
491
630
|
repo_prefix = abs_path.rstrip("/") + "/"
|
|
492
|
-
file_rows =
|
|
631
|
+
file_rows = rw_db.run_read(INDEX_REPO_FILES_QUERY, {"repo_prefix": repo_prefix})
|
|
493
632
|
for row in file_rows:
|
|
494
|
-
|
|
633
|
+
rw_db.upsert_edge(
|
|
495
634
|
NodeLabel.FILE,
|
|
496
635
|
row["path"],
|
|
497
636
|
NodeLabel.REPO,
|
|
@@ -499,6 +638,8 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
|
|
|
499
638
|
RelType.BELONGS_TO,
|
|
500
639
|
{},
|
|
501
640
|
)
|
|
641
|
+
if is_ro:
|
|
642
|
+
_de_escalate_to_ro_from_tool(db_path_str)
|
|
502
643
|
|
|
503
644
|
logger.info(f"Indexed {result['files_parsed']} files with {result['tables_found']} tables")
|
|
504
645
|
|