sql-code-graph 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlcg/server/writer.py ADDED
@@ -0,0 +1,634 @@
1
+ """Single-writer queue, escalation primitive, and drain task for the MCP server.
2
+
3
+ Owns the RO→RW escalation (close the read-only backend, reopen read-write for
4
+ the duration of a drain, then de-escalate back to read-only) and the
5
+ coalescing WriterQueue that serialises all write ops through the server.
6
+
7
+ Constants below are server-side transport/escalation parameters — NOT
8
+ KuzuConfig values. Same convention as _NOTIFY_SOCKET_TIMEOUT_S in reindex.py.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import time
14
+ from dataclasses import dataclass, field
15
+ from typing import TYPE_CHECKING, Literal
16
+
17
+ from sqlcg.utils.logging import getLogger
18
+
19
+ if TYPE_CHECKING:
20
+ import anyio
21
+
22
+ from sqlcg.core.graph_db import GraphBackend
23
+ from sqlcg.metrics.store import MetricsStore
24
+
25
+ logger = getLogger(__name__)
26
+
27
# ---------------------------------------------------------------------------
# Escalation tuning — module-level on purpose, never read from config
# ---------------------------------------------------------------------------

# How long (seconds) escalate_to_rw keeps retrying a lock-contended RW open.
_ESCALATION_RETRY_BUDGET_S = 5.0
# Backoff schedule between retries: begin at _BACKOFF_START_S seconds, multiply
# by _BACKOFF_FACTOR after every failed attempt, never exceed _BACKOFF_CAP_S.
_BACKOFF_START_S = 0.02
_BACKOFF_FACTOR = 2.0
_BACKOFF_CAP_S = 0.5

# ---------------------------------------------------------------------------
# Coalesce reasons — the one set of strings shared by status, logs, metrics
# ---------------------------------------------------------------------------

COALESCE_SUPERSEDED_BY_INDEX = "superseded_by_index"
COALESCE_COLLAPSED_INTO_PENDING_REINDEX = "collapsed_into_pending_reindex"
COALESCE_REINDEX_DROPPED_INDEX_PENDING = "reindex_dropped_index_pending"
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Exceptions
49
+ # ---------------------------------------------------------------------------
50
+
51
+
52
class EscalationLockError(RuntimeError):
    """Signal that the read-write reopen never obtained the database lock.

    ``escalate_to_rw`` raises this once its retry budget is spent. The message
    it attaches carries a hint about the lock holder and describes the
    ``SQLCG_DB_PATH`` side-database workaround.
    """
57
+
58
+
59
+ # ---------------------------------------------------------------------------
60
+ # Data models
61
+ # ---------------------------------------------------------------------------
62
+
63
+
64
@dataclass
class WriterRequest:
    """One write operation waiting in, or being drained from, the writer queue."""

    # Which indexer operation the drain task should run.
    op: Literal["index", "reindex"]
    # Repository root the operation applies to.
    root: str
    # Dialect handed through to the indexer; may be None.
    dialect: str | None
    # Reindex range. A None ``from_sha`` is resolved from the stored indexed
    # SHA and a None ``to_sha`` from the repository HEAD, both at drain time.
    from_sha: str | None
    to_sha: str | None
    # Origin of the request: "cli" or "hook".
    requested_by: str
    # Wall-clock timestamp taken when the request object is created.
    queued_at: float = field(default_factory=time.time)
    # Channels of clients blocked on this request (wait=true). The queue and
    # the drain task call ``send`` on each entry to deliver the terminal frame.
    # Excluded from repr and from equality.
    _waiters: list = field(compare=False, repr=False, default_factory=list)
78
+
79
+
80
class WriterQueue:
    """Serialised, coalescing write request queue for the single-writer model.

    Coalescing rules (applied at enqueue time under ``_lock``):

    1. A full ``index`` enqueue drops ALL pending requests (it supersedes).
    2. A ``reindex`` arriving while another ``reindex`` is pending collapses
       into that pending request: its waiters are moved over and the pending
       request is the one that eventually drains.
    3. A ``reindex`` arriving while an ``index`` is pending is dropped (the
       pending ``index`` subsumes it).

    The *active* request (currently draining) is never mutated by coalescing;
    only ``_pending`` is.

    Waiters of requests that are coalesced away receive a terminal
    ``{"ok": True, "done": True, "coalesced": True, ...}`` frame. Those frames
    are sent only after ``_lock`` has been released, so a slow or stuck
    receiver cannot stall other ``enqueue`` / ``pop_next`` callers.
    """

    def __init__(self, metrics: MetricsStore | None = None) -> None:
        """Create an empty queue.

        Args:
            metrics: Optional store whose ``record_queue_event`` receives
                enqueue / coalesce / drain events on a best-effort basis.
        """
        # Local import: at module level anyio is imported for typing only.
        import anyio

        self._pending: list[WriterRequest] = []
        self._active: WriterRequest | None = None
        self._active_progress: dict = {}
        self._coalesced: dict[str, int] = {
            COALESCE_SUPERSEDED_BY_INDEX: 0,
            COALESCE_COLLAPSED_INTO_PENDING_REINDEX: 0,
            COALESCE_REINDEX_DROPPED_INDEX_PENDING: 0,
        }
        self._last_coalesce_at: float | None = None
        self._last_coalesce_reason: str | None = None
        self._lock: anyio.Lock = anyio.Lock()
        self._wake: anyio.Event = anyio.Event()
        self._metrics = metrics

    async def enqueue(self, req: WriterRequest) -> int:
        """Enqueue *req*, applying coalescing rules 1–3.

        Returns:
            The length of the pending queue after coalescing. For a request
            that was appended this is its 1-indexed position (1 = next to
            drain); for a request that was dropped or collapsed it is simply
            the current pending depth.
        """
        # (waiters, note) pairs gathered under the lock, delivered after it.
        notifications: list[tuple[list, str]] = []
        appended = True

        async with self._lock:
            if req.op == "index":
                # Rule 1: a full index supersedes everything still pending.
                superseded = list(self._pending)
                self._pending.clear()
                for _ in superseded:
                    self._record_coalesce(COALESCE_SUPERSEDED_BY_INDEX)
                orphaned = [ch for old in superseded for ch in old._waiters]
                if orphaned:
                    notifications.append((orphaned, "superseded by a newer full index"))
                self._pending.append(req)
            elif req.op == "reindex":
                if any(r.op == "index" for r in self._pending):
                    # Rule 3: a pending full index already covers this reindex.
                    self._record_coalesce(COALESCE_REINDEX_DROPPED_INDEX_PENDING)
                    notifications.append(
                        (list(req._waiters), "dropped — full index already pending")
                    )
                    appended = False
                else:
                    existing = next((r for r in self._pending if r.op == "reindex"), None)
                    if existing is not None:
                        # Rule 2: fold into the pending reindex; its waiters
                        # now also include this request's waiters.
                        self._record_coalesce(COALESCE_COLLAPSED_INTO_PENDING_REINDEX)
                        existing._waiters.extend(req._waiters)
                        appended = False
                    else:
                        self._pending.append(req)
            else:
                self._pending.append(req)

            position = len(self._pending)
            if appended:
                # Wake the drain task (setting an already-set event is a no-op).
                self._wake.set()

        # Deliver coalesce frames outside the lock so a blocked receiver
        # cannot hold up the queue.
        for waiters, note in notifications:
            await self._send_coalesced(waiters, note)

        if appended:
            logger.info(
                f"enqueued op={req.op} root={req.root!r} by={req.requested_by} "
                f"position={position}"
            )
            self._record_enqueue(req, position)
        return position

    @staticmethod
    async def _send_coalesced(waiters: list, note: str) -> None:
        """Send a terminal "coalesced" frame carrying *note* to each waiter.

        Delivery is best-effort: a failing ``send`` is logged at debug level
        and does not affect the remaining waiters.
        """
        for ch in waiters:
            try:
                # A fresh dict per waiter so receivers never share a frame.
                await ch.send(
                    {
                        "ok": True,
                        "done": True,
                        "coalesced": True,
                        "summary": {"note": note},
                    }
                )
            except Exception as exc:
                logger.debug(f"could not notify coalesced waiter: {exc}")

    async def pop_next(self) -> WriterRequest | None:
        """Pop the next pending request, or return None when nothing is pending.

        The popped request becomes the active one and ``_active_progress`` is
        reset to a fresh "running" record. Whenever the pending list is empty
        afterwards, ``_wake`` is replaced by a new unset Event so the drain
        task can wait for the next enqueue.
        """
        import anyio

        async with self._lock:
            req = self._pending.pop(0) if self._pending else None
            if req is not None:
                self._active = req
                self._active_progress = {
                    "state": "running",
                    "op": req.op,
                    "started_at": time.time(),
                    "files_done": 0,
                    "files_total": None,
                }
            if not self._pending:
                self._wake = anyio.Event()
            return req

    def mark_active_done(self, summary: dict) -> None:
        """Clear the active request and record a "done" progress entry."""
        self._active = None
        self._active_progress = {
            "state": "done",
            "finished_at": time.time(),
            "summary": summary,
        }

    def mark_active_failed(self, error: str) -> None:
        """Clear the active request and record a "failed" progress entry."""
        self._active = None
        self._active_progress = {
            "state": "failed",
            "error": error,
            "finished_at": time.time(),
        }

    def update_progress(self, files_done: int, files_total: int) -> None:
        """Update file-level progress; ignored unless a drain is "running"."""
        if self._active_progress.get("state") == "running":
            self._active_progress["files_done"] = files_done
            self._active_progress["files_total"] = files_total

    def coalesce_view(self) -> dict:
        """Return a snapshot of queue state for status responses.

        Synchronous and lock-free: the method contains no ``await``, so no
        other task on the same event loop can interleave with it. The returned
        containers are copies; mutating them does not affect the queue.
        """
        active = self._active
        return {
            "active": (
                None
                if active is None
                else {
                    "op": active.op,
                    "root": active.root,
                    "requested_by": active.requested_by,
                    "queued_at": active.queued_at,
                }
            ),
            "active_progress": dict(self._active_progress),
            "pending": [
                {
                    "op": r.op,
                    "root": r.root,
                    "requested_by": r.requested_by,
                    "queued_at": r.queued_at,
                }
                for r in self._pending
            ],
            "coalesced_since_start": sum(self._coalesced.values()),
            "coalesced_by_reason": dict(self._coalesced),
            "last_coalesce_at": self._last_coalesce_at,
            "last_coalesce_reason": self._last_coalesce_reason,
        }

    def _emit_metric(self, event: str, **fields) -> None:
        """Forward one queue event to the metrics store, never raising."""
        if self._metrics is None:
            return
        try:
            self._metrics.record_queue_event(event, **fields)
        except Exception as exc:
            # Metrics are best-effort; leave a trace instead of failing the op.
            logger.debug(f"metrics event {event!r} not recorded: {exc}")

    def _record_coalesce(self, reason: str) -> None:
        """Bump the per-reason coalesce counter and emit log + metrics."""
        self._coalesced[reason] = self._coalesced.get(reason, 0) + 1
        self._last_coalesce_at = time.time()
        self._last_coalesce_reason = reason
        logger.info(f"coalesced (reason={reason})")
        self._emit_metric("coalesced", reason=reason, queue_depth=len(self._pending))

    def _record_enqueue(self, req: WriterRequest, depth: int) -> None:
        """Record an enqueue event to the metrics store (best-effort)."""
        self._emit_metric("enqueued", op=req.op, queue_depth=depth)

    def record_drained(self, op: str, duration_ms: float) -> None:
        """Log a finished drain and record it to the metrics store (best-effort)."""
        logger.info(f"drained op={op} duration_ms={duration_ms:.1f}")
        self._emit_metric("drained", op=op, duration_ms=duration_ms)
305
+
306
+
307
+ # ---------------------------------------------------------------------------
308
+ # Escalation primitive
309
+ # ---------------------------------------------------------------------------
310
+
311
+
312
def escalate_to_rw(
    db_path: str,
    *,
    current: GraphBackend | None = None,
    opener=None,
    retry_budget_s: float = _ESCALATION_RETRY_BUDGET_S,
) -> GraphBackend:
    """Close *current* and reopen the database read-write with bounded retry.

    The open is retried with exponential backoff plus jitter while the opener
    raises a lock-related ``RuntimeError`` and the retry budget is not spent.
    This function sleeps with ``time.sleep`` between attempts, so it blocks
    the calling thread for up to roughly *retry_budget_s* seconds.

    Args:
        db_path: Path to the database.
        current: The currently-open backend to close before reopening RW.
            Pass ``None`` when no handle exists yet.
        opener: Callable ``(path, read_only=...) -> GraphBackend``. Defaults
            to ``KuzuBackend``. Inject a fake opener in tests to exercise
            retry/backoff deterministically instead of patching a global.
        retry_budget_s: Total retry budget in seconds.

    Returns:
        The new read-write backend, which is also stored in
        ``tools._backend``.

    Raises:
        EscalationLockError: The budget ran out while the lock was still
            held. Before raising, a read-only reopen is attempted so reads can
            keep being served; the message names the lock-holder hint and the
            ``SQLCG_DB_PATH`` side-DB workaround.
        RuntimeError: A non-lock error from the opener (not retried).
    """
    import random
    import re

    import sqlcg.server.tools as _tools
    from sqlcg.core.kuzu_backend import KuzuBackend, find_lock_holder

    if opener is None:
        opener = KuzuBackend

    # Close the current backend (if any) before attempting the RW open.
    if current is not None:
        try:
            current.close()
        except Exception as exc:
            logger.debug(f"escalate_to_rw: closing current backend failed: {exc}")
        # Make sure the module singleton no longer points at the closed handle.
        if _tools._backend is current:
            _tools._backend = None

    deadline = time.monotonic() + retry_budget_s
    backoff = _BACKOFF_START_S
    attempts = 0

    while True:
        attempts += 1
        try:
            rw_backend = opener(db_path, read_only=False)
        except RuntimeError as exc:
            # Treat the error as lock contention only when a word starting
            # with "lock" appears ("lock", "locked", ...). A plain substring
            # test would also match unrelated words such as "block".
            if re.search(r"\block", str(exc), re.IGNORECASE) is None:
                raise

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                # Budget exhausted — log at ERROR, then raise.
                pid_hint = find_lock_holder(db_path)
                logger.error(
                    f"escalation failed: attempts={attempts} "
                    f"budget={retry_budget_s:.1f}s db_path={db_path!r} "
                    f"holder={pid_hint}"
                )
                # Reopen RO so the server keeps serving reads.
                try:
                    _tools._backend = opener(db_path, read_only=True)
                except Exception as reopen_exc:
                    logger.error(
                        f"escalate_to_rw: RO fallback reopen failed: {reopen_exc}"
                    )
                msg = (
                    f"Could not acquire the write lock to reindex after "
                    f"{retry_budget_s:.1f}s — another process is holding the "
                    f"database ({pid_hint}). The graph was not updated. "
                    f"To run a one-off index without the server, point at a side DB:\n"
                    f" SQLCG_DB_PATH=/tmp/sqlcg-cli/graph.db sqlcg index <path>\n"
                    f"or stop the server first ('sqlcg mcp stop')."
                )
                raise EscalationLockError(msg) from exc

            # Exponential backoff with up to 10% jitter; never sleep past the
            # deadline or longer than _BACKOFF_CAP_S.
            jitter = random.uniform(0, backoff * 0.1)
            time.sleep(min(backoff + jitter, remaining, _BACKOFF_CAP_S))
            backoff = min(backoff * _BACKOFF_FACTOR, _BACKOFF_CAP_S)
        else:
            _tools._backend = rw_backend
            logger.debug(f"escalated to RW db_path={db_path!r} attempts={attempts}")
            return rw_backend
408
+
409
+
410
def de_escalate_to_ro(
    db_path: str,
    *,
    shutdown_requested: anyio.Event | None = None,
    opener=None,
) -> None:
    """Close whatever backend ``tools._backend`` holds and reopen read-only.

    When *shutdown_requested* is set, the read-only reopen is skipped and
    ``tools._backend`` is left as ``None``. If the reopen itself fails, the
    error is logged and ``tools._backend`` likewise stays ``None``; this
    function does not raise in that case.

    Args:
        db_path: Path to the database.
        shutdown_requested: Event that, when set, suppresses the RO reopen.
        opener: Callable ``(path, read_only=...) -> GraphBackend``. Defaults
            to ``KuzuBackend``.
    """
    import sqlcg.server.tools as _tools
    from sqlcg.core.kuzu_backend import KuzuBackend

    if opener is None:
        opener = KuzuBackend

    current = _tools._backend
    if current is not None:
        try:
            current.close()
        except Exception as exc:
            # Closing is best-effort, but leave a trace for diagnosis.
            logger.debug(f"de_escalate_to_ro: closing current backend failed: {exc}")
        _tools._backend = None

    if shutdown_requested is not None and shutdown_requested.is_set():
        # Shutdown in progress — do not reopen RO; leave _backend = None.
        logger.debug("de_escalate_to_ro: shutdown requested — skipping RO reopen")
        return

    try:
        _tools._backend = opener(db_path, read_only=True)
    except Exception as exc:
        logger.error(f"de_escalate_to_ro: failed to reopen RO: {exc}")
    else:
        logger.debug(f"de-escalated to RO db_path={db_path!r}")
454
+
455
+
456
+ # ---------------------------------------------------------------------------
457
+ # Drain task
458
+ # ---------------------------------------------------------------------------
459
+
460
+
461
async def _notify_waiters(req: WriterRequest, frame: dict) -> None:
    """Send a copy of the terminal *frame* to every waiter of *req* (best-effort)."""
    for ch in req._waiters:
        try:
            await ch.send(dict(frame))
        except Exception:
            # The waiting client may already be gone; nothing else to do.
            pass


def _resolve_head_sha(root: str) -> str | None:
    """Return the output of ``git rev-parse HEAD`` run in *root*, or None on failure."""
    import subprocess

    try:
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=root,
            capture_output=True,
            text=True,
        )
    except Exception:
        return None
    return result.stdout.strip() if result.returncode == 0 else None


async def drain_loop(
    queue: WriterQueue,
    db_path: str,
    backend_lock: anyio.Lock,
    shutdown_requested: anyio.Event,
    opener=None,
) -> None:
    """Consume WriterQueue requests one at a time under *backend_lock*.

    Per iteration the task:
      1. Waits on ``queue._wake`` and pops the next request.
      2. Acquires *backend_lock*.
      3. Escalates RO→RW via ``escalate_to_rw``.
      4. Runs the indexer operation in a worker thread.
      5. De-escalates RW→RO in a ``finally`` via ``de_escalate_to_ro``.
      6. Marks the request done/failed, sends a terminal frame to its
         waiters, and records drain metrics.

    The loop ends once *shutdown_requested* is observed set.

    Failure handling: any ``Exception`` raised by escalation or by the indexer
    body is turned into a failed progress record plus an
    ``{"ok": False, "done": True, "error": ...}`` frame for the waiters, and
    the loop carries on, so one bad request cannot kill the drain task.

    Args:
        queue: The queue to drain.
        db_path: Path to the database.
        backend_lock: Lock held for the whole escalate → run → de-escalate span.
        shutdown_requested: Event that stops the loop and suppresses the RO
            reopen during de-escalation.
        opener: Optional backend constructor forwarded to the escalation
            helpers.
    """
    import functools
    from pathlib import Path as _Path

    import anyio.to_thread as _to_thread

    import sqlcg.server.tools as _tools
    from sqlcg.indexer.indexer import Indexer

    while not shutdown_requested.is_set():
        # Wait until there is work to do.
        await queue._wake.wait()
        if shutdown_requested.is_set():
            break

        req = await queue.pop_next()
        if req is None:
            continue

        logger.info(f"drain started op={req.op} root={req.root!r}")
        drain_start = time.monotonic()

        async with backend_lock:
            # NOTE(review): escalate_to_rw sleeps synchronously between retries,
            # so the event loop is blocked while it waits for the lock.
            try:
                rw = escalate_to_rw(
                    db_path,
                    current=_tools._backend,
                    opener=opener,
                )
            except EscalationLockError as exc:
                # Lock never acquired; escalate_to_rw already tried to restore RO.
                queue.mark_active_failed(str(exc))
                await _notify_waiters(req, {"ok": False, "done": True, "error": str(exc)})
                continue
            except Exception as exc:
                # Any other escalation failure must not kill the drain task or
                # leave waiters hanging. The old handle was closed, so restore RO.
                err_str = str(exc) or repr(exc)
                logger.error(f"escalation raised: {err_str}")
                queue.mark_active_failed(err_str)
                await _notify_waiters(req, {"ok": False, "done": True, "error": err_str})
                de_escalate_to_ro(
                    db_path,
                    shutdown_requested=shutdown_requested,
                    opener=opener,
                )
                continue

            try:
                indexer = Indexer()

                if req.op == "index":
                    # partial() binds the current values, so the worker thread
                    # never observes loop variables from a later iteration.
                    summary = await _to_thread.run_sync(
                        functools.partial(
                            indexer.index_repo,
                            _Path(req.root),
                            req.dialect,
                            rw,
                            progress_callback=queue.update_progress,
                        )
                    )
                elif req.op == "reindex":
                    # Resolve from_sha / to_sha at drain time.
                    effective_from = req.from_sha
                    if effective_from is None:
                        # No explicit start — fall back to the stored SHA.
                        try:
                            effective_from = rw.get_indexed_sha()
                        except Exception:
                            effective_from = None
                    if effective_from is None:
                        err = "no prior index — run 'sqlcg index <path>' first"
                        queue.mark_active_failed(err)
                        await _notify_waiters(req, {"ok": False, "done": True, "error": err})
                        continue  # the finally below still de-escalates

                    effective_to = req.to_sha
                    if effective_to is None:
                        # Resolve current HEAD off the event-loop thread.
                        effective_to = await _to_thread.run_sync(_resolve_head_sha, req.root)
                    if effective_to is None:
                        err = "could not resolve HEAD SHA for reindex"
                        queue.mark_active_failed(err)
                        await _notify_waiters(req, {"ok": False, "done": True, "error": err})
                        continue  # the finally below still de-escalates

                    summary = await _to_thread.run_sync(
                        indexer.resync_changed,
                        _Path(req.root),
                        effective_from,
                        effective_to,
                        rw,
                        req.dialect,
                    )
                else:
                    summary = {}

                queue.mark_active_done(summary)
                # Notify waiting clients with the terminal frame.
                await _notify_waiters(req, {"ok": True, "done": True, "summary": summary})

            except Exception as exc:
                # Indexer failure: clear active, relay a terminal error frame.
                # Fall back to repr() so message-less exceptions stay visible.
                err_str = str(exc) or repr(exc)
                logger.error(f"drain body raised: {err_str}")
                queue.mark_active_failed(err_str)
                await _notify_waiters(req, {"ok": False, "done": True, "error": err_str})
            finally:
                de_escalate_to_ro(
                    db_path,
                    shutdown_requested=shutdown_requested,
                    opener=opener,
                )

        # Reached for completed drains and for indexer failures caught above;
        # the early ``continue`` paths skip it.
        duration_ms = (time.monotonic() - drain_start) * 1000
        queue.record_drained(req.op, duration_ms)