sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlcg/server/writer.py ADDED
@@ -0,0 +1,459 @@
1
+ """Single-writer queue and drain task for the MCP server.
2
+
3
+ DuckDB runs with a single R/W handle for the server process lifetime.
4
+ The drain task acquires backend_lock and runs the indexer op directly against
5
+ the live backend — no RO→RW escalation needed. Atomic visibility is provided
6
+ by DuckDB's transaction (BEGIN … COMMIT): readers on MVCC snapshots see the
7
+ old graph until COMMIT, then atomically the new one.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from typing import TYPE_CHECKING, Literal
15
+
16
+ from sqlcg.utils.logging import getLogger
17
+
18
+ if TYPE_CHECKING:
19
+ import anyio
20
+
21
+ from sqlcg.metrics.store import MetricsStore
22
+
23
+ logger = getLogger(__name__)
24
+
25
+ # ---------------------------------------------------------------------------
26
+ # Coalesce reason constants — single source of truth for status, logs, metrics
27
+ # ---------------------------------------------------------------------------
28
+
29
# Recorded once for every pending request that is dropped because a full
# ``index`` request was enqueued.
COALESCE_SUPERSEDED_BY_INDEX = "superseded_by_index"
# Recorded when an incoming ``reindex`` is merged into a ``reindex`` that is
# already pending.
COALESCE_COLLAPSED_INTO_PENDING_REINDEX = "collapsed_into_pending_reindex"
# Recorded when an incoming ``reindex`` is dropped because a full ``index`` is
# already pending.
COALESCE_REINDEX_DROPPED_INDEX_PENDING = "reindex_dropped_index_pending"
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Data models
36
+ # ---------------------------------------------------------------------------
37
+
38
+
39
@dataclass
class WriterRequest:
    """A single enqueued write request.

    Requests are handed to ``WriterQueue.enqueue`` and later executed by the
    drain task. ``_waiters`` is excluded from ``repr`` and from equality
    comparison.
    """

    # Operation to run: a full "index" rebuild or an incremental "reindex".
    op: Literal["index", "reindex"]
    # Repository root. The drain task indexes this path and, for a reindex
    # without ``to_sha``, runs ``git rev-parse HEAD`` with it as the cwd.
    root: str
    # Dialect forwarded unchanged to the indexer (may be None).
    dialect: str | None
    # Reindex range start. None makes the drain task fall back to the SHA
    # returned by the backend's ``get_indexed_sha()``. Not read for "index".
    from_sha: str | None
    # Reindex range end. None makes the drain task resolve the repository
    # HEAD at drain time. Not read for "index".
    to_sha: str | None
    requested_by: str  # "cli" or "hook"
    # Wall-clock enqueue time (``time.time()``), reported in status snapshots.
    queued_at: float = field(default_factory=time.time)
    # Subscribers waiting for this request to complete (for wait=true clients).
    # Each entry is a channel-like object exposing an awaitable
    # ``send(frame)``; the queue and the drain task send one terminal frame
    # (a dict containing ``"done": True``) to every entry.
    _waiters: list = field(default_factory=list, repr=False, compare=False)
53
+
54
+
55
class WriterQueue:
    """Serialised, coalescing write request queue for the single-writer model.

    Coalescing rules (enforced at enqueue time under ``_lock``):

    1. A full ``index`` enqueue drops ALL pending requests (it supersedes).
    2. A ``reindex`` arriving while another ``reindex`` is pending collapses
       into that pending request. When the two requests name different
       ``to_sha`` targets, the pending request's ``to_sha`` is reset to
       ``None`` so the drain task resolves HEAD at drain time and the merged
       run covers the newer target as well.
    3. A ``reindex`` arriving while an ``index`` is pending is dropped (the
       pending ``index`` subsumes it).
    4. Write lock is held only while draining — enforced by the drain task.

    The *active* request (currently draining) is never mutated by coalescing.

    Waiters of superseded or dropped requests receive a terminal frame with
    ``"coalesced": True``. These frames are sent only after ``_lock`` has been
    released, so a slow waiter channel cannot stall other ``enqueue`` calls
    or ``pop_next``.
    """

    def __init__(self, metrics: MetricsStore | None = None) -> None:
        """Create an empty queue.

        Args:
            metrics: Optional store whose ``record_queue_event`` is called
                (best-effort) for enqueue, coalesce and drain events.
        """
        import anyio

        self._pending: list[WriterRequest] = []
        self._active: WriterRequest | None = None
        self._active_progress: dict = {}
        self._coalesced: dict[str, int] = {
            COALESCE_SUPERSEDED_BY_INDEX: 0,
            COALESCE_COLLAPSED_INTO_PENDING_REINDEX: 0,
            COALESCE_REINDEX_DROPPED_INDEX_PENDING: 0,
        }
        self._last_coalesce_at: float | None = None
        self._last_coalesce_reason: str | None = None
        self._lock: anyio.Lock = anyio.Lock()
        self._wake: anyio.Event = anyio.Event()
        self._metrics = metrics

    async def enqueue(self, req: WriterRequest) -> int:
        """Enqueue *req* and return its position (1-indexed, after coalescing).

        Applies coalescing rules 1–3. Returns the pending-queue position
        (1 = next to drain) so callers can report it in ``{queued: true}``.
        When *req* is dropped or collapsed, the current pending-queue length
        is returned instead.
        """
        async with self._lock:
            queued, waiters, note = self._coalesce(req)
            position = len(self._pending)
            if queued:
                # Wake the drain task.
                if not self._wake.is_set():
                    self._wake.set()
                logger.info(
                    f"enqueued op={req.op} root={req.root!r} by={req.requested_by} "
                    f"position={position}"
                )
                self._record_enqueue(req, position)

        # Deliver terminal frames outside the lock: a blocked channel must not
        # hold up other producers or the drain task.
        if note is not None:
            await self._notify_coalesced(waiters, note)
        return position

    def _coalesce(self, req: WriterRequest) -> tuple[bool, list, str | None]:
        """Apply coalescing rules 1–3 to *req*; the caller must hold ``_lock``.

        Returns:
            ``(queued, waiters, note)`` — whether *req* was appended to the
            pending list, the waiter channels that must receive a terminal
            ``coalesced`` frame, and the note for that frame (``None`` when
            nobody has to be notified).
        """
        if req.op == "index":
            # Rule 1: full index supersedes all pending requests.
            superseded_waiters = [ch for old in self._pending for ch in old._waiters]
            n_superseded = len(self._pending)
            self._pending.clear()
            for _ in range(n_superseded):
                self._record_coalesce(COALESCE_SUPERSEDED_BY_INDEX)
            self._pending.append(req)
            return True, superseded_waiters, "superseded by a newer full index"

        if req.op == "reindex":
            # Rule 3: reindex behind a pending index → drop.
            if any(r.op == "index" for r in self._pending):
                self._record_coalesce(COALESCE_REINDEX_DROPPED_INDEX_PENDING)
                return False, list(req._waiters), "dropped — full index already pending"

            # Rule 2: reindex collapses into existing pending reindex.
            existing = next((r for r in self._pending if r.op == "reindex"), None)
            if existing is not None:
                self._record_coalesce(COALESCE_COLLAPSED_INTO_PENDING_REINDEX)
                # Transfer waiters to the existing request.
                existing._waiters.extend(req._waiters)
                if existing.to_sha != req.to_sha:
                    # The merged run must not stop at the older target;
                    # None makes the drain task resolve HEAD at drain time.
                    existing.to_sha = None
                return False, [], None

        # First pending reindex, or an op this queue does not coalesce.
        self._pending.append(req)
        return True, [], None

    @staticmethod
    async def _notify_coalesced(waiters: list, note: str) -> None:
        """Send a terminal ``coalesced`` frame to each waiter (best-effort).

        A fresh frame dict is built per waiter. A ``send`` that raises is
        logged at DEBUG and skipped so the remaining waiters are still served.
        """
        for ch in waiters:
            try:
                await ch.send(
                    {
                        "ok": True,
                        "done": True,
                        "coalesced": True,
                        "summary": {"note": note},
                    }
                )
            except Exception as exc:
                logger.debug(f"could not notify coalesced waiter: {exc!r}")

    async def pop_next(self) -> WriterRequest | None:
        """Pop the next pending request (or None if empty).

        The popped request becomes the active one and its progress record is
        initialised. Whenever the pending list is empty afterwards, the wake
        Event is replaced by a fresh, unset one so the drain can wait again.
        """
        import anyio

        async with self._lock:
            req: WriterRequest | None = None
            if self._pending:
                req = self._pending.pop(0)
                self._active = req
                self._active_progress = {
                    "state": "running",
                    "op": req.op,
                    "started_at": time.time(),
                    "files_done": 0,
                    "files_total": None,
                }
            if not self._pending:
                # Nothing left to drain: install a fresh, unset Event.
                self._wake = anyio.Event()
            return req

    def mark_active_done(self, summary: dict) -> None:
        """Called by the drain task after a successful drain."""
        self._active = None
        self._active_progress = {
            "state": "done",
            "finished_at": time.time(),
            "summary": summary,
        }

    def mark_active_failed(self, error: str) -> None:
        """Called by the drain task when the indexer body raises."""
        self._active = None
        self._active_progress = {
            "state": "failed",
            "error": error,
            "finished_at": time.time(),
        }

    def update_progress(self, files_done: int, files_total: int) -> None:
        """Update the active drain's file-level progress.

        Ignored unless the current progress record is in the ``running``
        state.
        """
        if self._active_progress.get("state") == "running":
            self._active_progress["files_done"] = files_done
            self._active_progress["files_total"] = files_total

    @staticmethod
    def _describe(req: WriterRequest) -> dict:
        """Return the status-facing fields of *req* as a plain dict."""
        return {
            "op": req.op,
            "root": req.root,
            "requested_by": req.requested_by,
            "queued_at": req.queued_at,
        }

    def coalesce_view(self) -> dict:
        """Return a snapshot of queue state for status responses.

        Synchronous and lock-free: it only reads fields and returns copies of
        the progress record and the per-reason counters.
        """
        active = self._active
        return {
            "active": self._describe(active) if active is not None else None,
            "active_progress": dict(self._active_progress),
            "pending": [self._describe(r) for r in self._pending],
            "coalesced_since_start": sum(self._coalesced.values()),
            "coalesced_by_reason": dict(self._coalesced),
            "last_coalesce_at": self._last_coalesce_at,
            "last_coalesce_reason": self._last_coalesce_reason,
        }

    def _emit_metric(self, event: str, **fields) -> None:
        """Forward a queue event to the MetricsStore, if any (best-effort).

        Metrics must never break queue operation, so a failure is logged at
        DEBUG and otherwise ignored.
        """
        if self._metrics is None:
            return
        try:
            self._metrics.record_queue_event(event, **fields)
        except Exception as exc:
            logger.debug(f"metrics event {event!r} not recorded: {exc!r}")

    def _record_coalesce(self, reason: str) -> None:
        """Bump per-reason coalesce counter and emit lifecycle log + metrics."""
        self._coalesced[reason] = self._coalesced.get(reason, 0) + 1
        self._last_coalesce_at = time.time()
        self._last_coalesce_reason = reason
        logger.info(f"coalesced (reason={reason})")
        self._emit_metric("coalesced", reason=reason, queue_depth=len(self._pending))

    def _record_enqueue(self, req: WriterRequest, depth: int) -> None:
        """Record an enqueue event to MetricsStore (best-effort)."""
        self._emit_metric("enqueued", op=req.op, queue_depth=depth)

    def record_drained(self, op: str, duration_ms: float) -> None:
        """Record a completed drain to MetricsStore (best-effort)."""
        logger.info(f"drained op={op} duration_ms={duration_ms:.1f}")
        self._emit_metric("drained", op=op, duration_ms=duration_ms)
280
+
281
+
282
+ # ---------------------------------------------------------------------------
283
+ # Drain task
284
+ # ---------------------------------------------------------------------------
285
+
286
+
287
async def _send_terminal_frame(waiters: list, frame: dict) -> None:
    """Send a terminal *frame* to every waiter channel (best-effort).

    Each waiter receives its own shallow copy of *frame*. A ``send`` that
    raises is logged at DEBUG and skipped so the remaining waiters are still
    served.
    """
    for channel in waiters:
        try:
            await channel.send(dict(frame))
        except Exception as exc:
            logger.debug(f"could not deliver terminal frame to waiter: {exc!r}")


def _resolve_head_sha(root: str) -> str | None:
    """Return the SHA printed by ``git rev-parse HEAD`` in *root*, or None.

    None is returned when git cannot be started, exits non-zero, or prints
    nothing. Blocking — call it off the event-loop thread.
    """
    import subprocess

    try:
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=root,
            capture_output=True,
            text=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        return None
    if result.returncode != 0:
        return None
    return result.stdout.strip() or None


def _rebuild_graph(indexer, backend, req: WriterRequest, on_progress) -> dict:
    """Clear all graph tables and re-index ``req.root`` in ONE transaction.

    Phase 4 (C3/C6): readers on MVCC snapshots see the old graph until COMMIT
    flips it atomically; a mid-rebuild raise rolls back to the old graph.
    transaction() is reentrant, so index_repo's internal per-batch
    transactions join this outer one. Blocking — runs in a worker thread.
    """
    from pathlib import Path

    with backend.transaction():
        backend.clear_all_tables()
        return indexer.index_repo(
            Path(req.root),
            req.dialect,
            backend,
            progress_callback=on_progress,
        )


async def drain_loop(
    queue: WriterQueue,
    db_path: str,  # kept for call-site compatibility; not used (backend from tools._backend)
    backend_lock: anyio.Lock,
    shutdown_requested: anyio.Event,
    opener=None,  # kept for call-site compatibility; not used (no escalation)
) -> None:
    """Consume WriterQueue requests one at a time under backend_lock.

    DuckDB model: the server holds one R/W connection for its lifetime.
    The drain task uses tools._backend directly — no RO→RW escalation.
    Each full rebuild wraps all table-clearing + bulk inserts in a single
    transaction so MVCC readers see the old graph until COMMIT (C3).

    The drain task:
      1. Waits on queue._wake.
      2. Pops the next request.
      3. Acquires backend_lock.
      4. Gets the current R/W backend from tools._backend.
      5. Runs the indexer op off the event-loop thread. For a reindex,
         missing SHAs are resolved first: ``from_sha`` from the backend's
         stored SHA, ``to_sha`` from ``git rev-parse HEAD`` (also run off the
         event-loop thread).
      6. Clears queue._active; sends a terminal frame to every waiter;
         records drain metrics.
      7. Repeats until shutdown_requested is set.

    W7 — drain body exception: exceptions are caught, logged at ERROR with
    their traceback, and the loop continues so one bad request cannot kill
    the drain task.

    NOTE(review): ``shutdown_requested`` is only checked between waits on
    ``queue._wake``; presumably the shutdown path also sets that event or
    cancels this task — confirm at the call site.

    Args:
        queue: Source of requests; also receives progress and outcome updates.
        db_path: Unused; kept for call-site compatibility.
        backend_lock: Lock held for the whole duration of each drain.
        shutdown_requested: Event that ends the loop once set.
        opener: Unused; kept for call-site compatibility.
    """
    from pathlib import Path

    import anyio.to_thread as _to_thread

    import sqlcg.server.tools as _tools
    from sqlcg.indexer.indexer import Indexer

    while not shutdown_requested.is_set():
        # Wait until there is work to do.
        await queue._wake.wait()
        if shutdown_requested.is_set():
            break

        req = await queue.pop_next()
        if req is None:
            continue

        logger.info(f"drain started op={req.op} root={req.root!r}")
        drain_start = time.monotonic()

        async with backend_lock:
            # Read the backend per request: the attribute may change over the
            # server's lifetime.
            db = _tools._backend
            if db is None:
                err = "backend not available — skipping drain"
                logger.error(err)
                queue.mark_active_failed(err)
                await _send_terminal_frame(
                    req._waiters, {"ok": False, "done": True, "error": err}
                )
                continue

            try:
                indexer = Indexer()
                summary: dict = {}
                failure: str | None = None

                if req.op == "index":
                    summary = await _to_thread.run_sync(
                        _rebuild_graph, indexer, db, req, queue.update_progress
                    )
                elif req.op == "reindex":
                    # Resolve from_sha / to_sha at drain time (rule 2 + W3).
                    effective_from = req.from_sha
                    effective_to = req.to_sha

                    if effective_from is None:
                        # Standalone mode — resolve stored SHA.
                        try:
                            effective_from = db.get_indexed_sha()
                        except Exception:
                            effective_from = None

                    if effective_from is None:
                        failure = "no prior index — run 'sqlcg index <path>' first"
                    else:
                        if effective_to is None:
                            # Resolve current HEAD without blocking the
                            # event loop on the git subprocess.
                            effective_to = await _to_thread.run_sync(
                                _resolve_head_sha, req.root
                            )
                        if effective_to is None:
                            failure = "could not resolve HEAD SHA for reindex"
                        else:
                            summary = await _to_thread.run_sync(
                                indexer.resync_changed,
                                Path(req.root),
                                effective_from,
                                effective_to,
                                db,
                                req.dialect,
                            )

                if failure is not None:
                    queue.mark_active_failed(failure)
                    await _send_terminal_frame(
                        req._waiters, {"ok": False, "done": True, "error": failure}
                    )
                else:
                    queue.mark_active_done(summary)
                    # Notify waiting clients with terminal frame.
                    await _send_terminal_frame(
                        req._waiters, {"ok": True, "done": True, "summary": summary}
                    )

            except Exception as exc:
                # W7 — failure: clear active, relay terminal frame.
                # Fall back to the exception type so the error is never empty.
                err_str = str(exc) or type(exc).__name__
                logger.exception(f"drain body raised: {err_str}")
                queue.mark_active_failed(err_str)
                await _send_terminal_frame(
                    req._waiters, {"ok": False, "done": True, "error": err_str}
                )
            finally:
                duration_ms = (time.monotonic() - drain_start) * 1000
                queue.record_drained(req.op, duration_ms)