vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
vigil_mcp/_jobs.py ADDED
@@ -0,0 +1,587 @@
1
+ """Background job registry for vigil_mcp servers.
2
+
3
+ Design constraints (the user's machine hangs under heavy parallel runs):
4
+ - At most MAX_CONCURRENT jobs running at any time (hard cap = 2).
5
+ - Each job's work runs in exactly ONE worker threading.Thread (plus a small timeout-watcher thread) — no thread pools.
6
+ - Jobs are cancellable via a threading.Event that the worker can poll.
7
+ - A per-job wall-clock timeout (default 600 s) caps runaway jobs automatically.
8
+ - Results are stored in-process AND, when a job carries a ``project_dir``,
9
+ persisted to disk so a completed/failed/cancelled job's result survives an
10
+ MCP server restart.
11
+
12
+ Disk-backed persistence (G2.3)
13
+ ------------------------------
14
+ When ``start(...)`` is given a ``project_dir`` (the path passed to the
15
+ ``start_*`` MCP tools), the job's *terminal* record + result is written to::
16
+
17
+ <project_dir>/.cortex/cortex_jobs/<job_id>.json
18
+
19
+ via an atomic ``tempfile.mkstemp`` + ``os.replace`` under a per-job
20
+ ``filelock.FileLock`` — exactly the pattern vigil_mapper uses in
21
+ ``map_storage._atomic_write_json``. A *fresh* ``JobRegistry`` instance (a new
22
+ process = simulated restart) resolves a prior run's job from disk on lookup,
23
+ because its in-memory dict is empty.
24
+
25
+ Restart / interrupted semantics
26
+ -------------------------------
27
+ * Terminal records (``done`` / ``error`` / ``cancelled`` / ``timeout``) load back verbatim,
28
+ so ``status`` / ``result`` return the prior run's outcome after a restart.
29
+ * A record left on disk in the ``running`` state means the owning process died
30
+ mid-flight. The worker thread is gone and CANNOT be resumed, so on load such
31
+ a record is surfaced as ``interrupted`` — it is never reported as ``done``.
32
+
33
+ Resolution & cross-project rule
34
+ --------------------------------
35
+ * ``status(job_id)`` / ``result(job_id)`` first check memory. On a miss they
36
+ read the job file lazily *by id* (bounded: one ``Path.exists`` + one read,
37
+ never a full directory scan).
38
+ * Job files live under their OWN project's ``cortex_jobs`` dir. A small global
39
+ index (``job_id -> project_dir``, under the user state dir) lets the servers —
40
+ which only pass a ``job_id`` — locate the owning project after a restart.
41
+ * Passing an explicit ``project_dir=`` to ``status`` / ``result`` SCOPES the
42
+ lookup to that project only (the global index is ignored). Hence a job that
43
+ ran under project X is *not* visible when resolved scoped to project Y — its
44
+ file simply is not under Y. Resolving scoped to X (or by id via the index)
45
+ finds it.
46
+ """
47
+ from __future__ import annotations
48
+
49
+ import json
50
+ import os
51
+ import tempfile
52
+ import threading
53
+ import time
54
+ import traceback
55
+ import uuid
56
+ from pathlib import Path
57
+ from typing import Any, Callable
58
+
59
# Hard cap on simultaneously running jobs; callers get a "busy" status when
# the cap is exceeded.
MAX_CONCURRENT: int = 2

# Default wall-clock timeout for a single job (seconds).
DEFAULT_TIMEOUT_S: int = 600

# Job status values.
STATUS_RUNNING: str = "running"
STATUS_DONE: str = "done"
STATUS_ERROR: str = "error"
STATUS_CANCELLED: str = "cancelled"
STATUS_TIMEOUT: str = "timeout"
# Reported for a job loaded from disk whose process died while it was running.
STATUS_INTERRUPTED: str = "interrupted"

# Final statuses, worth persisting to disk. ``running`` is persisted too (so a
# death mid-flight can be detected as ``interrupted`` on reload) but it is NOT
# a terminal status and is therefore absent from this set.
_TERMINAL_STATUSES = frozenset(
    (STATUS_DONE, STATUS_ERROR, STATUS_CANCELLED, STATUS_TIMEOUT)
)

# Path components of the per-project job directory
# (mirrors vigil_mapper's <project>/.cortex/maps/).
_JOBS_SUBDIR = (".cortex", "cortex_jobs")

# Schema marker written into every record, for forward compatibility.
_SCHEMA_VERSION = "1.0.0"

# FileLock acquire timeout (seconds) — matches map_storage's 10 s.
_LOCK_TIMEOUT_S = 10
88
+
89
+
90
+ # ---------------------------------------------------------------------------
91
+ # Disk path helpers
92
+ # ---------------------------------------------------------------------------
93
+
94
def jobs_dir(project_dir: Path | str) -> Path:
    """Return the per-project job directory ``<project_dir>/.cortex/cortex_jobs/``.

    The directory is only computed here, never created.
    """
    project_root = Path(project_dir)
    return project_root.joinpath(*_JOBS_SUBDIR)
97
+
98
+
99
def _job_file(project_dir: Path | str, job_id: str) -> Path:
    """Return the path of *job_id*'s record file inside *project_dir*'s job directory."""
    file_name = f"{job_id}.json"
    return jobs_dir(project_dir) / file_name
101
+
102
+
103
def _index_dir() -> Path:
    """Return the root of the global ``job_id -> project_dir`` index.

    The index sits under the user's home directory so that a lookup by
    ``job_id`` alone can find the owning project after a process restart.
    When the home directory cannot be determined, the OS temp directory is
    used as the base instead.
    """
    try:
        base = Path.home()
    except (OSError, RuntimeError):
        # No resolvable home directory: fall back to the temp dir.
        base = Path(tempfile.gettempdir())
    return base.joinpath(".cortex", "cortex_jobs_index")
115
+
116
+
117
def _index_file(job_id: str) -> Path:
    """Return the path of the global index entry for *job_id*."""
    entry_name = f"{job_id}.json"
    return _index_dir() / entry_name
119
+
120
+
121
+ # ---------------------------------------------------------------------------
122
+ # Atomic JSON write (mirrors vigil_mapper.map_storage._atomic_write_json)
123
+ # ---------------------------------------------------------------------------
124
+
125
def _atomic_write_json(path: Path, payload: dict) -> None:
    """Atomically write *payload* as JSON to *path* (temp file + ``os.replace``).

    The payload is first written to a temp file created next to the target
    (so the final rename stays on one filesystem), then moved over the target
    with ``os.replace``. On any failure the temp file is removed and the
    exception re-raised, so a partial write never lands in *path* itself.

    Args:
        path: Destination file; missing parent directories are created.
        payload: JSON-serialisable mapping to write.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    handle, scratch = tempfile.mkstemp(
        dir=str(target_dir), prefix=".job_", suffix=".tmp"
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            # Serialise inside the ``with`` so the descriptor is closed even
            # when the payload turns out not to be JSON-serialisable.
            text = json.dumps(payload, indent=2, ensure_ascii=False, sort_keys=True)
            out.write(text)
            out.write("\n")
        os.replace(scratch, str(path))
    except BaseException:
        # Best-effort cleanup of the temp file, then propagate the failure.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise
145
+
146
+
147
def _write_with_lock(path: Path, payload: dict) -> None:
    """Atomically write *payload* to *path* while holding a per-file lock.

    The lock file is *path* with its suffix replaced by ``.lock``. Locking is
    best-effort: if ``filelock`` cannot be imported, or the lock cannot be
    acquired within ``_LOCK_TIMEOUT_S`` seconds, the write still goes ahead
    unlocked (it remains an atomic temp-file + ``os.replace`` write) rather
    than dropping the record.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    lock_file = path.with_suffix(".lock")

    try:
        from filelock import FileLock, Timeout as FileLockTimeout
    except ImportError:
        # filelock is a declared dependency; if it is missing anyway, degrade
        # to the plain atomic write instead of failing the job.
        _atomic_write_json(path, payload)
        return

    try:
        with FileLock(str(lock_file), timeout=_LOCK_TIMEOUT_S):
            _atomic_write_json(path, payload)
    except FileLockTimeout:
        # Someone else is holding the lock: write directly rather than lose
        # the result entirely.
        _atomic_write_json(path, payload)
165
+
166
+
167
def _read_json_quiet(path: Path) -> dict | None:
    """Read a JSON object from *path*; return None when no usable record exists.

    "No usable record" covers: the file is absent or unreadable, is not valid
    UTF-8, is empty / whitespace-only, is not valid JSON (e.g. truncated), or
    holds a JSON value that is not an object. The function never raises for a
    bad file, so a lookup degrades to ``not_found`` instead of crashing.

    Args:
        path: File to read.

    Returns:
        The decoded ``dict``, or ``None``.
    """
    try:
        # EAFP: a missing file raises FileNotFoundError (an OSError), so no
        # separate exists() probe is needed. UnicodeDecodeError is a
        # ValueError, NOT an OSError, and must be caught explicitly or a
        # file with stray non-UTF-8 bytes would break the "never raises"
        # contract.
        raw = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None
    if not raw.strip():
        return None
    try:
        data = json.loads(raw)
    except (ValueError, RecursionError):
        # json.JSONDecodeError subclasses ValueError; RecursionError guards
        # against a pathologically nested document.
        return None
    return data if isinstance(data, dict) else None
186
+
187
+
188
+ # ---------------------------------------------------------------------------
189
+ # Record (de)serialisation
190
+ # ---------------------------------------------------------------------------
191
+
192
def _now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.000Z``.

    The millisecond field is a literal ``000``; resolution is whole seconds.
    """
    utc_now = time.gmtime()
    stamp_format = "%Y-%m-%dT%H:%M:%S.000Z"
    return time.strftime(stamp_format, utc_now)
194
+
195
+
196
def _record_to_status_dict(rec: dict) -> dict:
    """Turn a record loaded from disk into a ``{job_id, status}`` dict.

    A record still marked ``running`` on disk is reported as ``interrupted``:
    the process that owned it is gone and its thread cannot be resumed.
    """
    persisted = rec.get("status")
    effective = STATUS_INTERRUPTED if persisted == STATUS_RUNNING else persisted
    return {"job_id": rec.get("job_id"), "status": effective}
206
+
207
+
208
def _record_to_result_dict(rec: dict) -> dict:
    """Turn a record loaded from disk into a ``{job_id, status, result, error}`` dict.

    As with the status projection, a persisted ``running`` status is reported
    as ``interrupted``.
    """
    persisted = rec.get("status")
    effective = STATUS_INTERRUPTED if persisted == STATUS_RUNNING else persisted
    projected = {"job_id": rec.get("job_id"), "status": effective}
    projected["result"] = rec.get("result")
    projected["error"] = rec.get("error")
    return projected
218
+
219
+
220
class _Job:
    """Mutable in-memory state of a single background job.

    ``_lock`` guards the ``status`` / ``result`` / ``error`` fields.
    ``_persist_lock`` serialises the writers of this job's on-disk record
    (the initial RUNNING write and the later terminal write) so a terminal
    record is not clobbered by a late RUNNING write.
    """

    __slots__ = (
        "job_id", "status", "result", "error",
        "thread", "cancel_event", "_lock", "project_dir",
        "submitted_at", "_persist_lock",
    )

    def __init__(self, job_id: str, project_dir: str | None = None) -> None:
        # Identity and provenance.
        self.job_id: str = job_id
        self.project_dir: str | None = project_dir
        self.submitted_at: str = _now_iso()
        # Outcome fields; a new job starts out RUNNING with nothing to report.
        self.status: str = STATUS_RUNNING
        self.result: Any = None
        self.error: str | None = None
        # Execution and synchronisation primitives.
        self.thread: threading.Thread | None = None
        self.cancel_event: threading.Event = threading.Event()
        self._lock: threading.Lock = threading.Lock()
        self._persist_lock: threading.Lock = threading.Lock()

    def to_status_dict(self) -> dict:
        """Return ``{job_id, status}``, read under the job lock."""
        with self._lock:
            snapshot = {"job_id": self.job_id, "status": self.status}
        return snapshot

    def to_result_dict(self) -> dict:
        """Return ``{job_id, status, result, error}``, read under the job lock."""
        with self._lock:
            snapshot = {
                "job_id": self.job_id,
                "status": self.status,
                "result": self.result,
                "error": self.error,
            }
        return snapshot

    def to_record(self) -> dict:
        """Serialise to a persistable record (caller holds ``self._lock``).

        ``completed_at`` is stamped with the current time for any status other
        than RUNNING and left empty while the job is still running.
        """
        still_running = self.status == STATUS_RUNNING
        completed_at = "" if still_running else _now_iso()
        return {
            "schema_version": _SCHEMA_VERSION,
            "job_id": self.job_id,
            "project_dir": self.project_dir or "",
            "status": self.status,
            "result": self.result,
            "error": self.error,
            "submitted_at": self.submitted_at,
            "completed_at": completed_at,
        }
268
+
269
+
270
class JobRegistry:
    """Thread-safe registry of background jobs with optional disk persistence.

    Usage::

        registry = JobRegistry()
        job_id = registry.start(my_fn, arg1, kw=val, project_dir="/path/proj")
        registry.status(job_id)   # -> {"job_id": ..., "status": "running"}
        registry.cancel(job_id)   # -> {"job_id": ..., "cancelled": True/False}
        registry.result(job_id)   # -> {"job_id": ..., "status": "done", "result": ...}

    Persistence is engaged only when a ``project_dir`` is supplied to
    ``start``; without it the registry is in-memory only.

    Locking: ``self._lock`` guards the ``_jobs`` dict; each job's ``_lock``
    guards its status/result/error; each job's ``_persist_lock`` serialises
    writers of its on-disk record. Where both job locks are taken,
    ``_persist_lock`` is always acquired before ``_lock``.
    """

    def __init__(self, max_concurrent: int = MAX_CONCURRENT) -> None:
        self._max_concurrent = max_concurrent
        self._jobs: dict[str, _Job] = {}
        self._lock = threading.Lock()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _running_count(self) -> int:
        """Count in-memory jobs whose status is RUNNING (caller holds ``self._lock``)."""
        return sum(1 for j in self._jobs.values() if j.status == STATUS_RUNNING)

    @staticmethod
    def _is_safe_job_id(job_id: Any) -> bool:
        """True if *job_id* can be used as a single file-name component.

        Job ids arrive from callers and are joined into a filesystem path, so
        anything that could escape the job directory (path separators, a
        drive/stream colon, NUL, ``.`` / ``..``) is rejected. Ids produced by
        ``start`` are ``uuid4().hex`` and always pass.
        """
        if not isinstance(job_id, str) or job_id in ("", ".", ".."):
            return False
        return not any(ch in job_id for ch in ("/", "\\", ":", "\x00"))

    @staticmethod
    def _accepts_cancel_event(fn: Callable) -> bool:
        """True if *fn* declares a keyword-passable ``cancel_event`` parameter.

        Uses ``inspect.signature`` rather than ``__code__.co_varnames``:
        ``co_varnames`` also lists *local variables*, so a function that
        merely has a local named ``cancel_event`` would wrongly be handed an
        unexpected keyword argument. A callable whose signature cannot be
        introspected is treated as not accepting the event.
        """
        import inspect  # local import: only needed when a job is started

        try:
            params = inspect.signature(fn).parameters
        except (TypeError, ValueError):
            return False
        param = params.get("cancel_event")
        if param is None:
            return False
        return param.kind in (
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
            inspect.Parameter.KEYWORD_ONLY,
        )

    @staticmethod
    def _write_record(job: _Job, record: dict) -> None:
        """Write *record* and the global index entry for *job*, fail-soft.

        Deliberate best-effort: a persistence failure (I/O error, result that
        is not JSON-serialisable, ...) must never break job execution — the
        in-memory result stays valid even if nothing reaches the disk.
        """
        try:
            _write_with_lock(_job_file(job.project_dir, job.job_id), record)
            # Global job_id -> project_dir index for by-id lookups.
            _write_with_lock(_index_file(job.job_id), {"project_dir": job.project_dir})
        except Exception:
            pass

    def _persist(self, job: _Job) -> None:
        """Persist *job*'s current record to disk (no-op without project_dir).

        Terminal precedence: a non-terminal (``running``) snapshot never
        overwrites a terminal record already on disk. All writers of a job's
        record go through the job's ``_persist_lock``.
        """
        project_dir = job.project_dir
        if not project_dir:
            return
        with job._persist_lock:
            with job._lock:
                record = job.to_record()
            if record["status"] not in _TERMINAL_STATUSES:
                # About to write a non-terminal record: defer to any terminal
                # record already on disk (the job finished first).
                existing = _read_json_quiet(_job_file(project_dir, job.job_id))
                if existing and existing.get("status") in _TERMINAL_STATUSES:
                    return
            self._write_record(job, record)

    def _finish(
        self,
        job: _Job,
        status: str,
        *,
        result: Any = None,
        error: str | None = None,
    ) -> None:
        """Move *job* from RUNNING to a terminal *status*, disk before memory.

        The terminal record is written to disk first and only then published
        in memory, so once ``status`` / ``result`` report a terminal state the
        on-disk record is at least as advanced.

        First terminal transition wins: if the job is no longer RUNNING
        (already done, failed, cancelled or timed out) this call is a no-op.
        In particular a late TIMEOUT can no longer overwrite a job that
        already completed, and ``result`` / ``error`` are only stored on the
        job together with the status, never while it still reads RUNNING.
        """
        with job._persist_lock:
            with job._lock:
                if job.status != STATUS_RUNNING:
                    return  # pre-empted or already terminal via another path
                # Build the terminal record without touching the job yet.
                record = job.to_record()
                record["status"] = status
                record["result"] = result
                record["error"] = error
                record["completed_at"] = _now_iso()

            if job.project_dir:
                self._write_record(job, record)

            with job._lock:
                if job.status != STATUS_RUNNING:
                    # cancel() won the race during the disk write. Its own
                    # _persist() is queued on _persist_lock and will replace
                    # the record written above with the cancelled one.
                    return
                job.result = result
                job.error = error
                job.status = status

    def _resolve_from_disk(self, job_id: str, project_dir: str | None) -> dict | None:
        """Load a job record from disk by id. Returns the record dict or None.

        * ``project_dir`` given -> read only that project's file (scoped;
          enforces cross-project isolation).
        * ``project_dir`` None  -> consult the global index to find the owning
          project, then read its file (by-id lookup).

        Bounded: at most a couple of file reads, never a directory walk. An id
        that is not a plain file-name component resolves to None, so it cannot
        be used to read files outside the job directories.
        """
        if not self._is_safe_job_id(job_id):
            return None
        if project_dir:
            return _read_json_quiet(_job_file(project_dir, job_id))
        idx = _read_json_quiet(_index_file(job_id))
        if not idx:
            return None
        owning = idx.get("project_dir")
        if not owning:
            return None
        return _read_json_quiet(_job_file(owning, job_id))

    # ------------------------------------------------------------------
    # Test seam
    # ------------------------------------------------------------------

    def _persist_running_record_for_test(self, project_dir: str) -> str:
        """Persist a fresh record in the RUNNING state and return its job_id.

        Simulates a job that began executing and was then killed before
        reaching a terminal state (used by the running→interrupted test).
        """
        job = _Job(uuid.uuid4().hex, project_dir=project_dir)
        with self._lock:
            self._jobs[job.job_id] = job
        self._persist(job)  # status is RUNNING
        return job.job_id

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def start(
        self,
        fn: Callable,
        *args: Any,
        timeout_s: int = DEFAULT_TIMEOUT_S,
        project_dir: str | None = None,
        **kwargs: Any,
    ) -> dict:
        """Start *fn* in a background thread and return {job_id, status}.

        Returns {"status": "busy", "job_id": None} when the concurrent cap
        is already reached — callers should retry later.

        The job's cancel event is passed as keyword argument ``cancel_event``
        only if *fn* declares a parameter of that name.

        Args:
            fn: Callable to run in a background thread.
            *args: Positional arguments forwarded to *fn*.
            timeout_s: Wall-clock timeout in seconds. When the job runs
                longer than this, its cancel_event is set and status
                transitions to ``"timeout"``. Default: ``DEFAULT_TIMEOUT_S``
                (600 s). Pass 0 to disable the timeout.
            project_dir: When given, the job's record + result are persisted
                under ``<project_dir>/.cortex/cortex_jobs/`` so they survive a
                process restart. When None, the job is in-memory only.
            **kwargs: Keyword arguments forwarded to *fn*.
        """
        with self._lock:
            if self._running_count() >= self._max_concurrent:
                return {"job_id": None, "status": "busy",
                        "message": f"server busy: max {self._max_concurrent} concurrent jobs reached"}

            job_id = uuid.uuid4().hex
            job = _Job(job_id, project_dir=project_dir)
            self._jobs[job_id] = job

        # Persist the initial RUNNING record so a death mid-flight is later
        # detectable as ``interrupted`` (only when persistence is enabled).
        self._persist(job)

        inject_cancel = self._accepts_cancel_event(fn)

        def _worker() -> None:
            try:
                if inject_cancel:
                    result = fn(*args, cancel_event=job.cancel_event, **kwargs)
                else:
                    result = fn(*args, **kwargs)
            except BaseException:
                # Thread boundary: record every failure, including SystemExit
                # raised inside fn, which would otherwise end the thread
                # silently and leave the job RUNNING and holding a slot.
                self._finish(job, STATUS_ERROR, error=traceback.format_exc())
            else:
                self._finish(job, STATUS_DONE, result=result)

        worker = threading.Thread(target=_worker, daemon=True)
        job.thread = worker
        worker.start()

        if timeout_s > 0:
            def _timeout_watcher() -> None:
                """Wait for the worker up to timeout_s; time the job out if still running."""
                # Joining the worker (instead of sleeping on cancel_event)
                # lets the watcher exit as soon as the job finishes rather
                # than lingering for the whole timeout.
                worker.join(timeout=timeout_s)
                with job._lock:
                    still_running = job.status == STATUS_RUNNING
                    if still_running:
                        job.cancel_event.set()
                if still_running:
                    self._finish(job, STATUS_TIMEOUT)

            threading.Thread(target=_timeout_watcher, daemon=True).start()

        return {"job_id": job_id, "status": STATUS_RUNNING}

    def status(self, job_id: str, project_dir: str | None = None) -> dict:
        """Return current status dict or {"status": "not_found"} for unknown ids.

        Falls back to disk when the job is not in memory (e.g. after a restart);
        a persisted ``running`` record surfaces as ``interrupted``. Passing
        ``project_dir`` scopes the disk lookup to that project only.
        """
        with self._lock:
            job = self._jobs.get(job_id)
        if job is not None:
            return job.to_status_dict()
        rec = self._resolve_from_disk(job_id, project_dir)
        if rec is None:
            return {"job_id": job_id, "status": "not_found"}
        return _record_to_status_dict(rec)

    def result(self, job_id: str, project_dir: str | None = None) -> dict:
        """Return result dict. Status is still "running" if not yet done.

        Falls back to disk when the job is not in memory; a persisted
        ``running`` record surfaces as ``interrupted``. Passing
        ``project_dir`` scopes the disk lookup to that project only.
        """
        with self._lock:
            job = self._jobs.get(job_id)
        if job is not None:
            return job.to_result_dict()
        rec = self._resolve_from_disk(job_id, project_dir)
        if rec is None:
            return {"job_id": job_id, "status": "not_found", "result": None, "error": None}
        return _record_to_result_dict(rec)

    def cancel(self, job_id: str, project_dir: str | None = None) -> dict:
        """Signal the job's cancel_event. Returns {job_id, cancelled: bool}.

        A job only living on disk (after a restart) cannot be cancelled — its
        thread is gone; it is reported with its persisted terminal/interrupted
        state instead.
        """
        with self._lock:
            job = self._jobs.get(job_id)
        if job is None:
            rec = self._resolve_from_disk(job_id, project_dir)
            if rec is None:
                return {"job_id": job_id, "cancelled": False, "reason": "not_found"}
            state = _record_to_status_dict(rec)["status"]
            return {"job_id": job_id, "cancelled": False,
                    "reason": f"job not in memory (persisted state: {state})"}

        with job._lock:
            was_running = job.status == STATUS_RUNNING
            if was_running:
                job.cancel_event.set()
                job.status = STATUS_CANCELLED
            # Snapshot under the lock so the message below is consistent.
            current_status = job.status
        if was_running:
            self._persist(job)
            return {"job_id": job_id, "cancelled": True}
        return {"job_id": job_id, "cancelled": False,
                "reason": f"job already in terminal state: {current_status}"}
563
+
564
+ # Module-level singleton used by both servers.
565
+ _registry = JobRegistry()
566
+
567
+
568
def start(
    fn: Callable,
    *args: Any,
    timeout_s: int = DEFAULT_TIMEOUT_S,
    project_dir: str | None = None,
    **kwargs: Any,
) -> dict:
    """Start *fn* as a background job on the module-level registry.

    Thin wrapper: all arguments are forwarded unchanged to ``_registry.start``.
    """
    forwarded = dict(kwargs, timeout_s=timeout_s, project_dir=project_dir)
    return _registry.start(fn, *args, **forwarded)
576
+
577
+
578
def status(job_id: str, project_dir: str | None = None) -> dict:
    """Return the status dict for *job_id* from the module-level registry."""
    registry = _registry
    return registry.status(job_id, project_dir=project_dir)
580
+
581
+
582
def result(job_id: str, project_dir: str | None = None) -> dict:
    """Return the result dict for *job_id* from the module-level registry."""
    registry = _registry
    return registry.result(job_id, project_dir=project_dir)
584
+
585
+
586
def cancel(job_id: str, project_dir: str | None = None) -> dict:
    """Request cancellation of *job_id* on the module-level registry."""
    registry = _registry
    return registry.cancel(job_id, project_dir=project_dir)
vigil_mcp/_paths.py ADDED
@@ -0,0 +1,93 @@
1
+ """Project-root resolution for vigil_mcp servers.
2
+
3
+ Used by the auto project-targeting feature: when a tool is called with an
4
+ empty/absent ``path`` the server walks up from a starting directory looking
5
+ for a project marker (``.git`` / ``pyproject.toml`` / ``package.json`` and a
6
+ few common siblings). If no marker is found it falls back to the starting
7
+ directory. ``None`` starts the walk from the current working directory.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ from pathlib import Path
13
+
14
+ # Markers that identify a project root, in no particular priority — the walk
15
+ # returns the *nearest* ancestor (starting dir first) that contains ANY marker.
16
+ _PROJECT_MARKERS: tuple[str, ...] = (
17
+ ".git",
18
+ "pyproject.toml",
19
+ "package.json",
20
+ "setup.py",
21
+ "setup.cfg",
22
+ "Cargo.toml",
23
+ "go.mod",
24
+ "pom.xml",
25
+ "build.gradle",
26
+ )
27
+
28
+
29
def _has_marker(directory: Path) -> bool:
    """Report whether *directory* directly contains at least one project marker."""
    return any((directory / name).exists() for name in _PROJECT_MARKERS)
35
+
36
+
37
def _resolve_project_root(start: str | None) -> str:
    """Find the project root at or above *start*.

    Args:
        start: Where to begin searching. ``None`` or an empty string means the
            current working directory.

    Returns:
        The absolute path (``str``) of the closest directory — *start* itself
        first, then its ancestors — that holds a project marker. If none is
        found within bounds, the resolved *start* is returned.

    Notes:
        If *start* cannot be resolved, the current working directory is used
        instead. The upward search never adopts the user's home directory or
        anything above it; *start* itself is exempt from that boundary.
    """
    origin = Path(start) if start else Path(os.getcwd())

    # Make the origin absolute; the path is not required to exist.
    try:
        origin = origin.resolve()
    except (OSError, RuntimeError):
        origin = Path(os.getcwd()).resolve()

    # Directories at which the upward walk must stop: the home directory and
    # every ancestor of it ("audit my whole home folder" is never the intent).
    try:
        home_dir = Path.home().resolve()
    except (OSError, RuntimeError):
        off_limits = None
    else:
        off_limits = {home_dir, *home_dir.parents}

    # The origin wins outright when it carries a marker, even if it is the
    # home directory itself.
    if _has_marker(origin):
        return str(origin)

    # Walk the ancestors from nearest to the filesystem root.
    for ancestor in origin.parents:
        if off_limits is not None and ancestor in off_limits:
            break
        if _has_marker(ancestor):
            return str(ancestor)

    # Nothing found within bounds: fall back to the origin.
    return str(origin)