vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,780 @@
1
+ Metadata-Version: 2.4
2
+ Name: vigil-codeintel
3
+ Version: 0.1.0
4
+ Summary: Multi-language code intelligence: structural mapper, forensic gate auditor, and two FastMCP stdio servers
5
+ Author: Julio
6
+ License: MIT
7
+ Classifier: Programming Language :: Python :: 3.10
8
+ Classifier: Programming Language :: Python :: 3.11
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Classifier: Programming Language :: Python :: 3.13
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Topic :: Software Development :: Quality Assurance
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Development Status :: 4 - Beta
16
+ Requires-Python: >=3.10
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: tree-sitter<0.26,>=0.25
20
+ Requires-Dist: tree-sitter-language-pack>=1.10
21
+ Requires-Dist: filelock<4,>=3.12
22
+ Requires-Dist: mcp>=1.0
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest<8,>=7.4; extra == "dev"
25
+ Dynamic: license-file
26
+
27
+ # vigil
28
+
29
+ Two FastMCP stdio servers for code intelligence, backed by multi-language static analysis cores.
30
+
31
+ **License:** MIT (see [LICENSE](LICENSE)). Change the copyright holder before any publication.
32
+
33
+ ---
34
+
35
+ ## What it is
36
+
37
+ `vigil` packages three cooperating libraries:
38
+
39
+ - **`vigil_mapper`** — structural code mapper. Parses Python (stdlib `ast`) and Go/Java/JS/TS (tree-sitter). Produces typed maps: structural (imports + symbols), data contracts, runtime signals, authority writes, hotspots, refactor boundaries, conflicts, and findings. Output is written to `<project>/.cortex/maps/` as JSON.
40
+
41
+ - **`vigil_forensic`** — static forensic gate auditor. Runs a suite of 40+ pattern-based checks (broad-except, hallucinations, TOCTOU, security injection, config-safety, contract drift, etc.) against a project directory. Returns structured findings with severity, category, evidence, and fingerprint. Single public function: `run_forensic_audit(project_dir, ...) -> dict`.
42
+
43
+ - **`vigil_mcp`** — two FastMCP stdio servers (`code-map`, `forensic-audit`) that wrap the above cores behind a **background-job + poll** API. Resource-constrained: max 2 concurrent jobs, cancellable, output paginated/capped at 80 000 chars (~25 k tokens) per page.
44
+
45
+ ---
46
+
47
+ ## Capability matrix
48
+
49
+ The table below reflects the actual `supports_*` flags and implementation state read from the adapter sources.
50
+
51
+ | Language | Structural (imports + symbols) | Contracts | Runtime signals | Authority writes |
52
+ |----------|-------------------------------|-----------|-----------------|------------------|
53
+ | **Python** | yes — stdlib `ast`, fully implemented | yes — `ast`: `@dataclass`, pydantic `BaseModel`, `TypedDict`, `NamedTuple` | yes — `ast`: import-time side effects, decorator registries, `os.getenv`/`environ` reads | yes — `ast`: `write_text`/`write_bytes`/`save`/`json.dump`/`open(...,"w")` |
54
+ | **Go** | yes — tree-sitter, fully implemented | yes — structs and interfaces via tree-sitter | yes — `init`, goroutine spawns, package-level `var = call(...)` | yes — `os.WriteFile`, `os.Create`, `.Write`, `.Exec` |
55
+ | **Java** | yes — tree-sitter, fully implemented | yes — class/record/interface/enum via tree-sitter | yes — `static {}`, Spring stereotypes, thread/executor spawns | yes — `Files.write`, `.write`/`.append`, `.save`/`.persist`, `new FileWriter` |
56
+ | **JavaScript** | yes — tree-sitter, fully implemented | not supported (`supports_contracts = False`) | yes — timer, event listener, top-level effects | yes — write patterns via tree-sitter |
57
+ | **TypeScript** | yes — tree-sitter, fully implemented | yes — via regex (contracts, interfaces, zod schemas) | yes — via regex | yes — via tree-sitter |
58
+
59
+ **Forensic gates:** language-aware; they run on all five languages where applicable. The gate framework uses `vigil_mapper` sources internally. It includes an **ML/NN check pack** (`ml.*`): future-data leakage (`.shift(-N)`), scaler `fit`/`fit_transform` on `*_test`/`*_val` splits (train→test leakage), `train_test_split` without `random_state` (non-reproducible), and RNG use without a seed — high-precision static checks for data-science / model code.
60
+
61
+ > **Note on the Python row.** `PythonAdapter` extracts contracts/runtime/writers directly via `ast` (parity with Go/Java/TS at the adapter layer). The map builders (`data_contract_builder.py`, `authority_builder.py`, `runtime_builder.py`) remain the authoritative L2+ path and add deeper detection (e.g. the atomic-write trio `os.fdopen`+`write`+`os.replace`); the adapter methods surface the same signals at the source-adapter layer. Reads (`open(p)` / `open(p, "r")` / `.read_text()` / `json.load` / `json.dumps`) are not writes.
62
+
63
+ ### Authority map works out-of-the-box (no seed required)
64
+
65
+ The authority map (`vigil_mapper/authority_builder.py`) is useful on any project **without configuration**. With **no** `<project>/.cortex/map_seeds/authority_domains.json`, every discovered write site is auto-surfaced as an *inferred* per-writer `AuthorityDomain` (`status="inferred"`, `source="static_scan"`, modest confidence). Each entry names the writer file (`canonical_owner`) and lists its resolved write targets + operation kinds, so the map is immediately actionable. A pure read never produces an entry.
66
+
67
+ Providing a seed switches to the structured behaviour: domains carry `target_file_patterns`, writers are attributed by AST-resolved target match, and seed entries are `status="observed"`. With a seed present, the per-writer auto-surfacing is **not** added (no double-surfacing).
68
+
69
+ Known limitation: write sites whose target is unresolvable and which use idioms outside the detected set — notably the atomic-write trio `os.fdopen(fd, "w")` + `fh.write(...)` + `os.replace(tmp, str(path))` — are not detected, so a file that *only* writes that way (e.g. `vigil_mapper/map_storage.py`) will not surface. This is a discovery-layer limitation, independent of the seed behaviour.
70
+
71
+ ### Runtime map surfaces entrypoints out-of-the-box (no seed required)
72
+
73
+ The runtime map (`vigil_mapper/runtime_builder.py`) surfaces real entrypoints on any project **without configuration**. With **no** `<project>/.cortex/map_seeds/runtime_seed.json`, the Python AST scanner (`_runtime_ast._RuntimeVisitor`) emits inferred `RuntimeNode` entries (`status="inferred"`, `source="static_scan"`, evidence pointing at `file:line`) for:
74
+
75
+ - `if __name__ == "__main__":` blocks (`kind="main_entrypoint"`); the invoked entry functions are recorded in `calls`;
76
+ - the module-level function(s) invoked from that block (`kind="entry_function"`);
77
+ - async entrypoints (`asyncio.run(...)` in a `__main__` block) — tagged `async_entrypoint`.
78
+
79
+ Adapter-provided runtime signals (Go `init`/goroutine, Java static-block/Spring/thread, JS timer/listener/top-level effect) also surface without a seed via `collect_adapter_runtime_nodes`; the Python `__main__`/entry-function scan described above fills the gap that previously existed for Python.
80
+
81
+ Precision guard: an ordinary helper function or a plain import is **not** an entrypoint. A `def main(): ...` *without* a `__main__` guard is just a function and does **not** produce a `main_entrypoint` node. Providing a seed keeps the existing behaviour — seed nodes are `status="canonical"` and win on name conflicts, so the same node is never double-surfaced; auto-discovered nodes augment the seed.
82
+
83
+ Known limitation: entrypoints exposed only via packaging (`console_scripts` / `[project.scripts]`) without an in-file `__main__` guard — e.g. `vigil_mapper/cli_entry.py` — are not surfaced by the static scan (there is no in-source signal to key on). Background tasks/routes are detected only inside init-style function bodies (`__init__`/`bootstrap`/`setup`/`startup`/`start`/`initialize`/`init`), per the existing visitor scope.
84
+
85
+ ---
86
+
87
+ ## Install
88
+
89
+ ```bash
90
+ pip install -e .
91
+ ```
92
+
93
+ **Hard dependencies** (pulled automatically by pip):
94
+ - `tree-sitter >= 0.25, < 0.26`
95
+ - `tree-sitter-language-pack >= 1.10`
96
+ - `filelock >= 3.12, < 4`
97
+ - `mcp >= 1.0`
98
+
99
+ **Dev extras** (adds pytest):
100
+ ```bash
101
+ pip install -e ".[dev]"
102
+ ```
103
+
104
+ ---
105
+
106
+ ## Register in Claude Code
107
+
108
+ ### Option A — `claude mcp add` (stdio, recommended)
109
+
110
+ ```bash
111
+ claude mcp add code-map -- vigil-mapper-mcp
112
+ claude mcp add forensic-audit -- vigil-forensic-mcp
113
+ ```
114
+
115
+ Both commands are entry points installed by `pip install -e .`.
116
+
117
+ ### Option B — `.mcp.json` (project file)
118
+
119
+ ```json
120
+ {
121
+ "mcpServers": {
122
+ "code-map": {
123
+ "type": "stdio",
124
+ "command": "vigil-mapper-mcp",
125
+ "args": []
126
+ },
127
+ "forensic-audit": {
128
+ "type": "stdio",
129
+ "command": "vigil-forensic-mcp",
130
+ "args": []
131
+ }
132
+ }
133
+ }
134
+ ```
135
+
136
+ Place `.mcp.json` in the project root or in `~/.claude/`.
137
+
138
+ ### Option C — Claude Code plugin marketplace
139
+
140
+ Installable as a Claude Code **plugin** straight from GitHub. The plugin launches the
141
+ servers via `python -m vigil_mcp.*`, so the package must be importable in the Python that
142
+ Claude Code uses — install it first, then add the marketplace:
143
+
144
+ ```bash
145
+ pip install "git+https://github.com/iuliimanchini-dot/Vigil.git"
146
+ ```
147
+
148
+ Then inside Claude Code:
149
+
150
+ ```
151
+ /plugin marketplace add iuliimanchini-dot/Vigil
152
+ /plugin install vigil-tools@vigil-marketplace
153
+ /mcp # code-map + forensic-audit appear
154
+ ```
155
+
156
+ The plugin ships `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, and a
157
+ root `.mcp.json` declaring both stdio servers. (If `python` is not the interpreter with
158
+ `vigil` installed, edit `.mcp.json`'s `command` to the full path of that interpreter.)
159
+
160
+ ---
161
+
162
+ ## Tool list
163
+
164
+ ### Server: `code-map`
165
+
166
+ | Tool | Description |
167
+ |------|-------------|
168
+ | `start_code_map` | Start a background map-build job. Args: `path` (absolute project root), `map` (`"all"` or specific map name), `max_files` (optional file-count ceiling, default 800). Returns `job_id`. |
169
+ | `get_code_map_status` | Poll job status. Args: `job_id`. Returns `status`: `running / done / error / cancelled / not_found`. |
170
+ | `get_code_map_results` | Retrieve completed results (paginated). Args: `job_id`, `page` (0-based), `page_size_chars`. Returns structured maps payload. |
171
+ | `load_code_map_by_path` | Load previously built maps from disk without a job. Args: `path`, `page`, `page_size_chars`. |
172
+ | `cancel_code_map` | Cancel a running job. Args: `job_id`. |
173
+
174
+ ### Server: `forensic-audit`
175
+
176
+ | Tool | Description |
177
+ |------|-------------|
178
+ | `start_forensic_audit` | Start a background forensic audit. Args: `path`, `gates` (comma-separated check_ids or empty for all), `severity` (`LOW / MEDIUM / HIGH / CRITICAL`), `all_languages`, `max_files` (optional file-count ceiling, default 800). Returns `job_id`. |
179
+ | `get_forensic_status` | Poll job status. Args: `job_id`. |
180
+ | `get_forensic_results` | Retrieve results (paginated + capped). Args: `job_id`, `page`, `page_size_chars`, `max_findings` (default 200). Returns `exit_code`, `findings`, `meta`, `errors`. |
181
+ | `cancel_forensic_audit` | Cancel a running audit. Args: `job_id`. |
182
+
183
+ ---
184
+
185
+ ## Usage pattern: the poll workflow
186
+
187
+ Both servers use the same start → poll → retrieve pattern. Push delivery is not used here (see note below).
188
+
189
+ ```python
190
+ # Example: map build via MCP tool calls (pseudocode showing the call sequence)
191
+
192
+ # 1. Start the job
193
+ result = call_tool("start_code_map", {"path": "/path/to/project", "map": "all"})
194
+ job_id = result["job_id"]
195
+
196
+ if result["status"] == "busy":
197
+     # Server is at max concurrent jobs; wait and retry start_code_map
197
+     ...
199
+
200
+ # 2. Poll until done
201
+ while True:
202
+     s = call_tool("get_code_map_status", {"job_id": job_id})
203
+     if s["status"] != "running":  # done / error / cancelled / not_found
204
+         break
205
+     time.sleep(2)
206
+
207
+ # 3. Retrieve results (paginated if large)
208
+ page = 0
209
+ while True:
210
+     r = call_tool("get_code_map_results", {"job_id": job_id, "page": page})
211
+     process(r["payload"])  # JSON string
212
+     if not r["truncated"]:
213
+         break
214
+     page += 1
215
+ ```
216
+
217
+ The same three-step pattern applies to `forensic-audit`: `start_forensic_audit` → `get_forensic_status` → `get_forensic_results`.
218
+
219
+ ---
220
+
221
+ ## Resource and concurrency guarantees
222
+
223
+ - **Max 2 concurrent jobs** per server process (enforced by `_jobs.JobRegistry`).
224
+ - **`forensic-audit` additionally uses `workers=1`** internally inside `run_forensic_audit`.
225
+ - Jobs are **cancellable** at any time via `cancel_code_map` / `cancel_forensic_audit`.
226
+ - Output is **paginated and capped**: each results page is at most 80 000 chars (~25 k tokens); findings are capped at 200 per `get_forensic_results` call by default.
227
+ - Map analysis is **incremental**: tree-sitter parses file-by-file; `run_map_build` has a 300 s time budget and writes each map independently — the server will not hang the host process.
228
+ - **File-count guard (anti-hang on huge repos).** Both tools do per-file AST work (forensic averages ~0.4 s/file), so a repo with thousands of files would take *hours*. When the collected source-file count exceeds **`max_files` (default 800 ≈ a ~5 min ceiling)** the tool **does not scan** — it returns a fast structured skip instead: forensic sets `meta.skipped_reason="too_many_files"` (with `file_count`, `max_files`, `top_subdirs`, `suggestion`); code-map surfaces the same via `get_code_map_results` (`view="skipped"`). To proceed, either narrow the scope (point `path` at a submodule) or pass a higher `max_files=` to `start_forensic_audit` / `start_code_map` to raise the ceiling and force a full scan. Vendored/build dirs (`.venv`, `site-packages`, `dist-packages`, `node_modules`, `build`, `dist`, `.tox`, `.eggs`, `.mypy_cache`, `.pytest_cache`, `.next`, …) are excluded from the count and the scan even when they sit outside a venv.
229
+
230
+ ---
231
+
232
+ ## Job persistence (results survive a restart)
233
+
234
+ Completed job results are **disk-backed**, so a finished audit or map build is
235
+ still retrievable after the MCP server process restarts.
236
+
237
+ - **Where files live.** Each job is persisted under its own project root at
238
+ `<project_dir>/.cortex/cortex_jobs/<job_id>.json` (the `project_dir` is the
239
+ resolved path the `start_*` tool targeted). A small global index keyed by
240
+ `job_id` lives under the user state dir (`~/.cortex/cortex_jobs_index/`) so a
241
+ restarted server — which polls by `job_id` only — can locate the owning
242
+ project. Persistence engages only when a `project_dir` is known; an in-memory
243
+ job started without one keeps the legacy behaviour (lost on exit).
244
+ - **Atomic mechanism.** Records are written via `tempfile.mkstemp` + `os.replace`
245
+ under a per-job `filelock.FileLock` — the same atomic pattern as
246
+ `vigil_mapper.map_storage`. `os.replace` is atomic on POSIX and Windows,
247
+ so a reader never observes a half-written file. The terminal record is written
248
+ to disk **before** the in-memory status flips to terminal, so disk is never
249
+ behind what `get_*_status` reports.
250
+ - **Restart / interrupted semantics.** Terminal records (`done` / `error` /
251
+ `cancelled` / `timeout`) reload verbatim. A record left in the `running` state
252
+ means the process died mid-flight; since the worker thread is gone and cannot
253
+ be resumed, it reloads as **`interrupted`** — never as `done`.
254
+ - **Cross-project rule.** A job's file lives only under its own project. Polling
255
+ by `job_id` resolves through the global index; polling *scoped to a specific
256
+ project* only reads that project's directory, so a job that ran under project
257
+ X is **not** visible when resolved scoped to project Y.
258
+ - **Bounded reads.** Disk lookups are by `job_id` (one index read + one record
259
+ read) — never a directory scan. Records carry the full result payload; there
260
+ is currently **no automatic cleanup** of `.cortex/cortex_jobs/` (large results
261
+ accumulate there until removed), so treat it like the `.cortex/maps/` cache.
262
+
263
+ ---
264
+
265
+ ## MCP push note
266
+
267
+ The default delivery mode for both servers is **poll** (the client calls `get_*_status` / `get_*_results` repeatedly). Claude Code does support server-to-client push notifications via `claude/channel` + `--channels`, but these servers do not use that mechanism — poll was chosen for simplicity and portability. If you need push-style delivery you can add it via the FastMCP channel API; it is feasible, just not wired up here.
268
+
269
+ ---
270
+
271
+ ## Default gate profile (size-noise control)
272
+
273
+ The forensic auditor reads size/complexity thresholds from a **gate profile**. A
274
+ default profile ships **inside the package** (so it is bundled in the wheel and
275
+ available after `pip install`):
276
+ [`vigil_forensic/gate_profile.json`](vigil_forensic/gate_profile.json).
277
+ Its only job is to cut **size-noise false-positives** — file-length,
278
+ function-length, and nesting-depth warnings firing on legitimately large code —
279
+ *without* hiding genuinely extreme outliers (a 2 000-line god-file still
280
+ surfaces).
281
+
282
+ ### Where the profile is discovered
283
+
284
+ `vigil_forensic.self_audit._load_gate_profile_if_present` looks, in order:
285
+
286
+ 1. `<audit-target>/gate_profile.json`
287
+ 2. `<audit-target>/.cortex/gate_profile.json`
288
+ 3. **ancestor walk** — the first `gate_profile.json` found in any parent
289
+ directory of the audit target.
290
+ 4. **packaged default** — the profile shipped inside the `vigil_forensic`
291
+ package. This is the effective default for any target with no profile of its
292
+ own and no ancestor profile (e.g. an arbitrary path audited after
293
+ `pip install`), and is why a sub-package audit such as
294
+ `run_forensic_audit("vigil_forensic")` still picks up the shipped default.
295
+
296
+ A target-local profile always wins over an ancestor or the packaged default. A
297
+ missing or malformed profile is logged and skipped — never fatal. The
298
+ **committed** default lives inside the package at `vigil_forensic/gate_profile.json`
299
+ so it ships in the wheel.
300
+
301
+ ### How to set your own
302
+
303
+ Copy the shipped file to your project root and edit `size_thresholds`:
304
+
305
+ ```bash
306
+ cp vigil_forensic/gate_profile.json /path/to/your-project/gate_profile.json
307
+ # then edit size_thresholds to taste
308
+ ```
309
+
310
+ ### Thresholds and their cited sources
311
+
312
+ JSON forbids comments, so the justification for every value is here. Each value
313
+ is a **published linter default**, not an arbitrary constant. `warn` =
314
+ MEDIUM-severity heads-up (advisory); `revise` = HIGH-severity "refactor now".
315
+
316
+ | Key | Value | Source / rationale |
317
+ |-----|-------|--------------------|
318
+ | `function_warn` | **100** | SonarQube `S138` and PMD `ExcessiveMethodLength` both default to **100** lines. (Clean Code's ~20–60 is an ideal, not a linter default — too aggressive for a real engine, would re-introduce noise.) |
319
+ | `function_revise` | **150** | 1.5× the SonarQube/PMD limit — a "clearly excessive" function that should be split. Isolates true outliers (e.g. 325- and 290-line functions in this repo). |
320
+ | `nesting_warn` | **5** | pylint `max-nested-blocks` **default = 5**. Nesting depth is the structural-complexity signal the engine actually measures; deep nesting is the same code smell McCabe's cyclomatic-complexity ≈10 guideline targets, expressed as a nesting bound. (SonarQube `S134`=3 is stricter; pylint's 5 is the widely-shipped default and avoids flagging ordinary depth-4 control flow.) |
321
+ | `nesting_revise` | **8** | Beyond any common linter's tolerance — genuinely tangled control flow worth flattening. |
322
+ | `file_warn` | **750** | SonarQube file-size flag default = **750** lines. |
323
+ | `file_revise` | **1000** | pylint `max-module-lines` **default = 1000**. A file past 1 000 lines is a god-file candidate. |
324
+
325
+ > **Note on cyclomatic complexity.** The size/complexity engine measures file
326
+ > LOC, function LOC, and **nesting depth** — it does not compute a McCabe
327
+ > cyclomatic-complexity number, and the profile has no `cyclomatic` key (one
328
+ > would be dead config). Nesting depth is used as the structural-complexity
329
+ > proxy, calibrated to pylint's `max-nested-blocks` default; the McCabe ≈10
330
+ > guideline informs that choice rather than being read directly.
331
+
332
+ ### Effect (measured before → after on this repo)
333
+
334
+ | Audit target | total before | total after | `size.*` before | `size.*` after |
335
+ |--------------|-------------:|------------:|----------------:|---------------:|
336
+ | `vigil_forensic/` | 125 | 86 | 92 | 55 |
337
+ | `vigil_mapper/`| 115 | 93 | 49 | 37 |
338
+
339
+ The remaining `size.*` findings are functions over 100 lines and nesting deeper
340
+ than 5 — code that genuinely exceeds the published limits, which is the intended
341
+ behavior, not a miss.
342
+
343
+ ---
344
+
345
+ ## Real-world metrics
346
+
347
+ Measured on **real third-party Python packages** copied out of this repo's
348
+ `.venv` (sans `__pycache__`), audited with the shipped default `gate_profile.json`
349
+ active (`file_warn 750 / file_revise 1000 / nesting_warn 5`). Reproduce with
350
+ [`tests/benchmark_realworld.py`](tests/benchmark_realworld.py)
351
+ (`python tests/benchmark_realworld.py` — single-threaded, KB-scale targets, light).
352
+
353
+ Hardware: Windows 11, CPython 3.11, `workers=1` (forensic enforces this
354
+ internally). "mem" = peak RSS delta over the call, sampled at 20 ms in a
355
+ background thread. "tokens" = MCP summary-view chars ÷ 4.
356
+
357
+ | Target | `.py` files | LOC | forensic time | forensic peak RSS | map(`all`) time | map peak RSS |
358
+ |--------|------------:|----:|--------------:|------------------:|----------------:|-------------:|
359
+ | `filelock` | 14 | 3 385 | 1.6 s | 8.1 MB | 0.5 s | 4.1 MB |
360
+ | `click` | 17 | 12 179 | 3.7 s | 2.3 MB | 0.9 s | 2.8 MB |
361
+ | `mcp` | 110 | 20 824 | 10.8 s | 6.2 MB | 1.4 s | 3.5 MB |
362
+
363
+ Forensic time is roughly linear in file count (~0.1 s/file here); the map build
364
+ is much cheaper. Memory stays low (single-digit MB peak delta) — these tools are
365
+ light enough to run inline.
366
+
367
+ ### MCP output stays in budget
368
+
369
+ The summary views (`forensic_server._build_forensic_summary`,
370
+ `map_server._build_map_summary`) are what an agent actually receives. Both stay
371
+ well under the ~6 k-token budget on every target:
372
+
373
+ | Target | forensic summary | map summary |
374
+ |--------|-----------------:|------------:|
375
+ | `filelock` | ~3.0 k tok (11.9 KB) | ~0.5 k tok (1.9 KB) |
376
+ | `click` | ~1.5 k tok (6.1 KB) | ~0.4 k tok (1.7 KB) |
377
+ | `mcp` | ~1.7 k tok (6.7 KB) | ~0.6 k tok (2.2 KB) |
378
+
379
+ (`filelock`'s summary is the largest because its findings are dominated by one
380
+ duplication cluster, so the per-`check_id` breakdown is wide. Still only ~3 k tokens — about half the budget.)
381
+
382
+ ### Determinism
383
+
384
+ `run_forensic_audit` is deterministic: run twice on each target, the sorted
385
+ `(check_id, file, line)` finding set is identical (no ordering or count drift).
386
+
387
+ ### False-positive reduction on clean code (2026-06)
388
+
389
+ The default gate selection was re-tuned to cut the ~50 % false-positive rate
390
+ observed on clean, idiomatic third-party code. Inspected baseline vs. current
391
+ on `filelock` (every finding checked against the cited `file:line`):
392
+
393
+ | Target | findings before | findings after | actual FPs after |
394
+ |--------|----------------:|---------------:|-----------------:|
395
+ | `filelock` | 32 | **2** | **0** |
396
+ | `click` | 54 | **33** | low (mostly real `size.*` / `broad_except.swallow`) |
397
+ | `mcp` | 110 | **43** | low (mostly real `size.*` / `broad_except.swallow`) |
398
+
399
+ The two `filelock` findings that remain are both honest: one `size.file_warn`
400
+ (`_soft_rw/_sync.py` is genuinely 858 lines > 750) and one informational
401
+ `meta.git_unavailable` (see below). Zero false claims about the code.
402
+
403
+ Fixes landed (each TDD'd; items 1–5 in
404
+ [`tests/test_forensic_fp_clean_code.py`](tests/test_forensic_fp_clean_code.py),
405
+ items 6–7 in [`tests/test_dup_and_sqli.py`](tests/test_dup_and_sqli.py)):
406
+
407
+ 1. **`broad_except` cleanup-then-reraise.** `except BaseException: <cleanup>;
408
+ raise` (filelock `_api.py:513`, `asyncio.py:268`) is the correct cancel-
409
+ cleanup idiom — it *re-raises*, it does not swallow. Both the regex
410
+ (`broad_except.base_exception`/`.bare`) and AST
411
+ (`broad_except.hidden_sentinel.bare_or_base`) detectors now skip any handler
412
+ whose body contains a top-level `raise`. Genuine swallows (no re-raise) still
413
+ fire.
414
+ 2. **`duplication.text_block` inflation + docstrings.** One duplicated region no
415
+ longer emits one finding per sliding-window line (~13 → 1); windows of the
416
+ same file-set at adjacent start lines are merged into a single region. Lines
417
+ inside string literals (shared docstrings / `:param` blocks on sync↔async API
418
+ mirrors) and pure parameter-declaration lines are excluded. Genuine copy-
419
+ pasted **code** blocks are still detected.
420
+ 3. **Zone-inference gates are now opt-in.** `god_object_zones` infers
421
+ "responsibility zones" from function-name prefixes against a fixed verb list
422
+ (`acquire/release/read/write/open/close/...`); a cohesive read-write-lock
423
+ class collides with that vocabulary and is wrongly flagged — ~0 true
424
+ positives here. It is **off by default** (moved to an opt-in set in
425
+ `self_audit._NOISY_OPT_IN_GATES`) and runs only when explicitly requested
426
+ (`run_forensic_audit(target, gates=["god_object_zones"])` or
427
+ `--gates god_object_zones`). The twin `size_complexity.zone_overload` sub-
428
+ check, which used the *same* name-prefix logic and double-reported the same
429
+ files, was **removed** outright; `size_complexity` keeps its objective
430
+ size/function-length/nesting budget checks.
431
+ 4. **`api.public_function_signature_change` in no-git mode.** With no git
432
+ baseline (no work tree, or no changed file resolves at `HEAD~1`, e.g. a
433
+ vendored/`site-packages` dir) the old code fell back to a docstring-param-
434
+ count heuristic that fired on every documented variadic API
435
+ (`click.decorators.option(*param_decls, **attrs)` → "0 params vs 3
436
+ documented"). The whole signature-drift check is now **skipped without a git
437
+ baseline** and reported once via `meta.git_unavailable`. It runs normally
438
+ when a real `HEAD~1` diff exists.
439
+ 5. **Profile fallback foot-gun.** An external target with no ancestor
440
+ `gate_profile.json` previously fell back to the *strict* code-defaults
441
+ (600/800/4) instead of the shipped defaults (750/1000/5). The loader
442
+ (`self_audit._load_gate_profile_if_present`) now falls back to the package's
443
+ **own shipped** `gate_profile.json` (bundled INSIDE the `vigil_forensic`
444
+ package and resolved relative to the module, so it ships in the wheel) as the
445
+ last resort. A target-local profile still wins.
446
+ 6. **`duplicate_scan` (near-duplicate code) per-line inflation.** The
447
+ intra-file near-duplicate detector (`assess_near_duplicate_code`) hashes a
448
+ sliding 4-line window, so one duplicated region of N lines emitted N−3
449
+ near-identical findings ("block at lines 118 and 201", "119 and 202", …).
450
+ On `filelock` this produced **39** `duplicate_scan` findings for only a
451
+ handful of real blocks. Adjacent/overlapping window-pairs are now **merged**
452
+ into ONE finding per contiguous block (same region-grouping idea as
453
+ `duplication.text_block`'s `_merge_starts`), reported as a line range:
454
+ `Near-duplicate block at lines 118-126 ↔ 201-209 (9 lines)`. filelock drops
455
+ **39 → 13** — a true merge, not a cap: genuinely separate duplicate blocks
456
+ still each report once (verified: `_api.py` `__call__`/`__init__` signature
457
+ mirror at `118-126 ↔ 201-209` is preserved as a single finding).
458
+ 7. **Focused SQL-injection detection (cluster 12, `security_scan`).** For
459
+ Python, `assess_security_patterns` flags a dynamic query passed to a
460
+ DB-call site (`.execute`/`.executemany`/`.executescript`/`.query`/`.raw`)
461
+ when the query is built by **f-string** interpolation, **`%`-format**,
462
+ **`str.format()`**, or **`+` string concatenation** with at least one
463
+ non-literal (variable) operand. The flagged string must have real SQL-clause
464
+ structure (`SELECT … FROM`, `UPDATE … SET`, `DELETE FROM`, …) and meet a
465
+ minimum length, so a SQL keyword in prose/log lines does not trip it. A plain
466
+ literal `execute("SELECT 1")`, a parametrised `execute("… ?", (x,))`, and a
467
+ constant concat of two literals (`"SELECT … " + "WHERE …"`) are **not**
468
+ flagged.
469
+ **Limits (honest):** detection is purely local/syntactic — it fires only
470
+ when the dynamic string is the *direct first argument* of the DB call.
471
+ There is **no taint tracking**: a query assembled in a prior statement
472
+ (`q = "SELECT … " + user_input; db.execute(q)`), passed through a helper, or
473
+ stored in a variable first is **not** detected. Non-Python languages get the
474
+ regex security patterns only (no SQLi AST rule). This is deliberately
475
+ low-false-positive, not full SQLi coverage.
476
+ 8. **`debug_print_scan` substring / CLI-output false positives.** The detector
477
+ matched the substring `print(` anywhere on a line, so it fired on (a) `print(`
478
+ *inside a string literal* (e.g. a detector's own pattern tuple
479
+ `(..., "print(", ...)`), (b) lines already carrying `# noqa: debug_print_scan`,
480
+ and (c) intentional user-facing `print()` in CLI/output functions (the path
481
+ allowlist only knew the pre-migration `BRAIN/autoforensics/self_audit.py` path,
482
+ not the packaged `self_audit.py`). For **Python** the gate is now AST-driven:
483
+ only a line carrying a genuine `print(...)` **call** (`ast.Call` with
484
+ `func=Name('print')`) can be flagged — a `print(` in a string literal or an
485
+ attribute call (`obj.print(...)`) is never flagged. On a file that fails to
486
+ parse it falls back to requiring the stripped line to **start** with `print(`
487
+ (statement position). Across all languages the gate now (i) respects
488
+ `# noqa: debug_print_scan` and a bare `# noqa` on the offending line, and
489
+ (ii) skips prints inside conventionally-named output functions — name starts
490
+ with `print_`/`_print_`, or is `main`/`cli`/`run`/`cli_main` (and underscore
491
+ variants). The rule is deliberately conservative: a `print_*` function
492
+ elsewhere in the file does **not** silence a stray `print()` in an unrelated
493
+ normal function, and a genuine `print("DEBUG", x)` in ordinary code is still
494
+ flagged. On `vigil_forensic` itself this cut `debug_print_scan` **12 → 0**
495
+ (all 12 were FPs: 10 in `print_human_summary()`, 2 in detector pattern
496
+ tuples); the corpus oracle (`tests/oracle/sample_quality.py:63`) stays flagged.
497
+ TDD'd in [`tests/test_debug_print_fp.py`](tests/test_debug_print_fp.py).
498
+ 9. **`commented_code_scan` prose false positives.** The detector grouped
499
+ consecutive comment lines and flagged a block when ≥2 of its lines matched a
500
+ permissive `code_indicators` regex (`\w=\w`, `def `, `return \w`, `for \w`,
501
+ `except \w`, …). Explanatory **prose** that merely *mentions* a code keyword in
502
+ an English sentence therefore tripped it — e.g. the design-rationale comment at
503
+ `broad_except_checks.py:21` ("… a line-only regex cannot tell a swallow from the
504
+ correct `except BaseException: <cleanup>; raise` idiom.") matched `except \w`
505
+ twice. The `code_indicators` count is now only a cheap **pre-filter**; a block is
506
+ reported as commented-out code solely when a prose-vs-code discriminator confirms
507
+ it. For **Python** that means a contiguous run of ≥2 of the de-commented body
508
+ lines **`ast.parse`-s** as valid statements (a leading prose intro line that
509
+ alone breaks parsing is trimmed, so a real block introduced by a sentence — like
510
+ the corpus oracle's `# legacy implementation kept around just in case:` followed
511
+ by a commented `for`/`return` body — is still caught via its inner code run).
512
+ For every language there is a fallback: ≥2 **distinct strong** structural signals
513
+ (an assignment with an identifier LHS, a `def`/`class`/`import`/`func`/`const`
514
+ header, a bare `name(...)` call statement, or a block-header line). A single
515
+ keyword inside grammatical English is not a strong signal, so prose does not reach
516
+ the bar. On `vigil_forensic` itself this cut `commented_code_scan` **22 → 0**
517
+ (all 22 were prose: design-rationale / FP-tightening notes that referenced code
518
+ in backticks — verified by inspecting each block); the corpus oracle
519
+ (`tests/oracle/sample_quality.py:69`, a genuine 5-line commented-out block) stays
520
+ flagged, so recall is preserved. **Honest limit:** discrimination is per-block and
521
+ purely syntactic — the AST path is Python-only, and a *non-Python* prose comment
522
+ that happens to start ≥2 lines with assignment/call/header shapes could still be
523
+ flagged; the "22 → 0" figure is measured on this repo, not a guarantee for all
524
+ codebases. TDD'd in [`tests/test_commented_code_fp.py`](tests/test_commented_code_fp.py).
525
+
526
+ 10. **Round-2 FP cuts on large real projects (TYPE_CHECKING imports,
527
+ magic-number bounds, docstring & duplicate tightening).** Measured against
528
+ the vendored `click` / `mcp` / `filelock` packages, line-by-line inspection
529
+ of the noisiest gates found four distinct false-positive *sources*; each was
530
+ fixed at the source (not suppressed) and the corpus oracle stays **22/22**.
531
+ Totals: **click 128 → 66, mcp 236 → 189, filelock 38 → 14.**
532
+ - **`unused_import_scan` on `if TYPE_CHECKING:` imports.** Two bugs. (a) The
533
+ TYPE_CHECKING line-collector walked the guard's `else:` branch too, so
534
+ runtime fallback imports (`filelock/__init__.py:26-27`) were mis-tagged as
535
+ type-only and flagged. Now only the `if` body is scanned. (b) A
536
+ TYPE_CHECKING import was counted as "used" only if it backed a *type annotation* — but
537
+ it also legitimately backs runtime `TypeVar(...)` construction
538
+ (`click/shell_completion.py:59`), `te.ParamSpec` / `sys.version_info`
539
+ attribute access (`click/utils.py:26`, `filelock/asyncio.py:22`), and
540
+ `__all__` re-exports. These are now counted as uses. A genuinely dead
541
+ TYPE_CHECKING import (referenced nowhere) is still flagged. click `2 → 0`,
542
+ filelock `7 → 0`.
543
+ - **`magic_number_scan` bounds.** The old window suppressed only `-10..10`
544
+ plus a fixed safe-set, so every bare small integer (terminal widths `24`,
545
+ ASCII `127`, byte/column values `11/12/20/50`) and sub-unit float
546
+ (`0.1`/`0.5`) dominated the noise. The small-int suppression bound is
547
+ raised to `|n| < 256` and sub-unit floats are skipped; HTTP codes / powers
548
+ of two / time constants remain explicitly safe. Large/unusual literals
549
+ (oracle's `86400`, mcp's `8707`) stay flagged. click `11 → 0`.
550
+ - **`docstring_param_scan` rebuilt on AST.** The old `def …(([^)]*))` regex
551
+ truncated parameters at the first `)` inside an annotation
552
+ (`f: t.Callable[..., t.Any]` → garbage param `t.Any]`) and could not span
553
+ multi-line / overloaded signatures, yielding 16 phantom mismatches on
554
+ click (zero real). Parameters now come from `ast` (including `*args` /
555
+ `**kwargs`, which idiomatic docstrings document by bare name), the docstring
556
+ is read via `ast.get_docstring`, Google-style `Args:` parsing stops at the
557
+ next `Returns:`/`Raises:` section (no more `Returns`/`Raises` "params"), and
558
+ the reST `:param <type> name:` form is parsed by last-token. Only the
559
+ genuine **documented-but-absent-parameter** direction is reported. click
560
+ `16 → 0`; mcp/filelock retain only real drift (e.g. `mcp …/server.py:125`
561
+ documents `server` for a param renamed to `_`).
562
+ - **`duplicate_scan` signature/parameter mirrors.** ~75 % of click's 38
563
+ hits and filelock's 13 were `@overload` stubs, parameter-list mirrors, and
564
+ shared signatures (e.g. filelock `AsyncFileLockMeta.__call__` ↔
565
+ `BaseAsyncFileLock.__init__`) — typing scaffolding repeated by API
566
+ contract, not refactorable logic. Signature-scaffolding lines (decorators,
567
+ `def` headers, bare `name: type = default,` parameter lines, `): ...`
568
+ stubs) are excluded from the duplicate-fingerprint, and a region must span
569
+ **≥ 5 meaningful lines** to report. Genuine multi-statement logic
570
+ duplicates survive (oracle's 6-line `route_alpha`/`route_beta`; click's
571
+ `_termui_impl.py` pager fallbacks). click `38 → 5`, filelock `13 → 2`.
572
+ - **Left as real (not tightened):** `context_fallback_save.fallback_without_else`
573
+ (4 on click, 4 on mcp). Inspected — these are heterogeneous low-severity
574
+ advisories (input-validation `return 400`, mode dispatch, non-task counter
575
+ increments). They are *advisory by design* ("a reviewer must confirm …
576
+ intentional") and no single safe predicate separates them from real
577
+ fallbacks without risking over-suppression, so they are reported honestly
578
+ rather than gamed away. TDD'd in
579
+ [`tests/test_fp_round2.py`](tests/test_fp_round2.py).
580
+
581
+ **Residual honesty.** The remaining output is dominated by the objective
582
+ `size.*` gates (real breaches of published linter limits) and
583
+ `broad_except.swallow` (genuine `except: pass`). These are trustworthy. The
584
+ zone heuristic still exists as an *opt-in* capability for teams that want it on
585
+ their own diffs — re-enable it per run via the `gates` argument. It is not
586
+ deleted, just no longer in the default scan.
587
+
588
+ ---
589
+
590
+ ## Per-project configurability
591
+
592
+ Three knobs let a project tune the forensic auditor without forking it:
593
+ **disable noisy gates**, **raise the severity floor**, and **add your own gate**.
594
+
595
+ ### Disable specific gates — `.cortex/disabled_gates.json`
596
+
597
+ Drop a `disabled_gates.json` into your project's `.cortex/` directory to switch
598
+ off gates that are noisy for your codebase. `run_forensic_audit` auto-loads it
599
+ from `<project_dir>/.cortex/disabled_gates.json`. Two accepted shapes:
600
+
601
+ ```jsonc
602
+ // a bare list of gate check_ids …
603
+ ["broad_except", "duplication"]
604
+ ```
605
+ ```jsonc
606
+ // … or an object with a "disabled" key
607
+ { "disabled": ["broad_except", "duplication"] }
608
+ ```
609
+
610
+ A disabled gate never runs (produces no findings) and is reported in
611
+ `meta["gates_skipped"]` with reason `"disabled_by_project"`:
612
+
613
+ ```python
614
+ from vigil_forensic import run_forensic_audit
615
+
616
+ res = run_forensic_audit("/path/to/project")
617
+ # .cortex/disabled_gates.json contains ["broad_except"]
618
+ assert {e["gate_id"] for e in res["meta"]["gates_skipped"]
619
+ if e["reason"] == "disabled_by_project"} == {"broad_except"}
620
+ ```
621
+
622
+ Behavior:
623
+
624
+ - The disable list takes precedence over every other resolution rule — a
625
+ disabled gate is always reported as `disabled_by_project`, even one that the
626
+ static-mode policy or a `gates=` filter would have skipped anyway.
627
+ - **Missing or empty file → no-op.** Nothing is disabled; all gates run.
628
+ - **Malformed file never raises.** A JSON-syntax error, an unreadable file, or a
629
+ wrong-typed payload is *logged-and-ignored* (narrow exception handling, no
630
+ bare `except`): the audit completes, nothing is disabled, and a
631
+ `meta.profile_load_failed` finding (HIGH/WARN) records the failure, so the
632
+ ignored disable list is reported loudly rather than silently swallowed.
633
+ - `.cortex/` is git-ignored by default in this repo's audit policy, so the file
634
+ is a *local* opt-out unless you commit it deliberately.
635
+
636
+ The same file is honored by the CLI (`python -m vigil_forensic.self_audit
637
+ --project <dir>`).
638
+
639
+ > Gate ids are the `check_id` values — run `python -m vigil_forensic.self_audit
640
+ > --list-gates` to print the file-based gates, or read the `GATE_SPECS` table in
641
+ > [`vigil_forensic/gate_packs/universal.py`](vigil_forensic/gate_packs/universal.py).
642
+ > Note a *family* gate id (`broad_except`) and its sub-checks emitted under a
643
+ > dotted child id (`broad_except.return_none`) are produced by the same runner;
644
+ > disabling the family id (`broad_except`) stops that runner entirely. A
645
+ > *separately registered* gate such as `broad_except.hidden_sentinel` has its
646
+ > own id and must be disabled separately.
647
+
648
+ ### Raise the severity floor — `severity=`
649
+
650
+ `run_forensic_audit(project_dir, *, severity="LOW")` filters the returned
651
+ `findings` to those **at or above** the floor. Ordering is
652
+ `LOW < MEDIUM < HIGH < CRITICAL` (case-insensitive); the default `"LOW"` returns
653
+ everything.
654
+
655
+ ```python
656
+ res = run_forensic_audit("/path/to/project", severity="HIGH")
657
+ # res["findings"] contains only HIGH and CRITICAL findings.
658
+ ```
659
+
660
+ The `meta.*` counters (`severity_counts`, `total_findings`, `category_counts`)
661
+ are computed **before** the floor is applied, so they always reflect the full
662
+ finding set; `meta["findings_after_severity_filter"]` records the post-filter
663
+ count whenever a non-LOW floor is used. The process exit code is likewise driven
664
+ by the unfiltered HIGH/CRITICAL counts.
665
+
666
+ ### Add your own gate
667
+
668
+ There is **no plugin auto-discovery** — the gate set is the module-level
669
+ `GATE_SPECS` tuple in
670
+ [`vigil_forensic/gate_packs/universal.py`](vigil_forensic/gate_packs/universal.py),
671
+ resolved once at import into `DEFAULT_GATE_CHECKS`
672
+ ([`gate_registry.py`](vigil_forensic/gate_registry.py)). Registering a gate
673
+ means adding a spec to that tuple. The spec shape is a 3-tuple:
674
+
675
+ ```python
676
+ (check_id, category, runner)
677
+ # │ │ └── Callable[[PostExecGateContext], GateCheckResult]
678
+ # │ └── a vigil_forensic._shared.GateCategory enum member
679
+ # └── str, the gate id (also the prefix for any dotted child ids it emits)
680
+ ```
681
+
682
+ The **runner** takes the synthetic `PostExecGateContext` (its
683
+ `ctx.file_snapshots` maps each touched file's normalized path → a
684
+ `GateFileSnapshot` with `.text`, `.line_count`, `.exists`) and returns a
685
+ `GateCheckResult`:
686
+
687
+ ```python
688
+ from vigil_forensic._shared import (
689
+ GateCheckResult, GateFinding, GateCategory, GateSeverity,
690
+ GateImpact, EvidenceReference,
691
+ )
692
+
693
+ def run_no_print_checks(ctx) -> GateCheckResult:
694
+ findings = []
695
+ for path, snap in ctx.file_snapshots.items():
696
+ if not snap.exists or not path.endswith(".py"):
697
+ continue
698
+ for lineno, line in enumerate(snap.text.splitlines(), start=1):
699
+ if line.lstrip().startswith("print("):
700
+ findings.append(GateFinding(
701
+ check_id="no_print",
702
+ category=GateCategory.REPORTING,
703
+ title="Stray print() in source",
704
+ severity=GateSeverity.LOW,
705
+ impact=GateImpact.WARN,
706
+ summary=f"print() at {path}:{lineno}",
707
+ recommendation="Use logging instead of print().",
708
+ evidence=(EvidenceReference(
709
+ kind="line", path=path, detail=f"L{lineno}", ok=False),),
710
+ fingerprint=f"no_print:{path}:{lineno}",
711
+ ))
712
+ return GateCheckResult(
713
+ check_id="no_print", category=GateCategory.REPORTING,
714
+ findings=tuple(findings),
715
+ )
716
+ ```
717
+
718
+ To wire it in (the supported path — edit the pack):
719
+
720
+ 1. Add `("no_print", GateCategory.REPORTING, run_no_print_checks)` to
721
+ `GATE_SPECS` in `gate_packs/universal.py`.
722
+ 2. Add `"no_print"` to the `_FILE_BASED_GATES` allowlist in
723
+ [`vigil_forensic/self_audit.py`](vigil_forensic/self_audit.py) — the static
724
+ auditor only runs gate ids in that set (anything else is reported as
725
+ `not_file_based` and skipped). A runtime-only gate would instead get a
726
+ `skip_in_static` flag in `GATE_FLAGS`.
727
+
728
+ Each `GateFinding` is validated on construction: `confidence` must be in
729
+ `[0.0, 1.0]`, and a non-`"applicable"` `applicability` requires a non-empty
730
+ `applicability_reason` (see `GateFinding.__post_init__` in `_shared.py`).
731
+
732
+ > If you must register a gate **without** editing the pack (e.g. a downstream
733
+ > wrapper), `vigil_forensic.gate_registry.DEFAULT_GATE_CHECKS` is a plain tuple
734
+ > you can extend before calling `run_gates`, and `run_gates(..., gates_filter=…)`
735
+ > selects a subset — but a new id still has to be present in `_FILE_BASED_GATES`
736
+ > to run in static mode, so editing the pack is the honest, complete path.
737
+
738
+ ### `forensic_clusters` in static mode (static-safe subset)
739
+
740
+ The `forensic_clusters` pack bundles ~40 cluster runners. Most are purely
741
+ static (they read only `file_snapshots` / text / AST): security patterns,
742
+ secrets, mutable defaults, resource leaks, hardcoded paths, dead code,
743
+ unreachable code, shadowed builtins, magic numbers, TODO debt, import cycles,
744
+ exception swallowing, and more. A minority are **runtime-only** — they need a
745
+ real post-execution context (`artifact_refs`, `transport_mode`,
746
+ reported-vs-observed changed files, validation-contract proofs, or a disk
747
+ re-read compared against an expected hash) and are meaningless / false-positive
748
+ prone without it. The runtime-only set is listed in
749
+ [`forensic_cluster_runners/core.py`](vigil_forensic/gate_checks/forensic_cluster_runners/core.py)
750
+ as `_RUNTIME_ONLY_CLUSTERS` (`cluster2_success_without_proof`,
751
+ `cluster3_proxy_as_truth`, `cluster4_config_accepted_ignored_*`,
752
+ `cluster6_state_divergence`, `cluster7_fallback_hides_truth`,
753
+ `cluster10_edit_consistency`, `cluster11_mutation_verified`).
754
+
755
+ So the pack is **not** flagged `skip_in_static`. Instead, when `run_gates`
756
+ hands it a synthetic static context (`_is_static_mode(ctx)` → no runtime
757
+ signals), the runner filters the runtime-only clusters out and runs only the
758
+ static-safe checks. When a real execution context is present the full pack runs
759
+ unchanged. The worst FP this prevents is `cluster11_mutation_verified`: it
760
+ hashes the *decoded* snapshot text but the assessor hashes the *raw* disk bytes,
761
+ so every CRLF / BOM file would otherwise fire a bogus "content DIVERGED" HIGH.
762
+
763
+ > **`dead_code_scan` caveat.** Cluster 20 marks a public function "dead" when it
764
+ > is not referenced anywhere in the **scanned set**. `run_forensic_audit` always
765
+ > discovers the whole project directory, so cross-file references resolve and it
766
+ > is accurate (0 findings on `filelock`, which uses `__all__`). It can over-report
767
+ > only on a *partial / single-file* scan, where a function's caller lives in a
768
+ > file outside the scan — that path is not used by `run_forensic_audit`. Findings
769
+ > are MEDIUM, and names in `__all__`, framework-decorated, or matching standalone
770
+ > markers are already classified as `standalone_utility` and skipped.
771
+
772
+ ---
773
+
774
+ ## Running tests
775
+
776
+ ```bash
777
+ pytest tests/ -p no:cacheprovider
778
+ ```
779
+
780
+ Do not run the tests in parallel (`-n auto`): keep the run light, because tree-sitter grammars load on first call.