vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,780 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vigil-codeintel
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Multi-language code intelligence: structural mapper, forensic gate auditor, and two FastMCP stdio servers
|
|
5
|
+
Author: Julio
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Development Status :: 4 - Beta
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: tree-sitter<0.26,>=0.25
|
|
20
|
+
Requires-Dist: tree-sitter-language-pack>=1.10
|
|
21
|
+
Requires-Dist: filelock<4,>=3.12
|
|
22
|
+
Requires-Dist: mcp>=1.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest<8,>=7.4; extra == "dev"
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# vigil
|
|
28
|
+
|
|
29
|
+
Two FastMCP stdio servers for code intelligence, backed by multi-language static analysis cores.
|
|
30
|
+
|
|
31
|
+
**License:** MIT (see [LICENSE](LICENSE)). Change the copyright holder before any publication.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## What it is
|
|
36
|
+
|
|
37
|
+
`vigil` packages three cooperating libraries:
|
|
38
|
+
|
|
39
|
+
- **`vigil_mapper`** — structural code mapper. Parses Python (stdlib `ast`) and Go/Java/JS/TS (tree-sitter). Produces typed maps: structural (imports + symbols), data contracts, runtime signals, authority writes, hotspots, refactor boundaries, conflicts, and findings. Output is written to `<project>/.cortex/maps/` as JSON.
|
|
40
|
+
|
|
41
|
+
- **`vigil_forensic`** — static forensic gate auditor. Runs a suite of 40+ pattern-based checks (broad-except, hallucinations, TOCTOU, security injection, config-safety, contract drift, etc.) against a project directory. Returns structured findings with severity, category, evidence, and fingerprint. Single public function: `run_forensic_audit(project_dir, ...) -> dict`.
|
|
42
|
+
|
|
43
|
+
- **`vigil_mcp`** — two FastMCP stdio servers (`code-map`, `forensic-audit`) that wrap the above cores behind a **background-job + poll** API. Resource-constrained: max 2 concurrent jobs, cancellable, output paginated/capped at 80 000 chars (~25 k tokens) per page.
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Capability matrix
|
|
48
|
+
|
|
49
|
+
The table below reflects the actual `supports_*` flags and implementation state read from the adapter sources.
|
|
50
|
+
|
|
51
|
+
| Language | Structural (imports + symbols) | Contracts | Runtime signals | Authority writes |
|
|
52
|
+
|----------|-------------------------------|-----------|-----------------|------------------|
|
|
53
|
+
| **Python** | yes — stdlib `ast`, fully implemented | yes — `ast`: `@dataclass`, pydantic `BaseModel`, `TypedDict`, `NamedTuple` | yes — `ast`: import-time side effects, decorator registries, `os.getenv`/`environ` reads | yes — `ast`: `write_text`/`write_bytes`/`save`/`json.dump`/`open(...,"w")` |
|
|
54
|
+
| **Go** | yes — tree-sitter, fully implemented | yes — structs and interfaces via tree-sitter | yes — `init`, goroutine spawns, package-level `var = call(...)` | yes — `os.WriteFile`, `os.Create`, `.Write`, `.Exec` |
|
|
55
|
+
| **Java** | yes — tree-sitter, fully implemented | yes — class/record/interface/enum via tree-sitter | yes — `static {}`, Spring stereotypes, thread/executor spawns | yes — `Files.write`, `.write`/`.append`, `.save`/`.persist`, `new FileWriter` |
|
|
56
|
+
| **JavaScript** | yes — tree-sitter, fully implemented | not supported (`supports_contracts = False`) | yes — timer, event listener, top-level effects | yes — write patterns via tree-sitter |
|
|
57
|
+
| **TypeScript** | yes — tree-sitter, fully implemented | yes — via regex (contracts, interfaces, zod schemas) | yes — via regex | yes — via tree-sitter |
|
|
58
|
+
|
|
59
|
+
**Forensic gates:** language-aware; runs on all five languages where applicable. The gate framework uses `vigil_mapper` sources internally. Includes an **ML/NN check pack** (`ml.*`): future-data leakage (`.shift(-N)`), scaler `fit`/`fit_transform` on `*_test`/`*_val` splits (train→test leakage), `train_test_split` without `random_state` (non-reproducible), and RNG use without a seed — high-precision static checks for data-science / model code.
|
|
60
|
+
|
|
61
|
+
> **Note on the Python row.** `PythonAdapter` extracts contracts/runtime/writers directly via `ast` (parity with Go/Java/TS at the adapter layer). The map builders (`data_contract_builder.py`, `authority_builder.py`, `runtime_builder.py`) remain the authoritative L2+ path and add deeper detection (e.g. the atomic-write trio `os.fdopen`+`write`+`os.replace`); the adapter methods surface the same signals at the source-adapter layer. Reads (`open(p)` / `open(p, "r")` / `.read_text()` / `json.load` / `json.dumps`) are not writes.
|
|
62
|
+
|
|
63
|
+
### Authority map works out-of-the-box (no seed required)
|
|
64
|
+
|
|
65
|
+
The authority map (`vigil_mapper/authority_builder.py`) is useful on any project **without configuration**. With **no** `<project>/.cortex/map_seeds/authority_domains.json`, every discovered write site is auto-surfaced as an *inferred* per-writer `AuthorityDomain` (`status="inferred"`, `source="static_scan"`, modest confidence). Each entry names the writer file (`canonical_owner`) and lists its resolved write targets + operation kinds, so the map is immediately actionable. A pure read never produces an entry.
|
|
66
|
+
|
|
67
|
+
Providing a seed switches to the structured behaviour: domains carry `target_file_patterns`, writers are attributed by AST-resolved target match, and seed entries are `status="observed"`. With a seed present, the per-writer auto-surfacing is **not** added (no double-surfacing).
|
|
68
|
+
|
|
69
|
+
Known limitation: write sites whose target is unresolvable and which use idioms outside the detected set — notably the atomic-write trio `os.fdopen(fd, "w")` + `fh.write(...)` + `os.replace(tmp, str(path))` — are not detected, so a file that *only* writes that way (e.g. `vigil_mapper/map_storage.py`) will not surface. This is a discovery-layer limitation, independent of the seed behaviour.
|
|
70
|
+
|
|
71
|
+
### Runtime map surfaces entrypoints out-of-the-box (no seed required)
|
|
72
|
+
|
|
73
|
+
The runtime map (`vigil_mapper/runtime_builder.py`) surfaces real entrypoints on any project **without configuration**. With **no** `<project>/.cortex/map_seeds/runtime_seed.json`, the Python AST scanner (`_runtime_ast._RuntimeVisitor`) emits inferred `RuntimeNode` entries (`status="inferred"`, `source="static_scan"`, evidence pointing at `file:line`) for:
|
|
74
|
+
|
|
75
|
+
- `if __name__ == "__main__":` blocks (`kind="main_entrypoint"`); the invoked entry functions are recorded in `calls`;
|
|
76
|
+
- the module-level function(s) invoked from that block (`kind="entry_function"`);
|
|
77
|
+
- async entrypoints (`asyncio.run(...)` in a `__main__` block) — tagged `async_entrypoint`.
|
|
78
|
+
|
|
79
|
+
Adapter-provided runtime signals (Go `init`/goroutine, Java static-block/Spring/thread, JS timer/listener/top-level effect) already surface without a seed via `collect_adapter_runtime_nodes`; this change adds the Python `__main__`/entry-function path that was previously missing.
|
|
80
|
+
|
|
81
|
+
Precision guard: an ordinary helper function or a plain import is **not** an entrypoint. A `def main(): ...` *without* a `__main__` guard is just a function and does **not** produce a `main_entrypoint` node. Providing a seed keeps the existing behaviour — seed nodes are `status="canonical"` and win on name conflicts, so the same node is never double-surfaced; auto-discovered nodes augment the seed.
|
|
82
|
+
|
|
83
|
+
Known limitation: entrypoints exposed only via packaging (`console_scripts` / `[project.scripts]`) without an in-file `__main__` guard — e.g. `vigil_mapper/cli_entry.py` — are not surfaced by the static scan (there is no in-source signal to key on). Background tasks/routes are detected only inside init-style function bodies (`__init__`/`bootstrap`/`setup`/`startup`/`start`/`initialize`/`init`), per the existing visitor scope.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Install
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pip install -e .
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Hard dependencies** (pulled automatically by pip):
|
|
94
|
+
- `tree-sitter >= 0.25, < 0.26`
|
|
95
|
+
- `tree-sitter-language-pack >= 1.10`
|
|
96
|
+
- `filelock >= 3.12, < 4`
|
|
97
|
+
- `mcp >= 1.0`
|
|
98
|
+
|
|
99
|
+
**Dev extras** (adds pytest):
|
|
100
|
+
```bash
|
|
101
|
+
pip install -e ".[dev]"
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Register in Claude Code
|
|
107
|
+
|
|
108
|
+
### Option A — `claude mcp add` (stdio, recommended)
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
claude mcp add code-map -- vigil-mapper-mcp
|
|
112
|
+
claude mcp add forensic-audit -- vigil-forensic-mcp
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Both commands are entry points installed by `pip install -e .`.
|
|
116
|
+
|
|
117
|
+
### Option B — `.mcp.json` (project file)
|
|
118
|
+
|
|
119
|
+
```json
|
|
120
|
+
{
|
|
121
|
+
"mcpServers": {
|
|
122
|
+
"code-map": {
|
|
123
|
+
"type": "stdio",
|
|
124
|
+
"command": "vigil-mapper-mcp",
|
|
125
|
+
"args": []
|
|
126
|
+
},
|
|
127
|
+
"forensic-audit": {
|
|
128
|
+
"type": "stdio",
|
|
129
|
+
"command": "vigil-forensic-mcp",
|
|
130
|
+
"args": []
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Place `.mcp.json` in the project root or in `~/.claude/`.
|
|
137
|
+
|
|
138
|
+
### Option C — Claude Code plugin marketplace
|
|
139
|
+
|
|
140
|
+
Installable as a Claude Code **plugin** straight from GitHub. The plugin launches the
|
|
141
|
+
servers via `python -m vigil_mcp.*`, so the package must be importable in the Python that
|
|
142
|
+
Claude Code uses — install it first, then add the marketplace:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
pip install "git+https://github.com/iuliimanchini-dot/Vigil.git"
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Then inside Claude Code:
|
|
149
|
+
|
|
150
|
+
```
|
|
151
|
+
/plugin marketplace add iuliimanchini-dot/Vigil
|
|
152
|
+
/plugin install vigil-tools@vigil-marketplace
|
|
153
|
+
/mcp # code-map + forensic-audit appear
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
The plugin ships `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, and a
|
|
157
|
+
root `.mcp.json` declaring both stdio servers. (If `python` is not the interpreter with
|
|
158
|
+
`vigil` installed, edit `.mcp.json`'s `command` to the full path of that interpreter.)
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## Tool list
|
|
163
|
+
|
|
164
|
+
### Server: `code-map`
|
|
165
|
+
|
|
166
|
+
| Tool | Description |
|
|
167
|
+
|------|-------------|
|
|
168
|
+
| `start_code_map` | Start a background map-build job. Args: `path` (absolute project root), `map` (`"all"` or specific map name). Returns `job_id`. |
|
|
169
|
+
| `get_code_map_status` | Poll job status. Args: `job_id`. Returns `status`: `running / done / error / cancelled / timeout / interrupted / not_found`. |
|
|
170
|
+
| `get_code_map_results` | Retrieve completed results (paginated). Args: `job_id`, `page` (0-based), `page_size_chars`. Returns structured maps payload. |
|
|
171
|
+
| `load_code_map_by_path` | Load previously built maps from disk without a job. Args: `path`, `page`, `page_size_chars`. |
|
|
172
|
+
| `cancel_code_map` | Cancel a running job. Args: `job_id`. |
|
|
173
|
+
|
|
174
|
+
### Server: `forensic-audit`
|
|
175
|
+
|
|
176
|
+
| Tool | Description |
|
|
177
|
+
|------|-------------|
|
|
178
|
+
| `start_forensic_audit` | Start a background forensic audit. Args: `path`, `gates` (comma-separated check_ids or empty for all), `severity` (`LOW / MEDIUM / HIGH / CRITICAL`), `all_languages`. Returns `job_id`. |
|
|
179
|
+
| `get_forensic_status` | Poll job status. Args: `job_id`. |
|
|
180
|
+
| `get_forensic_results` | Retrieve results (paginated + capped). Args: `job_id`, `page`, `page_size_chars`, `max_findings` (default 200). Returns `exit_code`, `findings`, `meta`, `errors`. |
|
|
181
|
+
| `cancel_forensic_audit` | Cancel a running audit. Args: `job_id`. |
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## Usage pattern: the poll workflow
|
|
186
|
+
|
|
187
|
+
Both servers use the same start → poll → retrieve pattern. Push delivery is not used here (see note below).
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
# Example: map build via MCP tool calls (pseudocode showing the call sequence)
|
|
191
|
+
|
|
192
|
+
# 1. Start the job
|
|
193
|
+
result = call_tool("start_code_map", {"path": "/path/to/project", "map": "all"})
|
|
194
|
+
job_id = result["job_id"]
|
|
195
|
+
|
|
196
|
+
if result["status"] == "busy":
|
|
197
|
+
# Server is at max concurrent jobs; wait and retry start_code_map
|
|
198
|
+
...
|
|
199
|
+
|
|
200
|
+
# 2. Poll until done
|
|
201
|
+
while True:
|
|
202
|
+
s = call_tool("get_code_map_status", {"job_id": job_id})
|
|
203
|
+
if s["status"] != "running":  # any terminal state: done / error / cancelled / timeout / interrupted / not_found
|
|
204
|
+
break
|
|
205
|
+
time.sleep(2)
|
|
206
|
+
|
|
207
|
+
# 3. Retrieve results (paginated if large)
|
|
208
|
+
page = 0
|
|
209
|
+
while True:
|
|
210
|
+
r = call_tool("get_code_map_results", {"job_id": job_id, "page": page})
|
|
211
|
+
process(r["payload"]) # JSON string
|
|
212
|
+
if not r["truncated"]:
|
|
213
|
+
break
|
|
214
|
+
page += 1
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
The same three-step pattern applies to `forensic-audit`: `start_forensic_audit` → `get_forensic_status` → `get_forensic_results`.
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
## Resource and concurrency guarantees
|
|
222
|
+
|
|
223
|
+
- **Max 2 concurrent jobs** per server process (enforced by `_jobs.JobRegistry`).
|
|
224
|
+
- **`forensic-audit` additionally uses `workers=1`** internally inside `run_forensic_audit`.
|
|
225
|
+
- Jobs are **cancellable** at any time via `cancel_code_map` / `cancel_forensic_audit`.
|
|
226
|
+
- Output is **paginated and capped**: each results page is at most 80 000 chars (~25 k tokens); findings are capped at 200 per `get_forensic_results` call by default.
|
|
227
|
+
- Map analysis is **incremental**: tree-sitter parses file-by-file; `run_map_build` has a 300 s time budget and writes each map independently — the server will not hang the host process.
|
|
228
|
+
- **File-count guard (anti-hang on huge repos).** Both tools do per-file AST work (forensic averages ~0.4 s/file), so a repo with thousands of files would take *hours*. When the collected source-file count exceeds **`max_files` (default 800, roughly a 5 min ceiling)** the tool **does not scan** — it returns a fast structured skip instead: forensic sets `meta.skipped_reason="too_many_files"` (with `file_count`, `max_files`, `top_subdirs`, `suggestion`); code-map surfaces the same via `get_code_map_results` (`view="skipped"`). To proceed, either narrow the scope by pointing `path` at a submodule, or pass a higher `max_files=` to `start_forensic_audit` / `start_code_map` to raise the ceiling and force a full scan. Vendored/build dirs (`.venv`, `site-packages`, `dist-packages`, `node_modules`, `build`, `dist`, `.tox`, `.eggs`, `.mypy_cache`, `.pytest_cache`, `.next`, …) are excluded from the count and the scan even when they sit outside a venv.
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## Job persistence (results survive a restart)
|
|
233
|
+
|
|
234
|
+
Completed job results are **disk-backed**, so a finished audit or map build is
|
|
235
|
+
still retrievable after the MCP server process restarts.
|
|
236
|
+
|
|
237
|
+
- **Where files live.** Each job is persisted under its own project root at
|
|
238
|
+
`<project_dir>/.cortex/cortex_jobs/<job_id>.json` (the `project_dir` is the
|
|
239
|
+
resolved path the `start_*` tool targeted). A small global index keyed by
|
|
240
|
+
`job_id` lives under the user state dir (`~/.cortex/cortex_jobs_index/`) so a
|
|
241
|
+
restarted server — which polls by `job_id` only — can locate the owning
|
|
242
|
+
project. Persistence engages only when a `project_dir` is known; an in-memory
|
|
243
|
+
job started without one keeps the legacy behaviour (lost on exit).
|
|
244
|
+
- **Atomic mechanism.** Records are written via `tempfile.mkstemp` + `os.replace`
|
|
245
|
+
under a per-job `filelock.FileLock` — the same atomic pattern as
|
|
246
|
+
`vigil_mapper.map_storage`. `os.replace` is atomic on POSIX and Windows,
|
|
247
|
+
so a reader never observes a half-written file. The terminal record is written
|
|
248
|
+
to disk **before** the in-memory status flips to terminal, so disk is never
|
|
249
|
+
behind what `get_*_status` reports.
|
|
250
|
+
- **Restart / interrupted semantics.** Terminal records (`done` / `error` /
|
|
251
|
+
`cancelled` / `timeout`) reload verbatim. A record left in the `running` state
|
|
252
|
+
means the process died mid-flight; since the worker thread is gone and cannot
|
|
253
|
+
be resumed, it reloads as **`interrupted`** — never as `done`.
|
|
254
|
+
- **Cross-project rule.** A job's file lives only under its own project. Polling
|
|
255
|
+
by `job_id` resolves through the global index; polling *scoped to a specific
|
|
256
|
+
project* only reads that project's directory, so a job that ran under project
|
|
257
|
+
X is **not** visible when resolved scoped to project Y.
|
|
258
|
+
- **Bounded reads.** Disk lookups are by `job_id` (one index read + one record
|
|
259
|
+
read) — never a directory scan. Records carry the full result payload; there
|
|
260
|
+
is currently **no automatic cleanup** of `.cortex/cortex_jobs/` (large results
|
|
261
|
+
accumulate there until removed), so treat it like the `.cortex/maps/` cache.
|
|
262
|
+
|
|
263
|
+
---
|
|
264
|
+
|
|
265
|
+
## MCP push note
|
|
266
|
+
|
|
267
|
+
The default delivery mode for both servers is **poll** (the client calls `get_*_status` / `get_*_results` repeatedly). Claude Code does support server-to-client push notifications via `claude/channel` + `--channels`, but these servers do not use that mechanism — poll was chosen for simplicity and portability. If you need push-style delivery, you can add it via the FastMCP channel API; it is feasible, just not wired up here.
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## Default gate profile (size-noise control)
|
|
272
|
+
|
|
273
|
+
The forensic auditor reads size/complexity thresholds from a **gate profile**. A
|
|
274
|
+
default profile ships **inside the package** (so it is bundled in the wheel and
|
|
275
|
+
available after `pip install`):
|
|
276
|
+
[`vigil_forensic/gate_profile.json`](vigil_forensic/gate_profile.json).
|
|
277
|
+
Its only job is to cut **size-noise false-positives** — file-length,
|
|
278
|
+
function-length, and nesting-depth warnings firing on legitimately large code —
|
|
279
|
+
*without* hiding genuinely extreme outliers (a 2 000-line god-file still
|
|
280
|
+
surfaces).
|
|
281
|
+
|
|
282
|
+
### Where the profile is discovered
|
|
283
|
+
|
|
284
|
+
`vigil_forensic.self_audit._load_gate_profile_if_present` looks, in order:
|
|
285
|
+
|
|
286
|
+
1. `<audit-target>/gate_profile.json`
|
|
287
|
+
2. `<audit-target>/.cortex/gate_profile.json`
|
|
288
|
+
3. **ancestor walk** — the first `gate_profile.json` found in any parent
|
|
289
|
+
directory of the audit target.
|
|
290
|
+
4. **packaged default** — the profile shipped inside the `vigil_forensic`
|
|
291
|
+
package. This is the effective default for any target with no profile of its
|
|
292
|
+
own and no ancestor profile (e.g. an arbitrary path audited after
|
|
293
|
+
`pip install`), and is why a sub-package audit such as
|
|
294
|
+
`run_forensic_audit("vigil_forensic")` still picks up the shipped default.
|
|
295
|
+
|
|
296
|
+
A target-local profile always wins over an ancestor or the packaged default. A
|
|
297
|
+
missing or malformed profile is logged and skipped — never fatal. The
|
|
298
|
+
**committed** default lives inside the package at `vigil_forensic/gate_profile.json`
|
|
299
|
+
so it ships in the wheel.
|
|
300
|
+
|
|
301
|
+
### How to set your own
|
|
302
|
+
|
|
303
|
+
Copy the shipped file to your project root and edit `size_thresholds`:
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
cp vigil_forensic/gate_profile.json /path/to/your-project/gate_profile.json
|
|
307
|
+
# then edit size_thresholds to taste
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
### Thresholds and their cited sources
|
|
311
|
+
|
|
312
|
+
JSON forbids comments, so the justification for every value is here. Each value
|
|
313
|
+
is a **published linter default**, not an arbitrary constant. `warn` =
|
|
314
|
+
MEDIUM-severity heads-up (advisory); `revise` = HIGH-severity "refactor now".
|
|
315
|
+
|
|
316
|
+
| Key | Value | Source / rationale |
|
|
317
|
+
|-----|-------|--------------------|
|
|
318
|
+
| `function_warn` | **100** | SonarQube `S138` and PMD `ExcessiveMethodLength` both default to **100** lines. (Clean Code's ~20–60 is an ideal, not a linter default — too aggressive for a real engine, would re-introduce noise.) |
|
|
319
|
+
| `function_revise` | **150** | 1.5× the SonarQube/PMD limit — a "clearly excessive" function that should be split. Isolates true outliers (e.g. 325- and 290-line functions in this repo). |
|
|
320
|
+
| `nesting_warn` | **5** | pylint `max-nested-blocks` **default = 5**. Nesting depth is the structural-complexity signal the engine actually measures; deep nesting is the same code smell McCabe's cyclomatic-complexity ≈10 guideline targets, expressed as a nesting bound. (SonarQube `S134`=3 is stricter; pylint's 5 is the widely-shipped default and avoids flagging ordinary depth-4 control flow.) |
|
|
321
|
+
| `nesting_revise` | **8** | Beyond any common linter's tolerance — genuinely tangled control flow worth flattening. |
|
|
322
|
+
| `file_warn` | **750** | SonarQube file-size flag default = **750** lines. |
|
|
323
|
+
| `file_revise` | **1000** | pylint `max-module-lines` **default = 1000**. A file past 1 000 lines is a god-file candidate. |
|
|
324
|
+
|
|
325
|
+
> **Note on cyclomatic complexity.** The size/complexity engine measures file
|
|
326
|
+
> LOC, function LOC, and **nesting depth** — it does not compute a McCabe
|
|
327
|
+
> cyclomatic-complexity number, and the profile has no `cyclomatic` key (one
|
|
328
|
+
> would be dead config). Nesting depth is used as the structural-complexity
|
|
329
|
+
> proxy, calibrated to pylint's `max-nested-blocks` default; the McCabe ≈10
|
|
330
|
+
> guideline informs that choice rather than being read directly.
|
|
331
|
+
|
|
332
|
+
### Effect (measured before → after on this repo)
|
|
333
|
+
|
|
334
|
+
| Audit target | total before | total after | `size.*` before | `size.*` after |
|
|
335
|
+
|--------------|-------------:|------------:|----------------:|---------------:|
|
|
336
|
+
| `vigil_forensic/` | 125 | 86 | 92 | 55 |
|
|
337
|
+
| `vigil_mapper/` | 115 | 93 | 49 | 37 |
|
|
338
|
+
|
|
339
|
+
The remaining `size.*` findings are functions over 100 lines and nesting deeper
|
|
340
|
+
than 5 — code that genuinely exceeds the published limits, which is the intended
|
|
341
|
+
behavior, not a miss.
|
|
342
|
+
|
|
343
|
+
---
|
|
344
|
+
|
|
345
|
+
## Real-world metrics
|
|
346
|
+
|
|
347
|
+
Measured on **real third-party Python packages** copied out of this repo's
|
|
348
|
+
`.venv` (sans `__pycache__`), audited with the shipped default `gate_profile.json`
|
|
349
|
+
active (`file_warn 750 / file_revise 1000 / nesting_warn 5`). Reproduce with
|
|
350
|
+
[`tests/benchmark_realworld.py`](tests/benchmark_realworld.py)
|
|
351
|
+
(`python tests/benchmark_realworld.py` — single-threaded, KB-scale targets, light).
|
|
352
|
+
|
|
353
|
+
Environment: Windows 11, CPython 3.11, `workers=1` (forensic enforces this
|
|
354
|
+
internally). "mem" = peak RSS delta over the call, sampled at 20 ms in a
|
|
355
|
+
background thread. "tokens" = MCP summary-view chars ÷ 4.
|
|
356
|
+
|
|
357
|
+
| Target | `.py` files | LOC | forensic time | forensic peak RSS | map(`all`) time | map peak RSS |
|
|
358
|
+
|--------|------------:|----:|--------------:|------------------:|----------------:|-------------:|
|
|
359
|
+
| `filelock` | 14 | 3 385 | 1.6 s | 8.1 MB | 0.5 s | 4.1 MB |
|
|
360
|
+
| `click` | 17 | 12 179 | 3.7 s | 2.3 MB | 0.9 s | 2.8 MB |
|
|
361
|
+
| `mcp` | 110 | 20 824 | 10.8 s | 6.2 MB | 1.4 s | 3.5 MB |
|
|
362
|
+
|
|
363
|
+
Forensic time is roughly linear in file count (~0.1–0.2 s/file here); the map build
|
|
364
|
+
is much cheaper. Memory stays low (single-digit MB peak delta) — these tools are
|
|
365
|
+
light enough to run inline.
|
|
366
|
+
|
|
367
|
+
### MCP output stays in budget
|
|
368
|
+
|
|
369
|
+
The summary views (`forensic_server._build_forensic_summary`,
|
|
370
|
+
`map_server._build_map_summary`) are what an agent actually receives. Both stay
|
|
371
|
+
well under the ~6 k-token budget on every target:
|
|
372
|
+
|
|
373
|
+
| Target | forensic summary | map summary |
|
|
374
|
+
|--------|-----------------:|------------:|
|
|
375
|
+
| `filelock` | ~3.0 k tok (11.9 KB) | ~0.5 k tok (1.9 KB) |
|
|
376
|
+
| `click` | ~1.5 k tok (6.1 KB) | ~0.4 k tok (1.7 KB) |
|
|
377
|
+
| `mcp` | ~1.7 k tok (6.7 KB) | ~0.6 k tok (2.2 KB) |
|
|
378
|
+
|
|
379
|
+
(`filelock`'s summary is the largest because its findings are dominated by one
|
|
380
|
+
duplication cluster, so the per-`check_id` breakdown is wide. Still only ~3 k tokens.)
|
|
381
|
+
|
|
382
|
+
### Determinism
|
|
383
|
+
|
|
384
|
+
`run_forensic_audit` is deterministic: run twice on each target, the sorted
|
|
385
|
+
`(check_id, file, line)` finding set is identical (no ordering or count drift).
|
|
386
|
+
|
|
387
|
+
### False-positive reduction on clean code (2026-06)
|
|
388
|
+
|
|
389
|
+
The default gate selection was re-tuned to cut the ~50 % false-positive rate
|
|
390
|
+
observed on clean, idiomatic third-party code. Inspected baseline vs. current
|
|
391
|
+
on `filelock` (every finding checked against the cited `file:line`):
|
|
392
|
+
|
|
393
|
+
| Target | findings before | findings after | actual FPs after |
|
|
394
|
+
|--------|----------------:|---------------:|-----------------:|
|
|
395
|
+
| `filelock` | 32 | **2** | **0** |
|
|
396
|
+
| `click` | 54 | **33** | low (mostly real `size.*` / `broad_except.swallow`) |
|
|
397
|
+
| `mcp` | 110 | **43** | low (mostly real `size.*` / `broad_except.swallow`) |
|
|
398
|
+
|
|
399
|
+
The two `filelock` findings that remain are both honest: one `size.file_warn`
|
|
400
|
+
(`_soft_rw/_sync.py` is genuinely 858 lines > 750) and one informational
|
|
401
|
+
`meta.git_unavailable` (see below). Zero false claims about the code.
|
|
402
|
+
|
|
403
|
+
Fixes landed (each TDD'd; items 1–5 in
|
|
404
|
+
[`tests/test_forensic_fp_clean_code.py`](tests/test_forensic_fp_clean_code.py),
|
|
405
|
+
items 6–7 in [`tests/test_dup_and_sqli.py`](tests/test_dup_and_sqli.py)):
|
|
406
|
+
|
|
407
|
+
1. **`broad_except` cleanup-then-reraise.** `except BaseException: <cleanup>;
|
|
408
|
+
raise` (filelock `_api.py:513`, `asyncio.py:268`) is the correct cancel-
|
|
409
|
+
cleanup idiom — it *re-raises*, it does not swallow. Both the regex
|
|
410
|
+
(`broad_except.base_exception`/`.bare`) and AST
|
|
411
|
+
(`broad_except.hidden_sentinel.bare_or_base`) detectors now skip any handler
|
|
412
|
+
whose body contains a top-level `raise`. Genuine swallows (no re-raise) still
|
|
413
|
+
fire.
|
|
414
|
+
2. **`duplication.text_block` inflation + docstrings.** One duplicated region no
|
|
415
|
+
longer emits one finding per sliding-window line (~13 → 1); windows of the
|
|
416
|
+
same file-set at adjacent start lines are merged into a single region. Lines
|
|
417
|
+
inside string literals (shared docstrings / `:param` blocks on sync↔async API
|
|
418
|
+
mirrors) and pure parameter-declaration lines are excluded. Genuine copy-
|
|
419
|
+
pasted **code** blocks are still detected.
|
|
420
|
+
3. **Zone-inference gates are now opt-in.** `god_object_zones` infers
|
|
421
|
+
"responsibility zones" from function-name prefixes against a fixed verb list
|
|
422
|
+
(`acquire/release/read/write/open/close/...`); a cohesive read-write-lock
|
|
423
|
+
class collides with that vocabulary and is wrongly flagged — ~0 true
|
|
424
|
+
positives here. It is **off by default** (moved to an opt-in set in
|
|
425
|
+
`self_audit._NOISY_OPT_IN_GATES`) and runs only when explicitly requested
|
|
426
|
+
(`run_forensic_audit(target, gates=["god_object_zones"])` or
|
|
427
|
+
`--gates god_object_zones`). The twin `size_complexity.zone_overload` sub-
|
|
428
|
+
check, which used the *same* name-prefix logic and double-reported the same
|
|
429
|
+
files, was **removed** outright; `size_complexity` keeps its objective
|
|
430
|
+
size/function-length/nesting budget checks.
|
|
431
|
+
4. **`api.public_function_signature_change` in no-git mode.** With no git
|
|
432
|
+
baseline (no work tree, or no changed file resolves at `HEAD~1`, e.g. a
|
|
433
|
+
vendored/`site-packages` dir) the old code fell back to a docstring-param-
|
|
434
|
+
count heuristic that fired on every documented variadic API
|
|
435
|
+
(`click.decorators.option(*param_decls, **attrs)` → "0 params vs 3
|
|
436
|
+
documented"). The whole signature-drift check is now **skipped without a git
|
|
437
|
+
baseline** and reported once via `meta.git_unavailable`. It runs normally
|
|
438
|
+
when a real `HEAD~1` diff exists.
|
|
439
|
+
5. **Profile fallback foot-gun.** An external target with no ancestor
|
|
440
|
+
`gate_profile.json` previously fell back to the *strict* code-defaults
|
|
441
|
+
(600/800/4) instead of the shipped defaults (750/1000/5). The loader
|
|
442
|
+
(`self_audit._load_gate_profile_if_present`) now falls back to the package's
|
|
443
|
+
**own shipped** `gate_profile.json` (bundled INSIDE the `vigil_forensic`
|
|
444
|
+
package and resolved relative to the module, so it ships in the wheel) as the
|
|
445
|
+
last resort. A target-local profile still wins.
|
|
446
|
+
6. **`duplicate_scan` (near-duplicate code) per-line inflation.** The
|
|
447
|
+
intra-file near-duplicate detector (`assess_near_duplicate_code`) hashes a
|
|
448
|
+
sliding 4-line window, so one duplicated region of N lines emitted N−3
|
|
449
|
+
near-identical findings ("block at lines 118 and 201", "119 and 202", …).
|
|
450
|
+
On `filelock` this produced **39** `duplicate_scan` findings for only a
|
|
451
|
+
handful of real blocks. Adjacent/overlapping window-pairs are now **merged**
|
|
452
|
+
into ONE finding per contiguous block (same region-grouping idea as
|
|
453
|
+
`duplication.text_block`'s `_merge_starts`), reported as a line range:
|
|
454
|
+
`Near-duplicate block at lines 118-126 ↔ 201-209 (9 lines)`. filelock drops
|
|
455
|
+
**39 → 13** — a true merge, not a cap: genuinely separate duplicate blocks
|
|
456
|
+
still each report once (verified: `_api.py` `__call__`/`__init__` signature
|
|
457
|
+
mirror at `118-126 ↔ 201-209` is preserved as a single finding).
|
|
458
|
+
7. **Focused SQL-injection detection (cluster 12, `security_scan`).** For
|
|
459
|
+
Python, `assess_security_patterns` flags a dynamic query passed to a
|
|
460
|
+
DB-call site (`.execute`/`.executemany`/`.executescript`/`.query`/`.raw`)
|
|
461
|
+
when the query is built by **f-string** interpolation, **`%`-format**,
|
|
462
|
+
**`str.format()`**, or **`+` string concatenation** with at least one
|
|
463
|
+
non-literal (variable) operand. The flagged string must have real SQL-clause
|
|
464
|
+
structure (`SELECT … FROM`, `UPDATE … SET`, `DELETE FROM`, …) and meet a
|
|
465
|
+
minimum length, so a SQL keyword in prose/log lines does not trip it. A plain
|
|
466
|
+
literal `execute("SELECT 1")`, a parametrised `execute("… ?", (x,))`, and a
|
|
467
|
+
constant concat of two literals (`"SELECT … " + "WHERE …"`) are **not**
|
|
468
|
+
flagged.
|
|
469
|
+
**Limits (honest):** detection is purely local/syntactic — it fires only
|
|
470
|
+
when the dynamic string is the *direct first argument* of the DB call.
|
|
471
|
+
There is **no taint tracking**: a query assembled in a prior statement
|
|
472
|
+
(`q = "SELECT … " + user_input; db.execute(q)`), passed through a helper, or
|
|
473
|
+
stored in a variable first is **not** detected. Non-Python languages get the
|
|
474
|
+
regex security patterns only (no SQLi AST rule). This is deliberately
|
|
475
|
+
low-false-positive, not full SQLi coverage.
|
|
476
|
+
8. **`debug_print_scan` substring / CLI-output false positives.** The detector
|
|
477
|
+
matched the substring `print(` anywhere on a line, so it fired on (a) `print(`
|
|
478
|
+
*inside a string literal* (e.g. a detector's own pattern tuple
|
|
479
|
+
`(..., "print(", ...)`), (b) lines already carrying `# noqa: debug_print_scan`,
|
|
480
|
+
and (c) intentional user-facing `print()` in CLI/output functions (the path
|
|
481
|
+
allowlist only knew the pre-migration `BRAIN/autoforensics/self_audit.py` path,
|
|
482
|
+
not the packaged `self_audit.py`). For **Python** the gate is now AST-driven:
|
|
483
|
+
only a line carrying a genuine `print(...)` **call** (`ast.Call` with
|
|
484
|
+
`func=Name('print')`) can be flagged — a `print(` in a string literal or an
|
|
485
|
+
attribute call (`obj.print(...)`) is never flagged. On a file that fails to
|
|
486
|
+
parse it falls back to requiring the stripped line to **start** with `print(`
|
|
487
|
+
(statement position). Across all languages the gate now (i) respects
|
|
488
|
+
`# noqa: debug_print_scan` and a bare `# noqa` on the offending line, and
|
|
489
|
+
(ii) skips prints inside conventionally-named output functions — name starts
|
|
490
|
+
with `print_`/`_print_`, or is `main`/`cli`/`run`/`cli_main` (and underscore
|
|
491
|
+
variants). The rule is deliberately conservative: a `print_*` function
|
|
492
|
+
elsewhere in the file does **not** silence a stray `print()` in an unrelated
|
|
493
|
+
normal function, and a genuine `print("DEBUG", x)` in ordinary code is still
|
|
494
|
+
flagged. On `vigil_forensic` itself this cut `debug_print_scan` **12 → 0**
|
|
495
|
+
(all 12 were FPs: 10 in `print_human_summary()`, 2 in detector pattern
|
|
496
|
+
tuples); the corpus oracle (`tests/oracle/sample_quality.py:63`) stays flagged.
|
|
497
|
+
TDD'd in [`tests/test_debug_print_fp.py`](tests/test_debug_print_fp.py).
|
|
498
|
+
9. **`commented_code_scan` prose false positives.** The detector grouped
|
|
499
|
+
consecutive comment lines and flagged a block when ≥2 of its lines matched a
|
|
500
|
+
permissive `code_indicators` regex (`\w=\w`, `def `, `return \w`, `for \w`,
|
|
501
|
+
`except \w`, …). Explanatory **prose** that merely *mentions* a code keyword in
|
|
502
|
+
an English sentence therefore tripped it — e.g. the design-rationale comment at
|
|
503
|
+
`broad_except_checks.py:21` ("… a line-only regex cannot tell a swallow from the
|
|
504
|
+
correct `except BaseException: <cleanup>; raise` idiom.") matched `except \w`
|
|
505
|
+
twice. The `code_indicators` count is now only a cheap **pre-filter**; a block is
|
|
506
|
+
reported as commented-out code solely when a prose-vs-code discriminator confirms
|
|
507
|
+
it. For **Python** that means a contiguous run of ≥2 of the de-commented body
|
|
508
|
+
lines **`ast.parse`-s** as valid statements (a leading prose intro line that
|
|
509
|
+
alone breaks parsing is trimmed, so a real block introduced by a sentence — like
|
|
510
|
+
the corpus oracle's `# legacy implementation kept around just in case:` followed
|
|
511
|
+
by a commented `for`/`return` body — is still caught via its inner code run).
|
|
512
|
+
For every language there is a fallback: ≥2 **distinct strong** structural signals
|
|
513
|
+
(an assignment with an identifier LHS, a `def`/`class`/`import`/`func`/`const`
|
|
514
|
+
header, a bare `name(...)` call statement, or a block-header line). A single
|
|
515
|
+
keyword inside grammatical English is not a strong signal, so prose does not reach
|
|
516
|
+
the bar. On `vigil_forensic` itself this cut `commented_code_scan` **22 → 0**
|
|
517
|
+
(all 22 were prose: design-rationale / FP-tightening notes that referenced code
|
|
518
|
+
in backticks — verified by inspecting each block); the corpus oracle
|
|
519
|
+
(`tests/oracle/sample_quality.py:69`, a genuine 5-line commented-out block) stays
|
|
520
|
+
flagged, so recall is preserved. **Honest limit:** discrimination is per-block and
|
|
521
|
+
purely syntactic — the AST path is Python-only, and a *non-Python* prose comment
|
|
522
|
+
that happens to start ≥2 lines with assignment/call/header shapes could still be
|
|
523
|
+
flagged; the "22 → 0" figure is measured on this repo, not a guarantee for all
|
|
524
|
+
codebases. TDD'd in [`tests/test_commented_code_fp.py`](tests/test_commented_code_fp.py).
|
|
525
|
+
|
|
526
|
+
10. **Round-2 FP cuts on large real projects (TYPE_CHECKING imports,
|
|
527
|
+
magic-number bounds, docstring & duplicate tightening).** Measured against
|
|
528
|
+
the vendored `click` / `mcp` / `filelock` packages, line-by-line inspection
|
|
529
|
+
of the noisiest gates found four distinct false-positive *sources*; each was
|
|
530
|
+
fixed at the source (not suppressed) and the corpus oracle stays **22/22**.
|
|
531
|
+
Totals: **click 128 → 66, mcp 236 → 189, filelock 38 → 14.**
|
|
532
|
+
- **`unused_import_scan` on `if TYPE_CHECKING:` imports.** Two bugs. (a) The
|
|
533
|
+
TYPE_CHECKING line-collector walked the guard's `else:` branch too, so
|
|
534
|
+
runtime fallback imports (`filelock/__init__.py:26-27`) were mis-tagged as
|
|
535
|
+
type-only and flagged. Now only the `if` body is scanned. (b) A
|
|
536
|
+
TYPE_CHECKING import was counted as "used" only if it backed a *type annotation* — but
|
|
537
|
+
it also legitimately backs runtime `TypeVar(...)` construction
|
|
538
|
+
(`click/shell_completion.py:59`), `te.ParamSpec` / `sys.version_info`
|
|
539
|
+
attribute access (`click/utils.py:26`, `filelock/asyncio.py:22`), and
|
|
540
|
+
`__all__` re-exports. These are now counted as uses. A genuinely dead
|
|
541
|
+
TYPE_CHECKING import (referenced nowhere) is still flagged. click `2 → 0`,
|
|
542
|
+
filelock `7 → 0`.
|
|
543
|
+
- **`magic_number_scan` bounds.** The old window suppressed only `-10..10`
|
|
544
|
+
plus a fixed safe-set, so every bare small integer (terminal widths `24`,
|
|
545
|
+
ASCII `127`, byte/column values `11/12/20/50`) and sub-unit float
|
|
546
|
+
(`0.1`/`0.5`) dominated the noise. The small-int suppression bound is
|
|
547
|
+
raised to `|n| < 256` and sub-unit floats are skipped; HTTP codes / powers
|
|
548
|
+
of two / time constants remain explicitly safe. Large/unusual literals
|
|
549
|
+
(oracle's `86400`, mcp's `8707`) stay flagged. click `11 → 0`.
|
|
550
|
+
- **`docstring_param_scan` rebuilt on AST.** The old `def …(([^)]*))` regex
|
|
551
|
+
truncated parameters at the first `)` inside an annotation
|
|
552
|
+
(`f: t.Callable[..., t.Any]` → garbage param `t.Any]`) and could not span
|
|
553
|
+
multi-line / overloaded signatures, yielding 16 phantom mismatches on
|
|
554
|
+
click (zero real). Parameters now come from `ast` (including `*args` /
|
|
555
|
+
`**kwargs`, which idiomatic docstrings document by bare name), the docstring
|
|
556
|
+
is read via `ast.get_docstring`, Google-style `Args:` parsing stops at the
|
|
557
|
+
next `Returns:`/`Raises:` section (no more `Returns`/`Raises` "params"), and
|
|
558
|
+
the reST `:param <type> name:` form is parsed by last-token. Only the
|
|
559
|
+
genuine **documented-but-absent-parameter** direction is reported. click
|
|
560
|
+
`16 → 0`; mcp/filelock retain only real drift (e.g. `mcp …/server.py:125`
|
|
561
|
+
documents `server` for a param renamed to `_`).
|
|
562
|
+
- **`duplicate_scan` signature/parameter mirrors.** ~75 % of click's 38
|
|
563
|
+
hits and filelock's 13 were `@overload` stubs, parameter-list mirrors, and
|
|
564
|
+
shared signatures (e.g. filelock `AsyncFileLockMeta.__call__` ↔
|
|
565
|
+
`BaseAsyncFileLock.__init__`) — typing scaffolding repeated by API
|
|
566
|
+
contract, not refactorable logic. Signature-scaffolding lines (decorators,
|
|
567
|
+
`def` headers, bare `name: type = default,` parameter lines, `): ...`
|
|
568
|
+
stubs) are excluded from the duplicate-fingerprint, and a region must span
|
|
569
|
+
**≥ 5 meaningful lines** to report. Genuine multi-statement logic
|
|
570
|
+
duplicates survive (oracle's 6-line `route_alpha`/`route_beta`; click's
|
|
571
|
+
`_termui_impl.py` pager fallbacks). click `38 → 5`, filelock `13 → 2`.
|
|
572
|
+
- **Left as real (not tightened):** `context_fallback_save.fallback_without_else`
|
|
573
|
+
(4 on click, 4 on mcp). Inspected — these are heterogeneous low-severity
|
|
574
|
+
advisories (input-validation `return 400`, mode dispatch, non-task counter
|
|
575
|
+
increments). They are *advisory by design* ("a reviewer must confirm …
|
|
576
|
+
intentional") and no single safe predicate separates them from real
|
|
577
|
+
fallbacks without risking over-suppression, so they are reported honestly
|
|
578
|
+
rather than gamed away. TDD'd in
|
|
579
|
+
[`tests/test_fp_round2.py`](tests/test_fp_round2.py).
|
|
580
|
+
|
|
581
|
+
**Residual honesty.** The remaining output is dominated by the objective
|
|
582
|
+
`size.*` gates (real breaches of published linter limits) and
|
|
583
|
+
`broad_except.swallow` (genuine `except: pass`). These are trustworthy. The
|
|
584
|
+
zone heuristic still exists as an *opt-in* capability for teams that want it on
|
|
585
|
+
their own diffs — re-enable it per run via the `gates` argument. It is not
|
|
586
|
+
deleted, just no longer in the default scan.
|
|
587
|
+
|
|
588
|
+
---
|
|
589
|
+
|
|
590
|
+
## Per-project configurability
|
|
591
|
+
|
|
592
|
+
Three knobs let a project tune the forensic auditor without forking it:
|
|
593
|
+
**disable noisy gates**, **raise the severity floor**, and **add your own gate**.
|
|
594
|
+
|
|
595
|
+
### Disable specific gates — `.cortex/disabled_gates.json`
|
|
596
|
+
|
|
597
|
+
Drop a `disabled_gates.json` into your project's `.cortex/` directory to switch
|
|
598
|
+
off gates that are noisy for your codebase. `run_forensic_audit` auto-loads it
|
|
599
|
+
from `<project_dir>/.cortex/disabled_gates.json`. Two accepted shapes:
|
|
600
|
+
|
|
601
|
+
```jsonc
|
|
602
|
+
// a bare list of gate check_ids …
|
|
603
|
+
["broad_except", "duplication"]
|
|
604
|
+
```
|
|
605
|
+
```jsonc
|
|
606
|
+
// … or an object with a "disabled" key
|
|
607
|
+
{ "disabled": ["broad_except", "duplication"] }
|
|
608
|
+
```
|
|
609
|
+
|
|
610
|
+
A disabled gate never runs (produces no findings) and is reported in
|
|
611
|
+
`meta["gates_skipped"]` with reason `"disabled_by_project"`:
|
|
612
|
+
|
|
613
|
+
```python
|
|
614
|
+
from vigil_forensic import run_forensic_audit
|
|
615
|
+
|
|
616
|
+
res = run_forensic_audit("/path/to/project")
|
|
617
|
+
# .cortex/disabled_gates.json contains ["broad_except"]
|
|
618
|
+
assert {e["gate_id"] for e in res["meta"]["gates_skipped"]
|
|
619
|
+
if e["reason"] == "disabled_by_project"} == {"broad_except"}
|
|
620
|
+
```
|
|
621
|
+
|
|
622
|
+
Behavior:
|
|
623
|
+
|
|
624
|
+
- The disable list takes precedence over every other resolution rule — a
|
|
625
|
+
disabled gate is always reported as `disabled_by_project`, even one that the
|
|
626
|
+
static-mode policy or a `gates=` filter would have skipped anyway.
|
|
627
|
+
- **Missing or empty file → no-op.** Nothing is disabled; all gates run.
|
|
628
|
+
- **Malformed file never raises.** A JSON-syntax error, an unreadable file, or a
|
|
629
|
+
wrong-typed payload is *logged-and-ignored* (narrow exception handling, no
|
|
630
|
+
bare `except`): the audit completes, nothing is disabled, and a
|
|
631
|
+
`meta.profile_load_failed` finding (HIGH/WARN) records the failure so the
|
|
632
|
+
ignored disable list is fail-loud rather than silently swallowed.
|
|
633
|
+
- `.cortex/` is git-ignored by default in this repo's audit policy, so the file
|
|
634
|
+
is a *local* opt-out unless you commit it deliberately.
|
|
635
|
+
|
|
636
|
+
The same file is honored by the CLI (`python -m vigil_forensic.self_audit
|
|
637
|
+
--project <dir>`).
|
|
638
|
+
|
|
639
|
+
> Gate ids are the `check_id` values — run `python -m vigil_forensic.self_audit
|
|
640
|
+
> --list-gates` to print the file-based gates, or read the `GATE_SPECS` table in
|
|
641
|
+
> [`vigil_forensic/gate_packs/universal.py`](vigil_forensic/gate_packs/universal.py).
|
|
642
|
+
> Note that a *family* gate id (`broad_except`) and its sub-checks emitted under a
|
|
643
|
+
> dotted child id (`broad_except.return_none`) are produced by the same runner;
|
|
644
|
+
> disabling the family id (`broad_except`) stops that runner entirely. A
|
|
645
|
+
> *separately registered* gate such as `broad_except.hidden_sentinel` has its
|
|
646
|
+
> own id and must be disabled separately.
|
|
647
|
+
|
|
648
|
+
### Raise the severity floor — `severity=`
|
|
649
|
+
|
|
650
|
+
`run_forensic_audit(project_dir, *, severity="LOW")` filters the returned
|
|
651
|
+
`findings` to those **at or above** the floor. Ordering is
|
|
652
|
+
`LOW < MEDIUM < HIGH < CRITICAL` (case-insensitive); the default `"LOW"` returns
|
|
653
|
+
everything.
|
|
654
|
+
|
|
655
|
+
```python
|
|
656
|
+
res = run_forensic_audit("/path/to/project", severity="HIGH")
|
|
657
|
+
# res["findings"] contains only HIGH and CRITICAL findings.
|
|
658
|
+
```
|
|
659
|
+
|
|
660
|
+
The `meta.*` counters (`severity_counts`, `total_findings`, `category_counts`)
|
|
661
|
+
are computed **before** the floor is applied, so they always reflect the full
|
|
662
|
+
finding set; `meta["findings_after_severity_filter"]` records the post-filter
|
|
663
|
+
count whenever a non-LOW floor is used. The process exit code is likewise driven
|
|
664
|
+
by the unfiltered HIGH/CRITICAL counts.
|
|
665
|
+
|
|
666
|
+
### Add your own gate
|
|
667
|
+
|
|
668
|
+
There is **no plugin auto-discovery** — the gate set is the module-level
|
|
669
|
+
`GATE_SPECS` tuple in
|
|
670
|
+
[`vigil_forensic/gate_packs/universal.py`](vigil_forensic/gate_packs/universal.py),
|
|
671
|
+
resolved once at import into `DEFAULT_GATE_CHECKS`
|
|
672
|
+
([`gate_registry.py`](vigil_forensic/gate_registry.py)). Registering a gate
|
|
673
|
+
means adding a spec to that tuple. The spec shape is a 3-tuple:
|
|
674
|
+
|
|
675
|
+
```python
|
|
676
|
+
(check_id, category, runner)
|
|
677
|
+
# │ │ └── Callable[[PostExecGateContext], GateCheckResult]
|
|
678
|
+
# │ └── a vigil_forensic._shared.GateCategory enum member
|
|
679
|
+
# └── str, the gate id (also the prefix for any dotted child ids it emits)
|
|
680
|
+
```
|
|
681
|
+
|
|
682
|
+
The **runner** takes the synthetic `PostExecGateContext` (its
|
|
683
|
+
`ctx.file_snapshots` maps each touched file's normalized path → a
|
|
684
|
+
`GateFileSnapshot` with `.text`, `.line_count`, `.exists`) and returns a
|
|
685
|
+
`GateCheckResult`:
|
|
686
|
+
|
|
687
|
+
```python
|
|
688
|
+
from vigil_forensic._shared import (
|
|
689
|
+
GateCheckResult, GateFinding, GateCategory, GateSeverity,
|
|
690
|
+
GateImpact, EvidenceReference,
|
|
691
|
+
)
|
|
692
|
+
|
|
693
|
+
def run_no_print_checks(ctx) -> GateCheckResult:
|
|
694
|
+
findings = []
|
|
695
|
+
for path, snap in ctx.file_snapshots.items():
|
|
696
|
+
if not snap.exists or not path.endswith(".py"):
|
|
697
|
+
continue
|
|
698
|
+
for lineno, line in enumerate(snap.text.splitlines(), start=1):
|
|
699
|
+
if line.lstrip().startswith("print("):
|
|
700
|
+
findings.append(GateFinding(
|
|
701
|
+
check_id="no_print",
|
|
702
|
+
category=GateCategory.REPORTING,
|
|
703
|
+
title="Stray print() in source",
|
|
704
|
+
severity=GateSeverity.LOW,
|
|
705
|
+
impact=GateImpact.WARN,
|
|
706
|
+
summary=f"print() at {path}:{lineno}",
|
|
707
|
+
recommendation="Use logging instead of print().",
|
|
708
|
+
evidence=(EvidenceReference(
|
|
709
|
+
kind="line", path=path, detail=f"L{lineno}", ok=False),),
|
|
710
|
+
fingerprint=f"no_print:{path}:{lineno}",
|
|
711
|
+
))
|
|
712
|
+
return GateCheckResult(
|
|
713
|
+
check_id="no_print", category=GateCategory.REPORTING,
|
|
714
|
+
findings=tuple(findings),
|
|
715
|
+
)
|
|
716
|
+
```
|
|
717
|
+
|
|
718
|
+
To wire it in (the supported path — edit the pack):
|
|
719
|
+
|
|
720
|
+
1. Add `("no_print", GateCategory.REPORTING, run_no_print_checks)` to
|
|
721
|
+
`GATE_SPECS` in `gate_packs/universal.py`.
|
|
722
|
+
2. Add `"no_print"` to the `_FILE_BASED_GATES` allowlist in
|
|
723
|
+
[`vigil_forensic/self_audit.py`](vigil_forensic/self_audit.py) — the static
|
|
724
|
+
auditor only runs gate ids in that set (anything else is reported as
|
|
725
|
+
`not_file_based` and skipped). A runtime-only gate would instead get a
|
|
726
|
+
`skip_in_static` flag in `GATE_FLAGS`.
|
|
727
|
+
|
|
728
|
+
Each `GateFinding` is validated on construction: `confidence` must be in
|
|
729
|
+
`[0.0, 1.0]`, and a non-`"applicable"` `applicability` requires a non-empty
|
|
730
|
+
`applicability_reason` (see `GateFinding.__post_init__` in `_shared.py`).
|
|
731
|
+
|
|
732
|
+
> If you must register a gate **without** editing the pack (e.g. a downstream
|
|
733
|
+
> wrapper), `vigil_forensic.gate_registry.DEFAULT_GATE_CHECKS` is a plain tuple
|
|
734
|
+
> you can extend before calling `run_gates`, and `run_gates(..., gates_filter=…)`
|
|
735
|
+
> selects a subset — but a new id still has to be present in `_FILE_BASED_GATES`
|
|
736
|
+
> to run in static mode, so editing the pack is the honest, complete path.
|
|
737
|
+
|
|
738
|
+
### `forensic_clusters` in static mode (static-safe subset)
|
|
739
|
+
|
|
740
|
+
The `forensic_clusters` pack bundles ~40 cluster runners. Most are purely
|
|
741
|
+
static (they read only `file_snapshots` / text / AST): security patterns,
|
|
742
|
+
secrets, mutable defaults, resource leaks, hardcoded paths, dead code,
|
|
743
|
+
unreachable code, shadowed builtins, magic numbers, TODO debt, import cycles,
|
|
744
|
+
exception swallowing, and more. A minority are **runtime-only** — they need a
|
|
745
|
+
real post-execution context (`artifact_refs`, `transport_mode`,
|
|
746
|
+
reported-vs-observed changed files, validation-contract proofs, or a disk
|
|
747
|
+
re-read compared against an expected hash) and are meaningless / false-positive
|
|
748
|
+
prone without it. The runtime-only set is listed in
|
|
749
|
+
[`forensic_cluster_runners/core.py`](vigil_forensic/gate_checks/forensic_cluster_runners/core.py)
|
|
750
|
+
as `_RUNTIME_ONLY_CLUSTERS` (`cluster2_success_without_proof`,
|
|
751
|
+
`cluster3_proxy_as_truth`, `cluster4_config_accepted_ignored_*`,
|
|
752
|
+
`cluster6_state_divergence`, `cluster7_fallback_hides_truth`,
|
|
753
|
+
`cluster10_edit_consistency`, `cluster11_mutation_verified`).
|
|
754
|
+
|
|
755
|
+
So the pack is **not** flagged `skip_in_static`. Instead, when `run_gates`
|
|
756
|
+
hands it a synthetic static context (`_is_static_mode(ctx)` → no runtime
|
|
757
|
+
signals), the runner filters the runtime-only clusters out and runs only the
|
|
758
|
+
static-safe checks. When a real execution context is present the full pack runs
|
|
759
|
+
unchanged. The worst FP this prevents is `cluster11_mutation_verified`: it
|
|
760
|
+
hashes the *decoded* snapshot text but the assessor hashes the *raw* disk bytes,
|
|
761
|
+
so every CRLF / BOM file would otherwise fire a bogus "content DIVERGED" HIGH.
|
|
762
|
+
|
|
763
|
+
> **`dead_code_scan` caveat.** Cluster 20 marks a public function "dead" when it
|
|
764
|
+
> is not referenced anywhere in the **scanned set**. `run_forensic_audit` always
|
|
765
|
+
> discovers the whole project directory, so cross-file references resolve and it
|
|
766
|
+
> is accurate (0 findings on `filelock`, which uses `__all__`). It can over-report
|
|
767
|
+
> only on a *partial / single-file* scan, where a function's caller lives in a
|
|
768
|
+
> file outside the scan — that path is not used by `run_forensic_audit`. Findings
|
|
769
|
+
> are MEDIUM, and names in `__all__`, framework-decorated, or matching standalone
|
|
770
|
+
> markers are already classified as `standalone_utility` and skipped.
|
|
771
|
+
|
|
772
|
+
---
|
|
773
|
+
|
|
774
|
+
## Running tests
|
|
775
|
+
|
|
776
|
+
```bash
|
|
777
|
+
pytest tests/ -p no:cacheprovider
|
|
778
|
+
```
|
|
779
|
+
|
|
780
|
+
Do not use parallel execution (`-n auto`): keep the run light, because tree-sitter grammars load on first call.
|