vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,725 @@
1
+ """Forensic Self-Audit — standalone static audit using autoforensics gates.
2
+
3
+ Adapted from the Vigil autoforensics self_audit.
4
+ All cluster imports rewritten to vigil_forensic.* or _stubs.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import argparse
9
+ import concurrent.futures
10
+ import json
11
+ import sys
12
+ import threading
13
+ import traceback
14
+ from dataclasses import dataclass, field
15
+ from pathlib import Path
16
+ from typing import Any, Callable, Optional
17
+ import logging
18
+
19
# Per-thread slot for the active cancel event. run_gates stores the event in
# _tl_cancel.event before dispatching gates, so cluster runners (which are not
# handed cancel_event directly) can look it up through get_cancel_event().
_tl_cancel = threading.local()


def get_cancel_event() -> Optional[Any]:
    """Return the cancel event stored for the calling thread, or None if unset."""
    try:
        return _tl_cancel.event
    except AttributeError:
        # Nothing has been stored on this thread yet.
        return None
28
+
29
+ from vigil_forensic._shared import GateCheckResult, GateFinding
30
+ from vigil_forensic.gate_models import (
31
+ PostExecGateContext, RuntimeState, VerificationSummary, detect_source_package_roots,
32
+ )
33
+ from vigil_forensic.gate_packs.universal import GATE_FLAGS
34
+ from vigil_forensic.gate_registry import DEFAULT_GATE_CHECKS
35
+ from vigil_forensic._stubs import ValidationContractProfile, PocketCoderForensicReport
36
+
37
+ _log = logging.getLogger(__name__)
38
+
39
+ _MAX_ERRORS_BEFORE_TRUNCATE = 10
40
+
41
+ _SKIP_IN_STATIC_MODE: frozenset[str] = frozenset(
42
+ gid for gid, flags in GATE_FLAGS.items() if "skip_in_static" in flags
43
+ )
44
+
45
+ _SELF_MATCH_PRONE_GATES = frozenset({
46
+ "magic_number_scan",
47
+ "todo_scan",
48
+ "legacy_compat_debt.stale_migration_marker",
49
+ })
50
+
51
+ # Noisy, opt-in-only gates. These run ONLY when explicitly named in the gates
52
+ # filter (run_forensic_audit(..., gates=[...]) / --gates). They are excluded
53
+ # from a default full scan because they produce a high false-positive rate on
54
+ # finished third-party code.
55
+ #
56
+ # god_object_zones infers "responsibility zones" from FUNCTION-NAME PREFIXES
57
+ # against a fixed verb list (acquire/release/read/write/open/close/...). A
58
+ # cohesive class whose natural method names happen to match several verbs (e.g.
59
+ # a read/write lock) is wrongly flagged as a god object — ~0 true positives on
60
+ # the filelock/click/mcp corpus. The capability is preserved (opt in via
61
+ # gates=["god_object_zones"]); it is just not part of the default set. Re-enable
62
+ # for your own repo by listing it in the `gates` argument or, project-wide, by
63
+ # NOT listing it in `.cortex/disabled_gates.json` and passing it explicitly.
64
+ #
65
+ # The twin name-prefix heuristic that previously lived in
66
+ # size_complexity.zone_overload was REMOVED outright (it double-reported the
67
+ # same files as god_object_zones); the zone heuristic now has a single home here.
68
+ _NOISY_OPT_IN_GATES: frozenset[str] = frozenset({
69
+ "god_object_zones",
70
+ })
71
+
72
+ _SELF_MATCH_PATH_PREFIX = "gate_checks/"
73
+
74
+
75
+ def _is_self_match_finding(finding: GateFinding) -> bool:
76
+ check_id = getattr(finding, "check_id", "") or ""
77
+ if check_id not in _SELF_MATCH_PRONE_GATES:
78
+ return False
79
+ evidence = getattr(finding, "evidence", ()) or ()
80
+ if not evidence:
81
+ return False
82
+ path = getattr(evidence[0], "path", "") or ""
83
+ normalized = path.replace("\\", "/").lstrip("./")
84
+ return normalized.startswith(_SELF_MATCH_PATH_PREFIX)
85
+
86
+
87
_DEFAULT_EXCLUDE_DIRS = frozenset({
    "__pycache__", ".git", ".venv", "venv", ".cortex", "node_modules",
    "libs", ".pytest_cache", "build", "dist", ".mypy_cache", ".ruff_cache", ".tox",
    # Vendored / build-output dirs that can appear OUTSIDE a venv (e.g. a repo
    # that ships a checked-in dependency tree). Excluded so the file-count guard
    # and the gate walk never spend time on third-party code.
    "site-packages", "dist-packages", ".eggs", ".next",
    # Tool / agent config dirs — never project source, and (e.g. .claude) can
    # hold thousands of files (worktrees, plans, memory). Mirrors the code-map
    # exclusion set so both tools agree on what "project source" means.
    ".claude", ".codex", ".prompt-engineer", ".a1",
})


def discover_source_files(
    project_dir: Path,
    exclude_dirs: frozenset[str] = _DEFAULT_EXCLUDE_DIRS,
) -> list[str]:
    """Collect the project's source files as sorted, '/'-separated relative paths.

    The tree is walked top-down with ``os.walk`` and every directory whose
    name is in *exclude_dirs* is removed from the walk before it is entered,
    so excluded trees are never traversed. A file is kept when
    ``is_source_file`` accepts its path relative to the resolved *project_dir*.
    """
    import os
    from vigil_forensic.source_analysis import is_source_file

    root = project_dir.resolve()
    found: list[str] = []
    for current, subdirs, names in os.walk(str(root), topdown=True):
        # In-place slice assignment is what makes os.walk skip these subtrees.
        subdirs[:] = [name for name in subdirs if name not in exclude_dirs]
        here = Path(current)
        for name in names:
            candidate = here / name
            if not candidate.is_file():
                continue
            try:
                relative = candidate.relative_to(root)
            except ValueError:
                continue
            rel_posix = str(relative).replace("\\", "/")
            if is_source_file(rel_posix):
                found.append(rel_posix)
    found.sort()
    return found
135
+
136
+
137
def _probe_meta_integrity(project_dir: Path) -> None:
    """Check well-known audit artifacts and report unreadable or corrupt ones.

    Files probed, each only if it exists:

    * ``gate_profile.json`` -> ``meta.profile_load_failed``
    * ``.prompt-engineer/forensic_gates/false_positive_allowlist.json``
      -> ``meta.allowlist_corrupted``
    * every ``*.json`` below ``.cortex`` -> ``meta.artifact_unreadable`` for
      I/O errors, ``meta.artifact_corrupted`` for content that is not valid
      UTF-8 or not valid JSON

    Problems are reported through ``emit_meta_finding``; nothing is returned.
    """
    from vigil_forensic.meta_findings import emit_meta_finding

    def _probe_json(path: Path, unreadable_id: str, corrupted_id: str) -> None:
        """Emit a meta finding if *path* cannot be read or does not hold JSON."""
        try:
            raw = path.read_text(encoding="utf-8")
        except OSError as exc:  # PermissionError is a subclass of OSError
            emit_meta_finding(unreadable_id, path=str(path), detail=f"{type(exc).__name__}: {exc}")
            return
        except UnicodeDecodeError as exc:
            # Invalid UTF-8 is content corruption, not an I/O failure; without
            # this clause it would escape and abort the whole probe.
            emit_meta_finding(corrupted_id, path=str(path), detail=f"{type(exc).__name__}: {exc}")
            return
        try:
            json.loads(raw)
        except json.JSONDecodeError as exc:
            emit_meta_finding(corrupted_id, path=str(path), detail=f"JSONDecodeError: {exc}")

    project_dir = Path(project_dir)

    profile_path = project_dir / "gate_profile.json"
    if profile_path.is_file():
        _probe_json(profile_path, "meta.profile_load_failed", "meta.profile_load_failed")

    allowlist_path = project_dir / ".prompt-engineer" / "forensic_gates" / "false_positive_allowlist.json"
    if allowlist_path.is_file():
        _probe_json(allowlist_path, "meta.allowlist_corrupted", "meta.allowlist_corrupted")

    cortex = project_dir / ".cortex"
    if cortex.is_dir():
        try:
            cortex_children = sorted(cortex.rglob("*.json"))
        except OSError as exc:
            emit_meta_finding(
                "meta.artifact_unreadable",
                path=str(cortex),
                detail=f"{type(exc).__name__} walking .cortex: {exc}",
            )
            cortex_children = []
        for artifact in cortex_children:
            if not artifact.is_file():
                continue
            _probe_json(artifact, "meta.artifact_unreadable", "meta.artifact_corrupted")
179
+
180
+
181
def _load_project_disabled_gates(project_dir: Path) -> frozenset[str]:
    """Load the set of project-disabled gate IDs from ``.cortex/disabled_gates.json``.

    Ported from the Vigil ``cli_forensic_audit._load_project_disabled_gates``.
    Lets a project switch off noisy gates without code changes. The file may be
    either a bare JSON list of gate IDs::

        ["broad_except", "duplication"]

    or an object with a ``"disabled"`` key::

        {"disabled": ["broad_except", "duplication"]}

    Returns:
        The gate IDs as a frozenset of strings; an empty frozenset when the
        file is absent, unreadable, not valid UTF-8/JSON, or not list-shaped.

    Narrow exception handling: only JSON/UTF-8 decode, IO/permission, and
    coercion errors are caught. A corrupt / unreadable file surfaces as a
    ``meta.profile_load_failed`` finding (via the ``emit_meta_finding``
    side-channel) and yields an empty set — it never raises and never silently
    disables. Any other exception (a bug inside json/pathlib, or an upstream
    monkeypatch) must propagate rather than be swallowed.
    """
    from vigil_forensic.meta_findings import emit_meta_finding

    path = Path(project_dir) / ".cortex" / "disabled_gates.json"
    if not path.is_file():
        return frozenset()
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError comes from read_text on invalid UTF-8; it is a
        # ValueError, not an OSError, so it must be named explicitly here.
        emit_meta_finding(
            "meta.profile_load_failed",
            path=str(path),
            detail=f"{type(exc).__name__} in disabled_gates.json: {exc}",
        )
        return frozenset()
    except OSError as exc:  # includes FileNotFoundError and PermissionError
        emit_meta_finding(
            "meta.profile_load_failed",
            path=str(path),
            detail=f"{type(exc).__name__} reading disabled_gates.json: {exc}",
        )
        return frozenset()

    raw = payload.get("disabled", []) if isinstance(payload, dict) else payload
    if isinstance(raw, str):
        # A bare string is iterable, but iterating it would yield one bogus
        # "gate ID" per character instead of failing.
        emit_meta_finding(
            "meta.profile_load_failed",
            path=str(path),
            detail="disabled_gates.json payload is a string, expected a list of gate IDs",
        )
        return frozenset()
    try:
        return frozenset(str(gid) for gid in raw)
    except TypeError as exc:
        emit_meta_finding(
            "meta.profile_load_failed",
            path=str(path),
            detail=f"disabled_gates.json payload is not iterable: {type(raw).__name__}: {exc}",
        )
        return frozenset()
236
+
237
+
238
# Gate IDs that run_gates treats as file-based; any gate not listed here is
# skipped with reason "not_file_based".
_FILE_BASED_GATES: frozenset[str] = frozenset((
    "broad_except",
    "broad_except.hidden_sentinel",
    "fallback",
    "context_fallback_save",
    "embedded_string",
    "duplication",
    "file_proliferation",
    "config_ssot",
    "size_complexity",
    "empty_output",
    "syntax_validity",
    "heartbeat_staleness",
    "god_object_zones",
    "hotspot_inflation",
    "toctou_check_then_act",
    "atomic_write_safety",
    "encoding_safety",
    "subprocess_encoding",
    "contract_shape_drift",
    "import_integrity",
    "drift",
    "authority_checks",
    "boundary_breach",
    "performance",
    "runtime_behavior",
    "runtime_duplicate_side_effect",
    "init_order_regression",
    "conflict_touch",
    "test_quality",
    "test_suite_masking",
    "empty_test_module",
    "simulated_instead_of_executed_test",
    "temporal_freshness",
    "provenance",
    "reporting",
    "fix_without_test",
    "semantic_intent",
    "testing",
    "forensic_clusters",
    "project_specific",
    "hallucination",
    "artifact_completeness",
    "tool_hook_coverage",
    "codex_state",
    "policy_boundary",
    "draft_boundary",
    "codex_supervision",
    "ml_checks",
))
252
+
253
+
254
def _load_gate_profile_if_present(project_dir: Path) -> "Optional[Any]":
    """Locate and load the gate profile that applies to *project_dir*.

    Search order (the first profile that loads successfully wins):

    1. ``gate_profile.json`` or ``.cortex/gate_profile.json`` inside the
       audit target itself.
    2. ``gate_profile.json`` in an ancestor directory, nearest first.
    3. The ``gate_profile.json`` shipped inside the package.

    Returns:
        A ``RepoGateProfile`` on success, or ``None`` when no candidate exists
        or none of them could be loaded.

    A candidate that cannot be read, parsed, or converted is logged with a
    warning and skipped — it is never raised, and it never prevents the later
    candidates from being tried.
    """
    from vigil_forensic._shared import RepoGateProfile, GateCategory, GateImpact

    _PROFILE_CANDIDATES = ("gate_profile.json", ".cortex/gate_profile.json")
    _GENERIC_GENERATED_ROOTS: tuple[str, ...] = (
        ".git", "__pycache__", ".pytest_cache", "dist", "build",
        "node_modules", "venv", ".venv",
    )
    _GENERIC_SIZE_THRESHOLDS: dict[str, int] = {
        "file_warn": 600, "file_revise": 800,
        "function_warn": 80, "function_revise": 120,
        "nesting_warn": 4, "nesting_revise": 6,
    }

    def _impact_from_value(v: object) -> GateImpact:
        """Coerce *v* to a GateImpact, falling back to WARN for unknown values."""
        try:
            return GateImpact(str(v))
        except ValueError:
            return GateImpact.WARN

    def _profile_from_dict(payload: dict, path: Path) -> RepoGateProfile:
        """Build a RepoGateProfile from a decoded JSON object, filling defaults."""
        enabled_raw = payload.get("enabled_categories") or [item.value for item in GateCategory]
        fallback_raw = payload.get("forbidden_fallback_patterns") or {}
        canonical_raw = payload.get("canonical_literal_owners") or {}
        size = payload.get("size_thresholds") or {}
        defaults = _GENERIC_SIZE_THRESHOLDS
        return RepoGateProfile(
            profile_name=str(payload.get("profile_name") or "generic"),
            version=str(payload.get("version") or "1.0"),
            generated_roots=tuple(payload.get("generated_roots") or _GENERIC_GENERATED_ROOTS),
            vendored_roots=tuple(payload.get("vendored_roots") or (".vendor", "vendor", "node_modules")),
            forbidden_roots=tuple(payload.get("forbidden_roots") or ()),
            critical_roots=tuple(payload.get("critical_roots") or ()),
            allowlisted_large_files=tuple(payload.get("allowlisted_large_files") or ()),
            performance_sensitive_roots=tuple(payload.get("performance_sensitive_roots") or ()),
            required_test_roots=tuple(payload.get("required_test_roots") or ()),
            canonical_literal_owners={str(k): tuple(v) for k, v in canonical_raw.items()},
            forbidden_fallback_patterns={str(k): _impact_from_value(v) for k, v in fallback_raw.items()},
            size_thresholds={
                "file_warn": int(size.get("file_warn", defaults["file_warn"])),
                "file_revise": int(size.get("file_revise", defaults["file_revise"])),
                "function_warn": int(size.get("function_warn", defaults["function_warn"])),
                "function_revise": int(size.get("function_revise", defaults["function_revise"])),
                "nesting_warn": int(size.get("nesting_warn", defaults["nesting_warn"])),
                "nesting_revise": int(size.get("nesting_revise", defaults["nesting_revise"])),
            },
            severity_overrides={str(k): _impact_from_value(v) for k, v in (payload.get("severity_overrides") or {}).items()},
            required_proofs_overrides={str(k): tuple(v) for k, v in (payload.get("required_proofs_overrides") or {}).items()},
            reporting_required_artifacts=tuple(payload.get("reporting_required_artifacts") or ()),
            enabled_categories=tuple(GateCategory(item) for item in enabled_raw),
            enabled_checks=tuple(payload.get("enabled_checks") or ()),
            disabled_checks=tuple(payload.get("disabled_checks") or ()),
            profile_path=str(path),
        )

    def _try_load(path: Path) -> "Optional[RepoGateProfile]":
        """Load one candidate; return None (after logging) on any failure."""
        if not path.is_file():
            return None
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
        except Exception as exc:  # deliberate best-effort boundary: log and skip
            _log.warning("gate_profile load failed (%s): %s", path, exc)
            return None
        if not isinstance(payload, dict):
            _log.warning(
                "gate_profile load failed (%s): top-level JSON value is %s, expected an object",
                path, type(payload).__name__,
            )
            return None
        try:
            # Conversion can fail on a well-formed JSON file with the wrong
            # shape (unknown category name, non-numeric threshold, scalar
            # where a list is expected). Keep that inside the log-and-skip
            # contract instead of letting it escape to the caller.
            return _profile_from_dict(payload, path)
        except (AttributeError, TypeError, ValueError) as exc:
            _log.warning("gate_profile load failed (%s): %s: %s", path, type(exc).__name__, exc)
            return None

    root = Path(project_dir).resolve()

    # 1) Prefer a profile co-located with the audit target (existing behavior).
    for candidate in _PROFILE_CANDIDATES:
        result = _try_load(root / candidate)
        if result is not None:
            return result

    # 2) Fallback: walk up to find a shipped default `gate_profile.json` in an
    #    ancestor directory (e.g. the repo root) when the audit target is a
    #    sub-package or an external path. Config discovery by ancestor-walk is
    #    the same pattern linters/git use; the target-local profile always wins.
    #    A malformed ancestor file is logged-and-skipped: the walk continues
    #    upward and, failing that, falls through to the packaged default.
    for ancestor in root.parents:
        result = _try_load(ancestor / "gate_profile.json")
        if result is not None:
            return result

    # 3) Last resort: the package's OWN shipped gate_profile.json. Without this,
    #    an external target (e.g. an arbitrary path with no ancestor profile)
    #    silently fell back to the STRICT code-default thresholds (600/800/4)
    #    instead of the shipped, documented defaults (750/1000/5). The shipped
    #    profile is the effective default for every target. Located INSIDE the
    #    vigil_forensic package so it ships in the wheel (see
    #    _packaged_gate_profile_path).
    packaged = _packaged_gate_profile_path()
    if packaged is not None and packaged.is_file():
        result = _try_load(packaged)
        if result is not None:
            return result

    return None
358
+
359
+
360
+ def _packaged_gate_profile_path() -> "Optional[Path]":
361
+ """Return the path to the package's shipped ``gate_profile.json``.
362
+
363
+ The default profile ships INSIDE the ``vigil_forensic`` package (next to
364
+ this module) so it is included in the wheel/sdist via
365
+ ``[tool.setuptools.package-data]`` and is therefore available after a plain
366
+ ``pip install`` — there is no repo root at install time. Resolved relative
367
+ to this module so it works regardless of the caller's cwd or the audit
368
+ target location. Returns None if the file cannot be located.
369
+ """
370
+ here = Path(__file__).resolve()
371
+ # here == .../vigil_forensic/self_audit.py → .../vigil_forensic/gate_profile.json
372
+ candidate = here.parent / "gate_profile.json"
373
+ return candidate if candidate.is_file() else None
374
+
375
+
376
def build_synthetic_context(project_dir: Path, source_files: list[str]) -> PostExecGateContext:
    """Assemble a minimal PostExecGateContext for a standalone full-scan audit.

    Every path in *source_files* is reported as both touched and observed as
    changed, and a snapshot is read for each of them. The contract, forensic
    report, runtime state and verification summary are built from empty
    mappings; the repo profile comes from ``_load_gate_profile_if_present``.
    """
    from vigil_forensic.gate_checks.common import normalize_path, read_snapshot

    snapshots = {}
    for rel_path in source_files:
        key = normalize_path(rel_path)
        snapshots[key] = read_snapshot(project_dir, rel_path)

    profile = _load_gate_profile_if_present(project_dir)

    audit_label = "FORENSIC_SELF_AUDIT"
    return PostExecGateContext(
        project_dir=project_dir,
        session_number=0,
        task_id=audit_label,
        a1_task_id=audit_label,
        validation_contract=ValidationContractProfile.from_mapping({}),
        forensic_report=PocketCoderForensicReport.from_mapping({}),
        runtime_state=RuntimeState.from_mapping({}),
        verification_summary=VerificationSummary.from_mapping({}),
        attempt_id="self_audit",
        gate_round=1,
        touched_files=tuple(source_files),
        changed_files_observed=tuple(source_files),
        # A standalone audit is always a full scan, never an incremental diff.
        is_full_scan=True,
        source_package_roots=detect_source_package_roots(project_dir),
        file_snapshots=snapshots,
        repo_profile=profile,
        project_context=None,
    )
404
+
405
+
406
@dataclass
class GateOutcome:
    """Result of running a single gate: either its findings or an error string."""

    # Identifier of the gate this outcome belongs to.
    check_id: str
    # True when the gate runner returned normally, False when it raised.
    ok: bool
    # "ExceptionType: message" text for a failed gate; empty on success.
    error: str = ""
    # Findings and notes taken from the gate result; each instance gets its own list.
    findings: list[GateFinding] = field(default_factory=list)
    notes: list[str] = field(default_factory=list)
413
+
414
+
415
def run_gates(
    ctx: PostExecGateContext,
    gates_filter: Optional[set[str]] = None,
    *,
    workers: int = 1,
    cancel_event: Optional[Any] = None,
    disabled_gates: Optional[frozenset[str]] = None,
) -> tuple[list[GateOutcome], list[dict[str, str]]]:
    """Run all file-based gates (or the subset in gates_filter) against ctx.

    Parameters
    ----------
    ctx:
        Gate context passed unchanged to every gate runner.
    gates_filter:
        Optional set of gate check_ids to restrict the run to. ``None`` or an
        empty set means "no filter".
    workers:
        Gates run on a thread pool when ``workers > 1`` and more than one gate
        is runnable; otherwise they run sequentially on the calling thread.
    cancel_event:
        Optional threading.Event (or any object with an .is_set() method).
        When set, no further gate is started — in both the sequential and the
        parallel path. Gates that never started have no entry in the returned
        outcome list. The MCP _jobs.py injects this by inspecting co_varnames,
        so the parameter name must stay as ``cancel_event``.
    disabled_gates:
        Optional set of gate check_ids the project has switched off (loaded
        from ``.cortex/disabled_gates.json``). A disabled gate never runs and
        is reported in the returned skip list with reason
        ``"disabled_by_project"``. This takes precedence over every other
        resolution rule so a project's intent to silence a gate is always
        visible in ``meta.gates_skipped``.

    Returns
    -------
    ``(outcomes, gates_skipped)``: one GateOutcome per gate that ran, in
    registry order, and one ``{"gate_id", "reason"}`` dict per gate that was
    filtered out before running.
    """
    disabled = disabled_gates or frozenset()
    gates_skipped: list[dict[str, str]] = []
    runnable: list[tuple[str, Callable[[PostExecGateContext], GateCheckResult]]] = []
    for check_id, _, runner in DEFAULT_GATE_CHECKS:
        if check_id in disabled:
            gates_skipped.append({"gate_id": check_id, "reason": "disabled_by_project"})
            continue
        if check_id in _SKIP_IN_STATIC_MODE:
            gates_skipped.append({"gate_id": check_id, "reason": "skipped_in_static_mode"})
            continue
        if check_id not in _FILE_BASED_GATES:
            gates_skipped.append({"gate_id": check_id, "reason": "not_file_based"})
            continue
        # Noisy opt-in gates run ONLY when explicitly named in the gates filter.
        if check_id in _NOISY_OPT_IN_GATES and not (gates_filter and check_id in gates_filter):
            gates_skipped.append({"gate_id": check_id, "reason": "opt_in_only"})
            continue
        if gates_filter and check_id not in gates_filter:
            gates_skipped.append({"gate_id": check_id, "reason": "not_in_gates_filter"})
            continue
        runnable.append((check_id, runner))

    if workers > 1 and len(runnable) > 1:
        # The pool workers publish cancel_event in their own thread-local slot
        # and honour it themselves; setting it on this thread would not reach them.
        outcomes = _run_gates_parallel(runnable, ctx, workers, cancel_event=cancel_event)
    else:
        outcomes = []
        # Store cancel_event in thread-local so cluster runners can access it
        # without a signature change (get_cancel_event() from this module).
        _tl_cancel.event = cancel_event
        try:
            for check_id, runner in runnable:
                if cancel_event is not None and cancel_event.is_set():
                    _log.info("run_gates: cancel_event set, stopping after %d gates", len(outcomes))
                    break
                outcomes.append(_run_single_gate(check_id, runner, ctx))
        finally:
            _tl_cancel.event = None
    return outcomes, gates_skipped


def _run_gates_parallel(
    runnable: list[tuple[str, Callable[[PostExecGateContext], GateCheckResult]]],
    ctx: PostExecGateContext,
    workers: int,
    cancel_event: Optional[Any] = None,
) -> list[GateOutcome]:
    """Run *runnable* gates on a thread pool and return outcomes in input order.

    The pool size is ``workers`` clamped to ``[1, len(runnable)]``. When
    *cancel_event* is given, each worker stores it in the thread-local slot
    read by ``get_cancel_event()`` while its gate runs, and a gate whose turn
    comes after the event was set is not started and yields no outcome.
    """
    effective_workers = max(1, min(int(workers), len(runnable)))

    def _guarded(check_id: str, runner: Callable[[PostExecGateContext], GateCheckResult]) -> Optional[GateOutcome]:
        # Runs on a pool thread. Returns None for a gate skipped by cancellation.
        if cancel_event is not None and cancel_event.is_set():
            return None
        # threading.local is per thread, so the event must be set here, on the
        # worker, for get_cancel_event() to see it inside the gate.
        _tl_cancel.event = cancel_event
        try:
            return _run_single_gate(check_id, runner, ctx)
        finally:
            _tl_cancel.event = None

    outcomes: list[GateOutcome] = []
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=effective_workers, thread_name_prefix="forensic-gate",
    ) as pool:
        pending = [
            (check_id, pool.submit(_guarded, check_id, runner))
            for check_id, runner in runnable
        ]
        for gate_id, fut in pending:
            try:
                outcome = fut.result()
            except KeyboardInterrupt:
                # Ctrl-C on the waiting thread is a request to stop, not a gate
                # failure: drop the gates that have not started and propagate.
                for _, queued in pending:
                    queued.cancel()
                raise
            except BaseException as exc:
                # Anything else a worker raised past _run_single_gate (e.g.
                # SystemExit from gate code) is recorded against that gate.
                outcome = GateOutcome(check_id=gate_id, ok=False, error=f"{type(exc).__name__}: {exc}")
            if outcome is not None:
                outcomes.append(outcome)
    if cancel_event is not None and cancel_event.is_set():
        _log.info("run_gates: cancel_event set, stopping after %d gates", len(outcomes))
    return outcomes
501
+
502
+
503
def _run_single_gate(
    check_id: str,
    runner: Callable[[PostExecGateContext], GateCheckResult],
    ctx: PostExecGateContext,
) -> GateOutcome:
    """Invoke one gate runner and wrap its result, or its failure, in a GateOutcome.

    On success the outcome carries list copies of the result's ``findings``
    and ``notes`` (missing or falsy attributes become empty lists). Any
    ``Exception`` raised while running the gate or reading its result is
    turned into an ``ok=False`` outcome whose ``error`` is
    ``"ExceptionType: message"``.
    """
    try:
        result = runner(ctx)
        outcome = GateOutcome(
            check_id=check_id,
            ok=True,
            findings=[*(getattr(result, "findings", ()) or ())],
            notes=[*(getattr(result, "notes", ()) or ())],
        )
    except Exception as exc:
        failure = f"{type(exc).__name__}: {exc}"
        return GateOutcome(check_id=check_id, ok=False, error=failure)
    return outcome
515
+
516
+
517
# Numeric rank for each severity label, lowest to highest.
_SEVERITY_ORDER: dict[str, int] = {"low": 0, "medium": 1, "high": 2, "critical": 3}


def finding_to_dict(f: GateFinding) -> dict[str, Any]:
    """Convert a finding into a plain dict of its reportable fields.

    ``category``, ``severity`` and ``impact`` are rendered as the string form
    of their ``.value`` when they have one, else of the object itself; a falsy
    result becomes ``""``. Each evidence item is reduced to its ``kind``,
    ``path`` and ``detail``. The optional ``confidence``, ``applicability``,
    ``analysis_mode`` and ``applicability_reason`` attributes default to
    ``1.0``, ``"applicable"``, ``"heuristic"`` and ``""`` when absent.
    """

    def _label(member: Any) -> str:
        # Enum-like members expose .value; plain strings are used as they are.
        return str(getattr(member, "value", member) or "")

    record: dict[str, Any] = {"check_id": f.check_id}
    record["category"] = _label(f.category)
    record["title"] = f.title
    record["severity"] = _label(f.severity)
    record["impact"] = _label(f.impact)
    record["summary"] = f.summary
    record["recommendation"] = f.recommendation

    evidence_rows = []
    for item in f.evidence or ():
        evidence_rows.append({"kind": item.kind, "path": item.path, "detail": item.detail})
    record["evidence"] = evidence_rows

    record["fingerprint"] = f.fingerprint
    optional_defaults = (
        ("confidence", 1.0),
        ("applicability", "applicable"),
        ("analysis_mode", "heuristic"),
        ("applicability_reason", ""),
    )
    for attr_name, fallback in optional_defaults:
        record[attr_name] = getattr(f, attr_name, fallback)
    return record
536
+
537
+
538
def build_json_report(
    outcomes: list[GateOutcome],
    project_dir: Path,
    source_file_count: int,
    gates_skipped: Optional[list[dict[str, str]]] = None,
) -> dict[str, Any]:
    """Fold gate outcomes into the report dictionary (schema version "1.1").

    Findings from successful outcomes are split by their ``applicability``
    attribute (treated as ``"applicable"`` when the attribute is absent):

    * ``"not_applicable"`` findings are left out of the report and only
      counted (``suppressed_not_applicable_count`` / ``..._by_gate``).
    * ``"unknown"`` findings are listed under ``uncertain_findings``; they
      are not removed from the main list by this classification.
    * Every finding that is not ``"not_applicable"`` and is not rejected by
      ``_is_self_match_finding`` goes into the main ``findings`` list, which
      also drives ``total_findings``, ``severity_counts`` and
      ``category_counts``.

    Outcomes with ``ok`` false contribute ``{"check_id", "error"}`` entries
    to ``errors`` and no findings.

    Severity and category labels are derived the same way ``finding_to_dict``
    serialises them (the member's ``.value`` when present, otherwise the
    member itself), so the counters agree with the serialised findings even
    when a finding carries a plain string instead of an enum member. Missing
    or empty labels are counted under ``"unknown"``.

    Args:
        outcomes: Gate outcomes to aggregate.
        project_dir: Audited directory; stored as a string in ``meta``.
        source_file_count: Number reported as ``source_files_scanned``.
        gates_skipped: Optional list of skip records. Records whose
            ``reason`` is ``"skipped_in_static_mode"`` must carry a
            ``gate_id`` key, which is listed in ``gates_skipped_in_static``.

    Returns:
        A dictionary with the keys ``meta``, ``errors``, ``findings`` and
        ``uncertain_findings``.
    """

    def _label(raw: Any) -> str:
        # Enum members expose ``.value``; plain strings are used directly.
        return str(getattr(raw, "value", raw) or "unknown")

    def _tally(labels: Any) -> dict[str, int]:
        # Count occurrences, keeping first-seen order of the labels.
        counts: dict[str, int] = {}
        for label in labels:
            counts[label] = counts.get(label, 0) + 1
        return counts

    def _applicability(finding: Any) -> str:
        return getattr(finding, "applicability", "applicable")

    def _gate_of(finding: Any) -> str:
        return getattr(finding, "check_id", "") or "unknown"

    raw_findings: list[GateFinding] = []
    errors: list[dict[str, str]] = []
    ok_count = 0
    for outcome in outcomes:
        if outcome.ok:
            ok_count += 1
            raw_findings.extend(outcome.findings)
        else:
            errors.append({"check_id": outcome.check_id, "error": outcome.error})

    suppressed_na = [f for f in raw_findings if _applicability(f) == "not_applicable"]
    uncertain_findings = [f for f in raw_findings if _applicability(f) == "unknown"]
    applicable_findings = [f for f in raw_findings if _applicability(f) != "not_applicable"]
    all_findings = [f for f in applicable_findings if not _is_self_match_finding(f)]

    # Severity labels are lower-cased; category labels keep their case.
    sev_counts = _tally(_label(f.severity).lower() for f in all_findings)
    category_counts = _tally(_label(f.category) for f in all_findings)
    suppressed_by_gate = _tally(_gate_of(f) for f in suppressed_na)
    uncertain_by_gate = _tally(_gate_of(f) for f in uncertain_findings)

    gates_skipped_list = list(gates_skipped or [])
    gates_skipped_in_static = [
        entry["gate_id"]
        for entry in gates_skipped_list
        if entry.get("reason") == "skipped_in_static_mode"
    ]

    return {
        "meta": {
            "project_dir": str(project_dir),
            "source_files_scanned": source_file_count,
            "gates_attempted": len(outcomes),
            "gates_succeeded": ok_count,
            "gates_errored": len(errors),
            "total_findings": len(all_findings),
            "severity_counts": sev_counts,
            "category_counts": category_counts,
            "schema_version": "1.1",
            "suppressed_not_applicable_count": len(suppressed_na),
            "suppressed_not_applicable_by_gate": suppressed_by_gate,
            "uncertain_findings_count": len(uncertain_findings),
            "uncertain_findings_by_gate": uncertain_by_gate,
            "gates_skipped": gates_skipped_list,
            "gates_skipped_in_static": gates_skipped_in_static,
        },
        "errors": errors,
        "findings": [finding_to_dict(f) for f in all_findings],
        "uncertain_findings": [finding_to_dict(f) for f in uncertain_findings],
    }
604
+
605
+
606
def filter_findings_by_severity(findings: list[dict[str, Any]], min_sev: str) -> list[dict[str, Any]]:
    """Keep the serialised findings whose severity rank reaches ``min_sev``.

    Ranks come from ``_SEVERITY_ORDER`` and labels are compared
    case-insensitively. A label that is not in the table ranks as 0, both
    for ``min_sev`` and for a finding's own ``"severity"`` value.

    Args:
        findings: Finding dictionaries; each must have a ``"severity"`` key.
        min_sev: Lowest severity label to keep.

    Returns:
        A new list with the retained dictionaries in their original order.
    """
    floor = _SEVERITY_ORDER.get(min_sev.lower(), 0)
    kept: list[dict[str, Any]] = []
    for entry in findings:
        rank = _SEVERITY_ORDER.get(str(entry["severity"]).lower(), 0)
        if rank >= floor:
            kept.append(entry)
    return kept
609
+
610
+
611
def print_human_summary(report: dict[str, Any], top_n: int = 20) -> None:
    """Print a plain-text digest of an audit report to standard output.

    The digest has up to four parts: a header with the headline numbers from
    ``report["meta"]``, a per-severity breakdown (only when
    ``severity_counts`` is non-empty), the gate errors (truncated after
    ``_MAX_ERRORS_BEFORE_TRUNCATE`` entries), and the first ``top_n``
    findings ordered from highest to lowest severity rank.

    Args:
        report: Report dictionary with a ``"meta"`` mapping and optional
            ``"errors"`` and ``"findings"`` lists.
        top_n: Maximum number of findings to list.
    """
    meta = report["meta"]
    rule = "=" * 72
    print(rule)
    print(" FORENSIC SELF-AUDIT SUMMARY")
    print(rule)
    headline_rows = (
        ("Project", "project_dir"),
        ("Source files scanned", "source_files_scanned"),
        ("Gates attempted", "gates_attempted"),
        ("Gates succeeded", "gates_succeeded"),
        ("Gates errored", "gates_errored"),
        ("Total findings", "total_findings"),
    )
    for caption, meta_key in headline_rows:
        print(f" {caption}: {meta[meta_key]}")
    print()

    severity_totals = meta.get("severity_counts", {})
    if severity_totals:
        print(" By severity:")
        for level in ("critical", "high", "medium", "low"):
            if level in severity_totals:
                print(f" {level:>10}: {severity_totals[level]}")
        print()

    gate_errors = report.get("errors", [])
    if gate_errors:
        print(f" GATE ERRORS ({len(gate_errors)}):")
        for gate_error in gate_errors[:_MAX_ERRORS_BEFORE_TRUNCATE]:
            print(f" {gate_error['check_id']:>30}: {gate_error['error']}")
        hidden = len(gate_errors) - _MAX_ERRORS_BEFORE_TRUNCATE
        if hidden > 0:
            print(f" ... +{hidden} more")
        print()

    findings = report.get("findings", [])
    if not findings:
        return

    def _rank(entry: dict[str, Any]) -> int:
        return _SEVERITY_ORDER.get(str(entry["severity"]).lower(), 0)

    # A reversed sort is still stable, so findings of equal rank keep their
    # report order.
    ranked = sorted(findings, key=_rank, reverse=True)
    print(f" TOP {min(top_n, len(ranked))} FINDINGS (by severity):")
    for entry in ranked[:top_n]:
        evidence = entry.get("evidence") or []
        if evidence:
            # Only the first evidence item is used for the location.
            path = evidence[0]["path"]
            detail = evidence[0]["detail"]
        else:
            path, detail = "<no path>", ""
        location = f"{path}:{detail}" if detail else path
        severity_tag = entry["severity"].upper() if entry.get("severity") else "?"
        print(f" [{severity_tag:>8}] {entry['check_id']:>30} {location}")
        print(f" {entry['title']}")
650
+
651
+
652
def main(argv: Optional[list[str]] = None) -> int:
    """Command-line entry point for the forensic self-audit.

    Parses the arguments, discovers source files under ``--project``, builds
    a synthetic gate context, runs the gates, assembles the report and
    optionally writes it as JSON and/or prints a human-readable summary.
    Progress and error messages go to stderr; the gate list and the summary
    go to stdout.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` read the
            process arguments.

    Returns:
        Process exit status:

        * ``0`` - no ``critical`` or ``high`` finding was counted, or
          ``--list-gates`` was requested.
        * ``1`` - at least one ``critical`` or ``high`` finding was counted.
          The count is taken from ``meta.severity_counts`` and is therefore
          not affected by the ``--severity`` filter.
        * ``2`` - operational failure: the project path is not a directory,
          no source files were found, the context could not be built, or the
          JSON report could not be written.
    """
    parser = argparse.ArgumentParser(description="Forensic Self-Audit (vigil_forensic)")
    parser.add_argument("--project", default="", help="Target project directory")
    parser.add_argument("--gates", default="", help="Comma-separated gate check_ids")
    parser.add_argument("--list-gates", action="store_true", help="Print file-based gates and exit")
    parser.add_argument("--severity", default="low", choices=["low", "medium", "high", "critical"])
    parser.add_argument("--json-out", default="")
    parser.add_argument("--top", type=int, default=20)
    parser.add_argument("--quiet", action="store_true")
    parser.add_argument("--workers", type=int, default=1)
    args = parser.parse_args(argv)

    if args.list_gates:
        print("File-based gates wired into forensic self-audit:")
        for check_id in sorted(_FILE_BASED_GATES):
            print(f" {check_id}")
        return 0

    project_dir = Path(args.project).resolve()
    if not project_dir.is_dir():
        print(f"ERROR: {project_dir} is not a directory", file=sys.stderr)
        return 2

    # An empty --gates value means "no filter" (None).
    gates_filter = {g.strip() for g in args.gates.split(",") if g.strip()} or None

    print(f"[1/3] Discovering source files in {project_dir}...", file=sys.stderr)
    source_files = discover_source_files(project_dir)
    print(f" Found {len(source_files)} files", file=sys.stderr)
    if not source_files:
        print("ERROR: no source files found under project_dir", file=sys.stderr)
        return 2

    print("[2/3] Building synthetic PostExecGateContext...", file=sys.stderr)
    try:
        ctx = build_synthetic_context(project_dir, source_files)
    except Exception as exc:
        print(f"ERROR: failed to build context: {type(exc).__name__}: {exc}", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        return 2

    # Clamp to at least one worker; zero or negative values become 1.
    workers = max(1, int(getattr(args, "workers", 1) or 1))
    disabled_gates = _load_project_disabled_gates(project_dir)
    if disabled_gates:
        print(f" {len(disabled_gates)} gate(s) disabled by project (.cortex/disabled_gates.json)", file=sys.stderr)
    print(f"[3/3] Running gates ({'parallel x' + str(workers) if workers > 1 else 'sequential'})...", file=sys.stderr)
    outcomes, gates_skipped = run_gates(ctx, gates_filter, workers=workers, disabled_gates=disabled_gates)

    # Imported here rather than at module level, as in the original code.
    from vigil_forensic.meta_findings import drain_meta_findings

    # Run the integrity probe, then drain the meta-findings collected so far
    # (presumably including the probe's own - confirm against
    # vigil_forensic.meta_findings) and report them as one extra outcome.
    _probe_meta_integrity(project_dir)
    meta_findings = drain_meta_findings()
    if meta_findings:
        outcomes.append(GateOutcome(check_id="meta_integrity_probe", ok=True, findings=list(meta_findings)))

    report = build_json_report(outcomes, project_dir, len(source_files), gates_skipped=gates_skipped)

    if args.severity != "low":
        # Only the main findings list is filtered; the counters in meta keep
        # describing the unfiltered set.
        filtered = filter_findings_by_severity(report["findings"], args.severity)
        report["findings"] = filtered
        report["meta"]["findings_after_severity_filter"] = len(filtered)

    if args.json_out:
        out_path = Path(args.json_out).resolve()
        try:
            # Create missing parent directories so a fresh output location
            # does not abort the run after all gates have already executed.
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
        except OSError as exc:
            # Report I/O failures like the other operational errors (status
            # 2) instead of an uncaught traceback, whose exit status 1 would
            # be indistinguishable from "critical/high findings present".
            print(
                f"ERROR: failed to write JSON report to {out_path}: {type(exc).__name__}: {exc}",
                file=sys.stderr,
            )
            return 2
        print(f"JSON report written to: {out_path}", file=sys.stderr)

    if not args.quiet:
        print_human_summary(report, top_n=args.top)

    critical_or_high = sum(report["meta"]["severity_counts"].get(s, 0) for s in ("critical", "high"))
    return 1 if critical_or_high > 0 else 0
722
+
723
+
724
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())