vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,139 @@
1
+ """FX-V5-012 hunter artifact completeness check.
2
+
3
+ Soft-severity gate that scans <project>/.cortex/context_hunter/*.json and
4
+ emits a finding for each file that is corrupted, unparseable, or missing
5
+ expected schema fields. Prevents silent hunter cache corruption from going
6
+ undetected by downstream consumers.
7
+
8
+ Out of scope:
9
+ - Cache directory missing -> no finding (hunter may simply not have run
10
+ for this project; FX-V4-002 remote_authoritative mode also produces
11
+ files under .cortex/_hunter_remote_cache/ which we do NOT scan here).
12
+ - Empty cache directory -> no finding (same reasoning).
13
+ - Stale-by-TTL files -> not our concern (FIX-025 cleanup_stale_cache owns).
14
+
15
+ Fail-open: any I/O / parse error on the *check itself* is logged at DEBUG
16
+ and produces no finding — same contract as every gate runner in this pack.
17
+
18
+ Severity policy:
19
+ - Unparseable JSON -> MEDIUM (operator should inspect / clear)
20
+ - Missing required keys -> LOW (gate uses heuristic; schema may
21
+ evolve faster than the check)
22
+ - I/O failure on read -> LOW (likely transient; symptom not cause)
23
+
24
+ Each finding is per-file (fingerprint includes the relative path).
25
+ """
26
+ from __future__ import annotations
27
+
28
+ import json
29
+ import logging
30
+
31
+ from vigil_forensic._shared import (
32
+ EvidenceReference, GateCategory, GateImpact, GateSeverity,
33
+ )
34
+ from vigil_forensic.gate_models import PostExecGateContext
35
+ from .common import build_check_result, build_finding
36
+
37
+ _log = logging.getLogger(__name__)
38
+
39
+ _CHECK_ID = "hunter_artifact_completeness"
40
+ _HUNTER_CACHE_SUBPATH = (".cortex", "context_hunter")
41
+
42
+ # Minimum keys a well-formed hunter artifact JSON should have. Hunter writes
43
+ # {"stage": <name>, "data": {...}} as canonical format; some legacy artifacts
44
+ # may omit "stage" — we treat both keys missing as schema divergence rather
45
+ # than just one (avoid noisy false positives during the v5 schema migration).
46
+ _REQUIRED_KEYS_ANY = ("data", "stage")
47
+
48
+
49
def run_hunter_artifact_completeness_checks(ctx: PostExecGateContext):
    """Scan the hunter cache directory and report damaged artifacts.

    Every ``*.json`` file directly under ``<project>/.cortex/context_hunter``
    (``_tmp_*`` files excluded) is read and parsed. One finding is emitted per
    file that:

    * cannot be read (``OSError``)                      -> LOW
    * is not valid UTF-8 or not valid JSON              -> MEDIUM
    * parses to an object with none of the keys listed
      in ``_REQUIRED_KEYS_ANY``                         -> LOW

    A missing cache directory produces a result with no findings.

    Args:
        ctx: Gate context; only ``ctx.project_dir`` is used here.

    Returns:
        Whatever ``build_check_result`` returns for this check id, carrying
        the collected findings.
    """
    findings = []

    cache_dir = ctx.project_dir.joinpath(*_HUNTER_CACHE_SUBPATH)
    # is_dir() is already False for a path that does not exist.
    if not cache_dir.is_dir():
        return build_check_result(check_id=_CHECK_ID, category=GateCategory.META, findings=findings)

    for json_path in sorted(cache_dir.glob("*.json")):
        # Skip codex temp schemas — they are not hunter artifacts and have
        # their own cleanup contract (FIX-020 in hunter_runner.cleanup_stale_cache).
        if json_path.name.startswith("_tmp_"):
            continue

        rel_path = str(json_path.relative_to(ctx.project_dir)).replace("\\", "/")

        # Read raw bytes so that this guard only ever sees genuine I/O errors;
        # decoding problems are handled together with parse problems below.
        try:
            raw = json_path.read_bytes()
        except OSError as exc:
            _log.debug("FX-V5-012: I/O failure reading %s: %s", rel_path, exc)
            findings.append(
                build_finding(
                    check_id=_CHECK_ID,
                    category=GateCategory.META,
                    title=f"hunter artifact unreadable: {json_path.name}",
                    severity=GateSeverity.LOW,
                    impact=GateImpact.WARN,
                    summary=f"OSError reading hunter cache artifact: {exc!s}",
                    recommendation=(
                        "Investigate filesystem permissions / disk health. "
                        "Safe to delete the file — hunter will regenerate on next call."
                    ),
                    evidence=(EvidenceReference(kind="file", path=rel_path, detail=str(exc)),),
                    repair_kind="manual_inspection",
                )
            )
            continue

        try:
            # UnicodeDecodeError and json.JSONDecodeError are both ValueError
            # subclasses, so invalid UTF-8 bytes are reported as corruption
            # instead of escaping the check as an unhandled exception.
            parsed = json.loads(raw.decode("utf-8"))
        except ValueError as exc:
            # JSONDecodeError exposes ``pos``; UnicodeDecodeError exposes ``start``.
            offset = getattr(exc, "pos", getattr(exc, "start", "?"))
            findings.append(
                build_finding(
                    check_id=_CHECK_ID,
                    category=GateCategory.META,
                    title=f"hunter artifact corrupted: {json_path.name}",
                    severity=GateSeverity.MEDIUM,
                    impact=GateImpact.WARN,
                    summary=(
                        f"JSON parse error in hunter cache artifact: {exc!s}. "
                        f"Cache hit on this file would deliver garbage to the "
                        f"caller of the hunter intake helper."
                    ),
                    recommendation=(
                        "Delete the corrupted file. Hunter will regenerate it on next "
                        "matching stage call. If recurring, investigate the writer in "
                        "BRAIN/context_hunter/hunter_runner.py for partial-write race."
                    ),
                    evidence=(
                        EvidenceReference(
                            kind="file",
                            path=rel_path,
                            detail=f"parse error at offset {offset}",
                        ),
                    ),
                    repair_kind="delete_artifact",
                )
            )
            continue

        # Schema sanity: well-formed hunter artifact should have at least
        # one of the required keys. Hunter v5 writes {"stage": ..., "data": ...}.
        # An artifact with neither is suspicious — either a foreign file
        # accidentally placed in the cache dir, or a writer regression.
        if isinstance(parsed, dict) and not any(k in parsed for k in _REQUIRED_KEYS_ANY):
            top_keys = sorted(parsed.keys())[:10]
            findings.append(
                build_finding(
                    check_id=_CHECK_ID,
                    category=GateCategory.META,
                    title=f"hunter artifact schema divergence: {json_path.name}",
                    severity=GateSeverity.LOW,
                    impact=GateImpact.WARN,
                    summary=(
                        f"Hunter cache artifact lacks required keys "
                        f"({' or '.join(_REQUIRED_KEYS_ANY)}). Parsed top-level keys: "
                        f"{top_keys}."
                    ),
                    recommendation=(
                        "Confirm the file was written by hunter_runner. If not, move it "
                        "out of .cortex/context_hunter/. If yes, investigate writer for "
                        "schema regression."
                    ),
                    evidence=(
                        EvidenceReference(
                            kind="file",
                            path=rel_path,
                            detail=f"top_keys={top_keys}",
                        ),
                    ),
                    repair_kind="schema_inspection",
                )
            )

    return build_check_result(check_id=_CHECK_ID, category=GateCategory.META, findings=findings)
@@ -0,0 +1,380 @@
1
+ """Gate: implementation_overfit
2
+
3
+ Detects code overfit to local context: hardcoded paths, project-specific
4
+ strings, and broken project-agnosticism claims.
5
+
6
+ Sub-checks:
7
+ hardcoded_repo_path -- string literal contains known project-specific cluster paths
8
+ assumes_single_language -- language-neutral file has Python-only conditionals without else
9
+ fake_generic_helper -- function named generic_*/universal_*/common_* has repo literals
10
+ env_tight_coupling -- module imports project-prefixed env vars in universal context
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import re
15
+ import logging
16
+
17
+ from vigil_forensic._shared import (
18
+ EvidenceReference,
19
+ GateCategory,
20
+ GateImpact,
21
+ GateSeverity,
22
+ RepairKind,
23
+ )
24
+ from vigil_forensic.gate_models import PostExecGateContext
25
+ from .common import build_finding, normalize_path
26
+ from ._ast_helpers import parse_python_source_or_emit_finding
27
+
28
+ _log = logging.getLogger(__name__)
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Constants
32
+ # ---------------------------------------------------------------------------
33
+
34
+ # String patterns that indicate hardcoded repo-specific paths/names
35
+ _REPO_LITERAL_PATTERNS: tuple[re.Pattern, ...] = (
36
+ re.compile(r"""["'](BRAIN/|SYSTEM/|INTERFACE/|STORAGE/)"""),
37
+ re.compile(r"""["']/.vigil_launcher/"""),
38
+ re.compile(r"""["']7331["']"""),
39
+ re.compile(r"""["']vigil_control_plane["']"""),
40
+ re.compile(r"""["']vigil_"""),
41
+ )
42
+
43
+ # Python-only conditional patterns that break language-neutrality
44
+ _PYTHON_ONLY_PATTERNS: tuple[re.Pattern, ...] = (
45
+ re.compile(r"""path\.(?:endswith|suffix)\s*\(\s*["']\.py["']\s*\)"""),
46
+ re.compile(r"""(?:lang|language)\s*==\s*["']python["']"""),
47
+ re.compile(r"""\.endswith\s*\(\s*["']\.py["']\s*\)"""),
48
+ )
49
+
50
+ # Prefixes for "generic" helper function names
51
+ _GENERIC_PREFIXES = ("generic_", "universal_", "common_")
52
+
53
+ # Environment variable prefix pattern
54
+ _AI_HOST_ENV_PATTERN = re.compile(
55
+ r"""os\.environ(?:\.get)?\s*\(\s*["']VIGIL_""",
56
+ )
57
+
58
+
59
+ def _is_ai_host_gate_file(path: str) -> bool:
60
+ """True if file belongs to an vigil-specific gate context.
61
+
62
+ Files under SYSTEM/pipeline/gates/ are Vigil-specific orchestration
63
+ gates that are explicitly allowed to contain hardcoded repo path literals.
64
+ """
65
+ normalized = path.replace("\\", "/")
66
+ return "SYSTEM/pipeline/gates/" in normalized
67
+
68
+
69
+ def _has_else_branch(content: str, match_start: int) -> bool:
70
+ """Heuristic: check if the matched if-line has a nearby else/elif."""
71
+ snippet = content[match_start: match_start + 400]
72
+ return bool(re.search(r"\belse\b|\belif\b", snippet))
73
+
74
+
75
def _find_repo_literals_in_text(content: str) -> list[tuple[int, str]]:
    """Collect ``(line_num, snippet)`` pairs for lines holding repo literals.

    Line numbers are 1-based. Lines whose first non-blank character is ``#``
    are ignored. Every other line is reported at most once, no matter how many
    patterns match it, with its stripped text truncated to 120 characters.
    """
    matches: list[tuple[int, str]] = []
    for number, raw_line in enumerate(content.splitlines(), start=1):
        text = raw_line.strip()
        # Pure comment lines never count as hardcoded literals.
        if text.startswith("#"):
            continue
        if any(pattern.search(raw_line) for pattern in _REPO_LITERAL_PATTERNS):
            matches.append((number, text[:120]))
    return matches
89
+
90
+
91
+ # ---------------------------------------------------------------------------
92
+ # Sub-check 1: hardcoded_repo_path
93
+ # ---------------------------------------------------------------------------
94
+
95
+
96
def check_hardcoded_repo_path(
    file_path: str,
    content: str,
) -> list["GateFinding"]:
    """Flag repo-specific literals embedded in a file's text.

    Blank content and Vigil-specific gate files (which may legitimately carry
    repo path literals) produce no findings. Otherwise one finding is built
    per matching line, up to a maximum of 15 per file.
    """
    if not content.strip() or _is_ai_host_gate_file(file_path):
        return []

    reported = []
    # Slicing keeps the per-file cap: only the first 15 hits are reported.
    for line_num, snippet in _find_repo_literals_in_text(content)[:15]:
        detail = f"Repo-specific literal at line {line_num}: {snippet!r}"
        probe = EvidenceReference(
            kind="probe", path=file_path, detail=detail, ok=False,
        )
        finding = build_finding(
            check_id="implementation_overfit.hardcoded_repo_path",
            category=GateCategory.DRIFT,
            title=f"[implementation_overfit.hardcoded_repo_path] {file_path}:{line_num}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=(
                f"{detail} in {file_path}. "
                "Hardcoded repo paths break project-agnosticism."
            ),
            recommendation=(
                "Move hardcoded path/constant to a repo-specific profile; "
                "import from profile instead of embedding literal."
            ),
            evidence=(probe,),
            repair_kind=RepairKind.EDIT_CANONICAL.value,
            executor_action=(
                "Move hardcoded path/constant to repo-specific profile; import from profile"
            ),
            proof_required="literal removed from file; constant lives in profile; grep confirms",
            allowlist_allowed=False,
            preferred_fix_shape="extract to profile constant; import at usage site",
        )
        reported.append(finding)
    return reported
143
+
144
+
145
+ # ---------------------------------------------------------------------------
146
+ # Sub-check 2: assumes_single_language
147
+ # ---------------------------------------------------------------------------
148
+
149
+
150
def check_assumes_single_language(
    file_path: str,
    content: str,
) -> list["GateFinding"]:
    """Detect Python-only conditionals without else branch in language-neutral files.

    Each non-comment line is tested against ``_PYTHON_ONLY_PATTERNS``. A match
    is reported unless ``_has_else_branch`` sees an ``else``/``elif`` in the
    text that follows the match. At most one finding is produced per line and
    scanning stops once 15 findings have been collected.

    Blank content and Vigil-specific gate files produce no findings.
    """
    if not content.strip():
        return []

    if _is_ai_host_gate_file(file_path):
        return []

    findings = []
    # Absolute offset in ``content`` of the line being scanned. Lines are
    # split with their terminators kept so the offset stays exact for "\r\n"
    # files too, and no per-match re-summation of earlier lines is needed.
    next_line_offset = 0
    for line_num, raw_line in enumerate(content.splitlines(keepends=True), 1):
        line_offset = next_line_offset
        next_line_offset += len(raw_line)

        stripped = raw_line.strip()
        if stripped.startswith("#"):
            continue
        for pat in _PYTHON_ONLY_PATTERNS:
            m = pat.search(raw_line)
            if not m:
                continue
            # A nearby else/elif means other languages are presumably handled;
            # try the remaining patterns, whose matches may start elsewhere.
            if _has_else_branch(content, line_offset + m.start()):
                continue

            detail = f"Python-only conditional at line {line_num} without else branch: {stripped[:100]!r}"
            findings.append(build_finding(
                check_id="implementation_overfit.assumes_single_language",
                category=GateCategory.DRIFT,
                title=f"[implementation_overfit.assumes_single_language] {file_path}:{line_num}",
                severity=GateSeverity.MEDIUM,
                impact=GateImpact.REVISE,
                summary=(
                    f"{detail} in {file_path}. "
                    "Language-neutral files must handle multiple languages."
                ),
                recommendation=(
                    "Add language-aware branch via source_analysis.get_language_id() "
                    "or an explicit else/elif covering other languages."
                ),
                evidence=(EvidenceReference(
                    kind="probe", path=file_path, detail=detail, ok=False,
                ),),
                repair_kind=RepairKind.VALIDATE_BOUNDARY.value,
                executor_action=(
                    "Add language-aware branch via source_analysis.get_language_id()"
                ),
                proof_required="else/elif branch present; other languages handled; tests pass",
                allowlist_allowed=True,
                preferred_fix_shape="if lang == 'python': ... elif lang in ('js', 'ts'): ... else: ...",
            ))
            # One finding per line is enough.
            break
        if len(findings) >= 15:
            break
    return findings
206
+
207
+
208
+ # ---------------------------------------------------------------------------
209
+ # Sub-check 3: fake_generic_helper
210
+ # ---------------------------------------------------------------------------
211
+
212
+
213
def check_fake_generic_helper(
    file_path: str,
    content: str,
) -> list["GateFinding"]:
    """Flag generically named functions whose source contains repo literals.

    The content is parsed as Python; every (async) function whose lowercased
    name starts with one of ``_GENERIC_PREFIXES`` has its source lines scanned
    for repo-specific literals. One finding is emitted per such function,
    citing the first hit. If parsing yields no tree, whatever the parse helper
    emitted into the findings list is returned as-is.

    Blank content and Vigil-specific gate files produce no findings.
    """
    if not content.strip() or _is_ai_host_gate_file(file_path):
        return []

    import ast

    findings: list = []
    # B4 (2026-04-23): replaces silent `except SyntaxError: return []` — now
    # meta.syntax_parse_error is emitted so broken Python is not invisible.
    tree = parse_python_source_or_emit_finding(
        content,
        rel_path=normalize_path(file_path),
        emit_finding=findings.append,
        emitting_gate="implementation_overfit.fake_generic_helper",
    )
    if tree is None:
        return findings

    source_lines = content.splitlines()
    function_nodes = (
        node
        for node in ast.walk(tree)
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
    )

    for node in function_nodes:
        # str.startswith accepts the whole tuple of prefixes at once.
        if not node.name.lower().startswith(_GENERIC_PREFIXES):
            continue

        # Slice out the function's own source lines (from its def line on).
        func_line = node.lineno
        last_line = getattr(node, "end_lineno", func_line)
        func_text = "\n".join(source_lines[func_line - 1:last_line])

        repo_hits = _find_repo_literals_in_text(func_text)
        if not repo_hits:
            continue

        # Hit line numbers are relative to the start of the function text.
        first_hit_line, first_hit_snippet = repo_hits[0]

        detail = (
            f"Function {node.name!r} (line {func_line}) named as generic helper "
            f"but contains repo literal at relative line {first_hit_line}: {first_hit_snippet!r}"
        )
        probe = EvidenceReference(
            kind="probe", path=file_path, detail=detail, ok=False,
        )
        findings.append(build_finding(
            check_id="implementation_overfit.fake_generic_helper",
            category=GateCategory.DRIFT,
            title=f"[implementation_overfit.fake_generic_helper] {file_path}:{func_line}:{node.name}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=(
                f"{detail} in {file_path}. "
                "A function with a generic name must not contain repo-specific literals."
            ),
            recommendation=(
                f"Rename {node.name!r} to reflect its actual repo-specific coupling, "
                "OR remove the repo literals and make it truly generic."
            ),
            evidence=(probe,),
            repair_kind=RepairKind.NORMALIZE_SHAPE.value,
            executor_action=(
                "Rename to reflect actual coupling OR remove repo literals"
            ),
            proof_required="function renamed or literals extracted; grep confirms no false-generic name",
            allowlist_allowed=False,
            preferred_fix_shape="rename function OR extract literals to profile import",
        ))
    return findings
289
+
290
+
291
+ # ---------------------------------------------------------------------------
292
+ # Sub-check 4: env_tight_coupling
293
+ # ---------------------------------------------------------------------------
294
+
295
+
296
def check_env_tight_coupling(
    file_path: str,
    content: str,
) -> list["GateFinding"]:
    """Flag direct reads of ``VIGIL_``-prefixed environment variables.

    Each non-comment line matching ``_AI_HOST_ENV_PATTERN`` yields one
    finding; scanning stops once 10 findings have been collected. Blank
    content and Vigil-specific gate files produce no findings.
    """
    if not content.strip() or _is_ai_host_gate_file(file_path):
        return []

    reported = []
    for line_num, raw_line in enumerate(content.splitlines(), 1):
        text = raw_line.strip()
        if text.startswith("#"):
            continue
        if _AI_HOST_ENV_PATTERN.search(raw_line) is None:
            continue

        snippet = text[:120]
        detail = f"Project-prefixed env var at line {line_num}: {snippet!r}"
        probe = EvidenceReference(
            kind="probe", path=file_path, detail=detail, ok=False,
        )
        reported.append(build_finding(
            check_id="implementation_overfit.env_tight_coupling",
            category=GateCategory.DRIFT,
            title=f"[implementation_overfit.env_tight_coupling] {file_path}:{line_num}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=(
                f"{detail} in {file_path}. "
                "Universal modules must not reference project-specific env vars directly."
            ),
            recommendation=(
                "Inject env var via constructor parameter or profile config. "
                "Do not read VIGIL_* vars directly in universal code."
            ),
            evidence=(probe,),
            repair_kind=RepairKind.VALIDATE_BOUNDARY.value,
            executor_action=(
                "Inject env var via parameter; remove direct os.environ.get('VIGIL_*') call"
            ),
            proof_required="no VIGIL_* env reads in module; value injected via parameter; tests pass",
            allowlist_allowed=True,
            preferred_fix_shape="def __init__(self, env_prefix: str = ''): ...",
        ))
        # Per-file cap on findings from this sub-check.
        if len(reported) >= 10:
            break
    return reported
347
+
348
+
349
+ # ---------------------------------------------------------------------------
350
+ # Gate runner
351
+ # ---------------------------------------------------------------------------
352
+
353
+
354
def run_implementation_overfit_checks(ctx: PostExecGateContext):
    """Apply every implementation_overfit sub-check to the context's snapshots.

    Snapshots without a non-empty ``text`` attribute are skipped. After each
    processed file the running total is checked, and iteration stops once 50
    or more findings have accumulated.

    Returns:
        The ``build_check_result`` value for check id
        ``implementation_overfit`` carrying all collected findings.
    """
    from vigil_forensic.gate_checks.common import build_check_result
    from vigil_forensic._shared import GateCategory

    # Order matters only for the ordering of findings within a file.
    sub_checks = (
        check_hardcoded_repo_path,
        check_assumes_single_language,
        check_fake_generic_helper,
        check_env_tight_coupling,
    )

    collected = []
    for path, snap in (ctx.file_snapshots or {}).items():
        content = getattr(snap, "text", None)
        if not content:
            continue

        for sub_check in sub_checks:
            collected.extend(sub_check(path, content))

        if len(collected) >= 50:
            break

    return build_check_result(
        check_id="implementation_overfit",
        category=GateCategory.DRIFT,
        findings=collected,
        notes=[],
    )