vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""FX-V5-012 hunter artifact completeness check.
|
|
2
|
+
|
|
3
|
+
Soft-severity gate that scans <project>/.cortex/context_hunter/*.json and
|
|
4
|
+
emits a finding for each file that is corrupted, unparseable, or missing
|
|
5
|
+
expected schema fields. Prevents silent hunter cache corruption from going
|
|
6
|
+
undetected by downstream consumers.
|
|
7
|
+
|
|
8
|
+
Out of scope:
|
|
9
|
+
- Cache directory missing -> no finding (hunter may simply not have run
|
|
10
|
+
for this project; FX-V4-002 remote_authoritative mode also produces
|
|
11
|
+
files under .cortex/_hunter_remote_cache/ which we do NOT scan here).
|
|
12
|
+
- Empty cache directory -> no finding (same reasoning).
|
|
13
|
+
- Stale-by-TTL files -> not our concern (FIX-025 cleanup_stale_cache owns).
|
|
14
|
+
|
|
15
|
+
Fail-open: any I/O / parse error on the *check itself* is logged at DEBUG
|
|
16
|
+
and produces no finding — same contract as every gate runner in this pack.
|
|
17
|
+
|
|
18
|
+
Severity policy:
|
|
19
|
+
- Unparseable JSON -> MEDIUM (operator should inspect / clear)
|
|
20
|
+
- Missing required keys -> LOW (gate uses heuristic; schema may
|
|
21
|
+
evolve faster than the check)
|
|
22
|
+
- I/O failure on read -> LOW (likely transient; symptom not cause)
|
|
23
|
+
|
|
24
|
+
Each finding is per-file (fingerprint includes the relative path).
|
|
25
|
+
"""
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import logging
|
|
30
|
+
|
|
31
|
+
from vigil_forensic._shared import (
|
|
32
|
+
EvidenceReference, GateCategory, GateImpact, GateSeverity,
|
|
33
|
+
)
|
|
34
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
35
|
+
from .common import build_check_result, build_finding
|
|
36
|
+
|
|
37
|
+
_log = logging.getLogger(__name__)

# Identifier stamped on every finding and on the aggregated check result.
_CHECK_ID: str = "hunter_artifact_completeness"

# Path components, relative to the project directory, of the scanned cache dir.
_HUNTER_CACHE_SUBPATH: tuple[str, ...] = (".cortex", "context_hunter")

# Keys of the canonical hunter artifact layout {"stage": <name>, "data": {...}}.
# Legacy artifacts may omit "stage", so an artifact only counts as divergent
# when it carries NEITHER key; flagging a single missing key would be noisy
# during the v5 schema migration.
_REQUIRED_KEYS_ANY: tuple[str, ...] = ("data", "stage")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def run_hunter_artifact_completeness_checks(ctx: PostExecGateContext):
    """Scan the hunter cache directory and report damaged artifacts.

    Every ``*.json`` file directly inside ``.cortex/context_hunter`` under
    ``ctx.project_dir`` is read and parsed, except files whose name starts
    with ``_tmp_``. At most one finding is produced per file:

    * the read fails with ``OSError``            -> LOW, "unreadable"
    * bytes are not UTF-8 or text is not JSON    -> MEDIUM, "corrupted"
    * a JSON object has neither "data" nor
      "stage" as a top-level key                 -> LOW, "schema divergence"

    JSON payloads whose top level is not an object (lists, scalars) are not
    reported. A missing or empty cache directory yields no findings.

    Args:
        ctx: Gate context; only ``ctx.project_dir`` is used here.

    Returns:
        Whatever ``build_check_result`` returns for this check id, the META
        category and the collected findings.
    """
    findings = []

    cache_dir = ctx.project_dir.joinpath(*_HUNTER_CACHE_SUBPATH)
    # is_dir() is already False for a path that does not exist.
    if not cache_dir.is_dir():
        return build_check_result(check_id=_CHECK_ID, category=GateCategory.META, findings=findings)

    try:
        json_paths = sorted(cache_dir.glob("*.json"))
    except OSError as exc:
        # Fail-open: a problem listing the directory is a failure of the
        # check itself, so it is logged and produces no finding.
        _log.debug("FX-V5-012: cannot list %s: %s", cache_dir, exc)
        return build_check_result(check_id=_CHECK_ID, category=GateCategory.META, findings=findings)

    for json_path in json_paths:
        # Skip codex temp schemas — they are not hunter artifacts and have
        # their own cleanup contract (FIX-020 in hunter_runner.cleanup_stale_cache).
        if json_path.name.startswith("_tmp_"):
            continue

        rel_path = json_path.relative_to(ctx.project_dir).as_posix()

        # Read raw bytes so that an encoding problem surfaces in the parse
        # step below instead of escaping as an unhandled UnicodeDecodeError.
        try:
            raw = json_path.read_bytes()
        except OSError as exc:
            _log.debug("FX-V5-012: I/O failure reading %s: %s", rel_path, exc)
            findings.append(
                build_finding(
                    check_id=_CHECK_ID,
                    category=GateCategory.META,
                    title=f"hunter artifact unreadable: {json_path.name}",
                    severity=GateSeverity.LOW,
                    impact=GateImpact.WARN,
                    summary=f"OSError reading hunter cache artifact: {exc!s}",
                    recommendation=(
                        "Investigate filesystem permissions / disk health. "
                        "Safe to delete the file — hunter will regenerate on next call."
                    ),
                    evidence=(EvidenceReference(kind="file", path=rel_path, detail=str(exc)),),
                    repair_kind="manual_inspection",
                )
            )
            continue

        try:
            parsed = json.loads(raw.decode("utf-8"))
        except ValueError as exc:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError; either one means the artifact is corrupted.
            # JSONDecodeError exposes ``pos``, UnicodeDecodeError ``start``.
            offset = getattr(exc, "pos", getattr(exc, "start", "?"))
            findings.append(
                build_finding(
                    check_id=_CHECK_ID,
                    category=GateCategory.META,
                    title=f"hunter artifact corrupted: {json_path.name}",
                    severity=GateSeverity.MEDIUM,
                    impact=GateImpact.WARN,
                    summary=(
                        f"JSON parse error in hunter cache artifact: {exc!s}. "
                        f"Cache hit on this file would deliver garbage to the "
                        f"caller of the hunter intake helper."
                    ),
                    recommendation=(
                        "Delete the corrupted file. Hunter will regenerate it on next "
                        "matching stage call. If recurring, investigate the writer in "
                        "BRAIN/context_hunter/hunter_runner.py for partial-write race."
                    ),
                    evidence=(EvidenceReference(kind="file", path=rel_path, detail=f"parse error at offset {offset}"),),
                    repair_kind="delete_artifact",
                )
            )
            continue

        # Schema sanity: a well-formed hunter artifact should have at least
        # one of the required keys. An object with neither is suspicious —
        # either a foreign file placed in the cache dir, or a writer
        # regression. Only JSON objects are inspected.
        if not isinstance(parsed, dict):
            continue
        if any(key in parsed for key in _REQUIRED_KEYS_ANY):
            continue

        top_keys = sorted(parsed.keys())[:10]
        findings.append(
            build_finding(
                check_id=_CHECK_ID,
                category=GateCategory.META,
                title=f"hunter artifact schema divergence: {json_path.name}",
                severity=GateSeverity.LOW,
                impact=GateImpact.WARN,
                summary=(
                    f"Hunter cache artifact lacks required keys "
                    f"({' or '.join(_REQUIRED_KEYS_ANY)}). Parsed top-level keys: "
                    f"{top_keys}."
                ),
                recommendation=(
                    "Confirm the file was written by hunter_runner. If not, move it "
                    "out of .cortex/context_hunter/. If yes, investigate writer for "
                    "schema regression."
                ),
                evidence=(EvidenceReference(kind="file", path=rel_path, detail=f"top_keys={top_keys}"),),
                repair_kind="schema_inspection",
            )
        )

    return build_check_result(check_id=_CHECK_ID, category=GateCategory.META, findings=findings)
|
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
"""Gate: implementation_overfit
|
|
2
|
+
|
|
3
|
+
Detects code overfit to local context: hardcoded paths, project-specific
|
|
4
|
+
strings, and broken project-agnosticism claims.
|
|
5
|
+
|
|
6
|
+
Sub-checks:
|
|
7
|
+
hardcoded_repo_path -- string literal contains known project-specific cluster paths
|
|
8
|
+
assumes_single_language -- language-neutral file has Python-only conditionals without else
|
|
9
|
+
fake_generic_helper -- function named generic_*/universal_*/common_* has repo literals
|
|
10
|
+
env_tight_coupling -- module imports project-prefixed env vars in universal context
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
import logging
|
|
16
|
+
|
|
17
|
+
from vigil_forensic._shared import (
|
|
18
|
+
EvidenceReference,
|
|
19
|
+
GateCategory,
|
|
20
|
+
GateImpact,
|
|
21
|
+
GateSeverity,
|
|
22
|
+
RepairKind,
|
|
23
|
+
)
|
|
24
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
25
|
+
from .common import build_finding, normalize_path
|
|
26
|
+
from ._ast_helpers import parse_python_source_or_emit_finding
|
|
27
|
+
|
|
28
|
+
_log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# String patterns that indicate hardcoded repo-specific paths/names.
# Each pattern anchors on an opening quote so only string literals match.
_REPO_LITERAL_PATTERNS: tuple[re.Pattern, ...] = (
    re.compile(r"""["'](BRAIN/|SYSTEM/|INTERFACE/|STORAGE/)"""),
    # The dot is escaped so only the literal "/.vigil_launcher/" matches.
    re.compile(r"""["']/\.vigil_launcher/"""),
    re.compile(r"""["']7331["']"""),
    re.compile(r"""["']vigil_control_plane["']"""),
    re.compile(r"""["']vigil_"""),
)

# Python-only conditional patterns that break language-neutrality
_PYTHON_ONLY_PATTERNS: tuple[re.Pattern, ...] = (
    re.compile(r"""path\.(?:endswith|suffix)\s*\(\s*["']\.py["']\s*\)"""),
    re.compile(r"""(?:lang|language)\s*==\s*["']python["']"""),
    re.compile(r"""\.endswith\s*\(\s*["']\.py["']\s*\)"""),
    # pathlib's ``suffix`` is a property, so the real-world form is a
    # comparison rather than a call.
    re.compile(r"""\.suffix\s*==\s*["']\.py["']"""),
)

# Prefixes for "generic" helper function names
_GENERIC_PREFIXES = ("generic_", "universal_", "common_")

# Reads of VIGIL_-prefixed environment variables. Covers
# os.environ.get("VIGIL_..."), os.environ["VIGIL_..."] and
# os.getenv("VIGIL_..."); the historical os.environ("VIGIL_...") spelling
# is still matched for backward compatibility.
_AI_HOST_ENV_PATTERN = re.compile(
    r"""os\.(?:environ(?:\.get)?\s*[\(\[]|getenv\s*\()\s*["']VIGIL_""",
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _is_ai_host_gate_file(path: str) -> bool:
    """Report whether *path* lies in the Vigil-specific gate directory.

    Backslashes are converted to forward slashes first, then the path is
    tested for the substring ``SYSTEM/pipeline/gates/``. Files there are
    Vigil-specific orchestration gates that are explicitly allowed to
    contain hardcoded repo path literals.
    """
    return "SYSTEM/pipeline/gates/" in path.replace("\\", "/")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _has_else_branch(content: str, match_start: int) -> bool:
    """Heuristically report whether an ``else``/``elif`` follows a match.

    Only the 400 characters of *content* beginning at *match_start* are
    inspected, and any standalone ``else`` or ``elif`` word in that window
    counts — the search does not check which ``if`` the keyword belongs to.
    """
    window_end = match_start + 400
    window = content[match_start:window_end]
    return re.search(r"\belse\b|\belif\b", window) is not None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _find_repo_literals_in_text(content: str) -> list[tuple[int, str]]:
    """Collect the lines of *content* that contain a repo-specific literal.

    Returns ``(line_number, snippet)`` pairs in source order. Line numbers
    are 1-based and the snippet is the stripped line cut to 120 characters.
    A line is reported at most once, however many patterns it matches.
    """
    hits: list[tuple[int, str]] = []
    for number, text in enumerate(content.splitlines(), start=1):
        trimmed = text.strip()
        # Only whole-line ``#`` comments are skipped; every other line,
        # docstring lines included, is scanned as-is.
        if trimmed.startswith("#"):
            continue
        if any(pattern.search(text) for pattern in _REPO_LITERAL_PATTERNS):
            hits.append((number, trimmed[:120]))
    return hits
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
# Sub-check 1: hardcoded_repo_path
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def check_hardcoded_repo_path(
    file_path: str,
    content: str,
) -> list["GateFinding"]:
    """Report string literals that embed repo-specific paths or names.

    Blank content and Vigil-specific gate files produce no findings.
    Otherwise one MEDIUM / REVISE finding is built per matching line,
    limited to the first 15 matches in the file.
    """
    # Nothing to inspect, or the file is allowed to carry repo literals.
    if not content.strip() or _is_ai_host_gate_file(file_path):
        return []

    reported = []
    # Slicing keeps the first 15 hits, which is the per-file cap.
    for hit_line, hit_text in _find_repo_literals_in_text(content)[:15]:
        detail = f"Repo-specific literal at line {hit_line}: {hit_text!r}"
        probe = EvidenceReference(
            kind="probe", path=file_path, detail=detail, ok=False,
        )
        finding = build_finding(
            check_id="implementation_overfit.hardcoded_repo_path",
            category=GateCategory.DRIFT,
            title=f"[implementation_overfit.hardcoded_repo_path] {file_path}:{hit_line}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=(
                f"{detail} in {file_path}. "
                "Hardcoded repo paths break project-agnosticism."
            ),
            recommendation=(
                "Move hardcoded path/constant to a repo-specific profile; "
                "import from profile instead of embedding literal."
            ),
            evidence=(probe,),
            repair_kind=RepairKind.EDIT_CANONICAL.value,
            executor_action=(
                "Move hardcoded path/constant to repo-specific profile; import from profile"
            ),
            proof_required="literal removed from file; constant lives in profile; grep confirms",
            allowlist_allowed=False,
            preferred_fix_shape="extract to profile constant; import at usage site",
        )
        reported.append(finding)
    return reported
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ---------------------------------------------------------------------------
|
|
146
|
+
# Sub-check 2: assumes_single_language
|
|
147
|
+
# ---------------------------------------------------------------------------
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def check_assumes_single_language(
    file_path: str,
    content: str,
) -> list["GateFinding"]:
    """Detect Python-only conditionals without else branch in language-neutral files.

    Each non-comment line is tested against ``_PYTHON_ONLY_PATTERNS``. A
    match is reported unless ``_has_else_branch`` finds an ``else``/``elif``
    in the text that follows the match. A line yields at most one finding,
    and scanning stops once 15 findings have been collected.

    Args:
        file_path: Path used in finding titles, summaries and evidence.
        content: Full text of the file.

    Returns:
        A list of findings; empty for blank content or for Vigil-specific
        gate files.
    """
    if not content.strip():
        return []

    if _is_ai_host_gate_file(file_path):
        return []

    findings = []
    lines = content.splitlines()
    # keepends=True preserves every terminator, so the running total below
    # is the true character offset of each line in ``content`` — including
    # for CRLF sources, where a fixed "+ 1 per line" would drift.
    raw_lines = content.splitlines(keepends=True)
    next_line_offset = 0
    for line_num, (line, raw_line) in enumerate(zip(lines, raw_lines), 1):
        line_offset = next_line_offset
        next_line_offset += len(raw_line)

        stripped = line.strip()
        if stripped.startswith("#"):
            continue
        for pat in _PYTHON_ONLY_PATTERNS:
            m = pat.search(line)
            if not m:
                continue
            # A nearby else/elif clears this pattern; later patterns may
            # still match elsewhere on the same line.
            if _has_else_branch(content, line_offset + m.start()):
                continue

            detail = f"Python-only conditional at line {line_num} without else branch: {stripped[:100]!r}"
            findings.append(build_finding(
                check_id="implementation_overfit.assumes_single_language",
                category=GateCategory.DRIFT,
                title=f"[implementation_overfit.assumes_single_language] {file_path}:{line_num}",
                severity=GateSeverity.MEDIUM,
                impact=GateImpact.REVISE,
                summary=(
                    f"{detail} in {file_path}. "
                    "Language-neutral files must handle multiple languages."
                ),
                recommendation=(
                    "Add language-aware branch via source_analysis.get_language_id() "
                    "or an explicit else/elif covering other languages."
                ),
                evidence=(EvidenceReference(
                    kind="probe", path=file_path, detail=detail, ok=False,
                ),),
                repair_kind=RepairKind.VALIDATE_BOUNDARY.value,
                executor_action=(
                    "Add language-aware branch via source_analysis.get_language_id()"
                ),
                proof_required="else/elif branch present; other languages handled; tests pass",
                allowlist_allowed=True,
                preferred_fix_shape="if lang == 'python': ... elif lang in ('js', 'ts'): ... else: ...",
            ))
            # One finding per line is enough.
            break
        if len(findings) >= 15:
            break
    return findings
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# ---------------------------------------------------------------------------
|
|
209
|
+
# Sub-check 3: fake_generic_helper
|
|
210
|
+
# ---------------------------------------------------------------------------
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def check_fake_generic_helper(
    file_path: str,
    content: str,
) -> list["GateFinding"]:
    """Report generically named functions that embed repo-specific literals.

    A function qualifies when its lower-cased name starts with one of
    ``_GENERIC_PREFIXES``. Its source lines are scanned for repo literals,
    and the first hit is quoted with a line number relative to the
    function's first line. Blank content and Vigil-specific gate files
    produce no findings.
    """
    if not content.strip() or _is_ai_host_gate_file(file_path):
        return []

    import ast

    collected: list = []
    # The parse helper is handed ``collected.append`` as its emit callback,
    # so whatever it reports for unparseable source is returned to the
    # caller rather than dropped (per the original note it emits
    # meta.syntax_parse_error — confirm against _ast_helpers).
    module_tree = parse_python_source_or_emit_finding(
        content,
        rel_path=normalize_path(file_path),
        emit_finding=collected.append,
        emitting_gate="implementation_overfit.fake_generic_helper",
    )
    if module_tree is None:
        return collected

    source_lines = content.splitlines()
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)

    for candidate in ast.walk(module_tree):
        if not isinstance(candidate, function_types):
            continue
        # str.startswith accepts the whole tuple of prefixes at once.
        if not candidate.name.lower().startswith(_GENERIC_PREFIXES):
            continue

        # Slice the function's own source lines out of the file.
        def_line = candidate.lineno
        last_line = getattr(candidate, "end_lineno", def_line)
        body_text = "\n".join(source_lines[def_line - 1:last_line])

        literal_hits = _find_repo_literals_in_text(body_text)
        if not literal_hits:
            continue

        hit_line, hit_text = literal_hits[0]
        detail = (
            f"Function {candidate.name!r} (line {def_line}) named as generic helper "
            f"but contains repo literal at relative line {hit_line}: {hit_text!r}"
        )
        probe = EvidenceReference(
            kind="probe", path=file_path, detail=detail, ok=False,
        )
        collected.append(build_finding(
            check_id="implementation_overfit.fake_generic_helper",
            category=GateCategory.DRIFT,
            title=f"[implementation_overfit.fake_generic_helper] {file_path}:{def_line}:{candidate.name}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=(
                f"{detail} in {file_path}. "
                "A function with a generic name must not contain repo-specific literals."
            ),
            recommendation=(
                f"Rename {candidate.name!r} to reflect its actual repo-specific coupling, "
                "OR remove the repo literals and make it truly generic."
            ),
            evidence=(probe,),
            repair_kind=RepairKind.NORMALIZE_SHAPE.value,
            executor_action=(
                "Rename to reflect actual coupling OR remove repo literals"
            ),
            proof_required="function renamed or literals extracted; grep confirms no false-generic name",
            allowlist_allowed=False,
            preferred_fix_shape="rename function OR extract literals to profile import",
        ))
    return collected
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
# ---------------------------------------------------------------------------
|
|
292
|
+
# Sub-check 4: env_tight_coupling
|
|
293
|
+
# ---------------------------------------------------------------------------
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def check_env_tight_coupling(
    file_path: str,
    content: str,
) -> list["GateFinding"]:
    """Report direct reads of VIGIL_-prefixed environment variables.

    Every non-comment line matching ``_AI_HOST_ENV_PATTERN`` yields one
    MEDIUM / REVISE finding; scanning stops after 10 findings. Blank
    content and Vigil-specific gate files produce no findings.
    """
    if not content.strip() or _is_ai_host_gate_file(file_path):
        return []

    reported = []
    for number, text in enumerate(content.splitlines(), start=1):
        trimmed = text.strip()
        # Skip whole-line comments and lines without a VIGIL_ env read.
        if trimmed.startswith("#") or _AI_HOST_ENV_PATTERN.search(text) is None:
            continue

        detail = f"Project-prefixed env var at line {number}: {trimmed[:120]!r}"
        probe = EvidenceReference(
            kind="probe", path=file_path, detail=detail, ok=False,
        )
        reported.append(build_finding(
            check_id="implementation_overfit.env_tight_coupling",
            category=GateCategory.DRIFT,
            title=f"[implementation_overfit.env_tight_coupling] {file_path}:{number}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=(
                f"{detail} in {file_path}. "
                "Universal modules must not reference project-specific env vars directly."
            ),
            recommendation=(
                "Inject env var via constructor parameter or profile config. "
                "Do not read VIGIL_* vars directly in universal code."
            ),
            evidence=(probe,),
            repair_kind=RepairKind.VALIDATE_BOUNDARY.value,
            executor_action=(
                "Inject env var via parameter; remove direct os.environ.get('VIGIL_*') call"
            ),
            proof_required="no VIGIL_* env reads in module; value injected via parameter; tests pass",
            allowlist_allowed=True,
            preferred_fix_shape="def __init__(self, env_prefix: str = ''): ...",
        ))
        # Per-file cap.
        if len(reported) >= 10:
            break
    return reported
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
# ---------------------------------------------------------------------------
|
|
350
|
+
# Gate runner
|
|
351
|
+
# ---------------------------------------------------------------------------
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def run_implementation_overfit_checks(ctx: PostExecGateContext):
    """Run every implementation_overfit sub-check over the file snapshots.

    Snapshots without a non-empty ``text`` attribute are skipped. The four
    sub-checks run in a fixed order for each remaining file, and no further
    files are processed once 50 or more findings have accumulated (the
    findings already gathered are kept, not truncated).
    """
    from vigil_forensic._shared import GateCategory
    from vigil_forensic.gate_checks.common import build_check_result

    sub_checks = (
        check_hardcoded_repo_path,
        check_assumes_single_language,
        check_fake_generic_helper,
        check_env_tight_coupling,
    )

    collected = []
    for path, snap in (ctx.file_snapshots or {}).items():
        content = getattr(snap, "text", None)
        if not content:
            continue

        for sub_check in sub_checks:
            collected.extend(sub_check(path, content))

        # The cap is evaluated once per file, after all four sub-checks.
        if len(collected) >= 50:
            break

    return build_check_result(
        check_id="implementation_overfit",
        category=GateCategory.DRIFT,
        findings=collected,
        notes=[],
    )
|