vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
"""Advanced cluster wrappers -- clusters 32-50, 52, 53.
|
|
2
|
+
|
|
3
|
+
Covers: hardcoded paths, boundary validation, unreachable code, shadowed
|
|
4
|
+
builtins, mutable defaults, resource leaks, docstring drift, broad catch,
|
|
5
|
+
debug prints, commented code, missing await, unchecked response, naive
|
|
6
|
+
timezone, near-duplicate code, missing null check, path concatenation,
|
|
7
|
+
log without context, test secrets, unpinned dependencies,
|
|
8
|
+
shared logic fragmentation (C52), legacy compatibility debt (C53).
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from ...source_analysis import is_source_file
|
|
13
|
+
from ...gate_models import GateFinding
|
|
14
|
+
from ..forensic_clusters import (
|
|
15
|
+
assess_boundary_validation,
|
|
16
|
+
assess_broad_catch_no_reraise,
|
|
17
|
+
assess_commented_code,
|
|
18
|
+
assess_debug_prints,
|
|
19
|
+
assess_docstring_params,
|
|
20
|
+
assess_hardcoded_paths,
|
|
21
|
+
assess_log_without_context,
|
|
22
|
+
assess_missing_await,
|
|
23
|
+
assess_missing_null_check,
|
|
24
|
+
assess_mutable_defaults,
|
|
25
|
+
assess_naive_timezone,
|
|
26
|
+
assess_near_duplicate_code,
|
|
27
|
+
assess_path_concatenation,
|
|
28
|
+
assess_resource_leaks,
|
|
29
|
+
assess_shadowed_builtins,
|
|
30
|
+
assess_test_secrets,
|
|
31
|
+
assess_unchecked_response,
|
|
32
|
+
assess_unpinned_dependencies,
|
|
33
|
+
assess_unreachable_code,
|
|
34
|
+
)
|
|
35
|
+
from ._helpers import _MAX_FINDINGS_PER_CLUSTER
|
|
36
|
+
import logging
|
|
37
|
+
_log = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _check_hardcoded_paths(ctx) -> list[GateFinding]:
    """Collect ``assess_hardcoded_paths`` findings across the context's snapshots.

    Snapshots without a non-empty ``text`` attribute are skipped.  Iteration
    stops once the running total reaches ``_MAX_FINDINGS_PER_CLUSTER``; the
    batch that crosses the limit is kept whole, so the result is not truncated.
    """
    collected: list[GateFinding] = []
    for file_path, snapshot in (ctx.file_snapshots or {}).items():
        source_text = getattr(snapshot, "text", None)
        if not source_text:
            continue
        collected.extend(assess_hardcoded_paths(file_path, source_text))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _check_boundary_validation(ctx) -> list[GateFinding]:
    """Collect ``assess_boundary_validation`` findings for source files only.

    A snapshot is assessed when ``is_source_file`` accepts its path and it has
    a non-empty ``text`` attribute.  No further snapshots are visited once the
    accumulated count reaches ``_MAX_FINDINGS_PER_CLUSTER`` (the last batch is
    kept in full).
    """
    collected: list[GateFinding] = []
    for file_path, snapshot in (ctx.file_snapshots or {}).items():
        # Path filter first, so non-source snapshots are never inspected.
        if not is_source_file(file_path):
            continue
        source_text = getattr(snapshot, "text", None)
        if not source_text:
            continue
        collected.extend(assess_boundary_validation(file_path, source_text))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _check_unreachable_code(ctx) -> list[GateFinding]:
    """Run ``assess_unreachable_code`` on each snapshot that carries text.

    Returns the concatenated findings.  Scanning ends early as soon as the
    total reaches ``_MAX_FINDINGS_PER_CLUSTER``; findings already gathered,
    including any overshoot from the final file, are returned unchanged.
    """
    results: list[GateFinding] = []
    file_snapshots = ctx.file_snapshots or {}
    for name, snap in file_snapshots.items():
        if hasattr(snap, "text") and snap.text:
            results.extend(assess_unreachable_code(name, snap.text))
            if len(results) >= _MAX_FINDINGS_PER_CLUSTER:
                break
    return results
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _check_shadowed_builtins(ctx) -> list[GateFinding]:
    """Collect ``assess_shadowed_builtins`` findings across the context's snapshots.

    Snapshots without a non-empty ``text`` attribute are ignored.  The loop
    exits once at least ``_MAX_FINDINGS_PER_CLUSTER`` findings have been
    gathered; nothing is trimmed from the returned list.
    """
    collected: list[GateFinding] = []
    for file_path, snapshot in (ctx.file_snapshots or {}).items():
        source_text = getattr(snapshot, "text", None)
        if not source_text:
            continue
        collected.extend(assess_shadowed_builtins(file_path, source_text))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _check_mutable_defaults(ctx) -> list[GateFinding]:
    """Run ``assess_mutable_defaults`` on each snapshot that carries text.

    Findings are appended in snapshot iteration order.  Once the total reaches
    ``_MAX_FINDINGS_PER_CLUSTER`` the remaining snapshots are not assessed;
    the list is returned as accumulated, without truncation.
    """
    results: list[GateFinding] = []
    file_snapshots = ctx.file_snapshots or {}
    for name, snap in file_snapshots.items():
        if hasattr(snap, "text") and snap.text:
            results.extend(assess_mutable_defaults(name, snap.text))
            if len(results) >= _MAX_FINDINGS_PER_CLUSTER:
                break
    return results
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _check_resource_leaks(ctx) -> list[GateFinding]:
    """Collect ``assess_resource_leaks`` findings across the context's snapshots.

    Only snapshots exposing a non-empty ``text`` attribute are assessed.
    Iteration stops when the running total is at or above
    ``_MAX_FINDINGS_PER_CLUSTER``; the crossing batch is kept in full.
    """
    collected: list[GateFinding] = []
    for file_path, snapshot in (ctx.file_snapshots or {}).items():
        source_text = getattr(snapshot, "text", None)
        if not source_text:
            continue
        collected.extend(assess_resource_leaks(file_path, source_text))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _check_docstring_params(ctx) -> list[GateFinding]:
    """Run ``assess_docstring_params`` on each snapshot that carries text.

    Returns every finding gathered before the early exit, which triggers once
    the total reaches ``_MAX_FINDINGS_PER_CLUSTER``.  The final batch is not
    cut down to the limit.
    """
    results: list[GateFinding] = []
    file_snapshots = ctx.file_snapshots or {}
    for name, snap in file_snapshots.items():
        if hasattr(snap, "text") and snap.text:
            results.extend(assess_docstring_params(name, snap.text))
            if len(results) >= _MAX_FINDINGS_PER_CLUSTER:
                break
    return results
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _check_broad_catch_no_reraise(ctx) -> list[GateFinding]:
    """Collect ``assess_broad_catch_no_reraise`` findings for source files only.

    A snapshot qualifies when ``is_source_file`` accepts its path and its
    ``text`` attribute exists and is non-empty.  Scanning stops once the total
    reaches ``_MAX_FINDINGS_PER_CLUSTER``; the result is returned untrimmed.
    """
    results: list[GateFinding] = []
    file_snapshots = ctx.file_snapshots or {}
    for name, snap in file_snapshots.items():
        # Same short-circuit order as before: path filter, then attribute, then content.
        if is_source_file(name) and hasattr(snap, "text") and snap.text:
            results.extend(assess_broad_catch_no_reraise(name, snap.text))
            if len(results) >= _MAX_FINDINGS_PER_CLUSTER:
                break
    return results
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _check_debug_prints(ctx) -> list[GateFinding]:
    """Collect ``assess_debug_prints`` findings across the context's snapshots.

    Snapshots with a missing or empty ``text`` are skipped.  The loop ends as
    soon as the collected count reaches ``_MAX_FINDINGS_PER_CLUSTER``; the
    list may exceed that number by the size of the last batch.
    """
    collected: list[GateFinding] = []
    for file_path, snapshot in (ctx.file_snapshots or {}).items():
        source_text = getattr(snapshot, "text", None)
        if not source_text:
            continue
        collected.extend(assess_debug_prints(file_path, source_text))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _check_commented_code(ctx) -> list[GateFinding]:
    """Run ``assess_commented_code`` on each snapshot that carries text.

    Findings accumulate in iteration order; once the total reaches
    ``_MAX_FINDINGS_PER_CLUSTER`` no further snapshots are assessed and the
    accumulated list is returned as-is.
    """
    results: list[GateFinding] = []
    file_snapshots = ctx.file_snapshots or {}
    for name, snap in file_snapshots.items():
        if hasattr(snap, "text") and snap.text:
            results.extend(assess_commented_code(name, snap.text))
            if len(results) >= _MAX_FINDINGS_PER_CLUSTER:
                break
    return results
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _check_missing_await(ctx) -> list[GateFinding]:
    """Collect ``assess_missing_await`` findings across the context's snapshots.

    Snapshots without a non-empty ``text`` attribute are skipped.  Iteration
    stops once the running total reaches ``_MAX_FINDINGS_PER_CLUSTER``; the
    returned list is not truncated to that size.
    """
    collected: list[GateFinding] = []
    for file_path, snapshot in (ctx.file_snapshots or {}).items():
        source_text = getattr(snapshot, "text", None)
        if not source_text:
            continue
        collected.extend(assess_missing_await(file_path, source_text))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _check_unchecked_response(ctx) -> list[GateFinding]:
    """Run ``assess_unchecked_response`` on each snapshot that carries text.

    Stops visiting snapshots once at least ``_MAX_FINDINGS_PER_CLUSTER``
    findings have been gathered and returns everything collected so far.
    """
    results: list[GateFinding] = []
    file_snapshots = ctx.file_snapshots or {}
    for name, snap in file_snapshots.items():
        if hasattr(snap, "text") and snap.text:
            results.extend(assess_unchecked_response(name, snap.text))
            if len(results) >= _MAX_FINDINGS_PER_CLUSTER:
                break
    return results
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _check_naive_timezone(ctx) -> list[GateFinding]:
    """Collect ``assess_naive_timezone`` findings across the context's snapshots.

    Only snapshots with a non-empty ``text`` attribute are assessed.  The
    scan ends when the total reaches ``_MAX_FINDINGS_PER_CLUSTER``; findings
    from the file that crossed the limit are all retained.
    """
    collected: list[GateFinding] = []
    for file_path, snapshot in (ctx.file_snapshots or {}).items():
        source_text = getattr(snapshot, "text", None)
        if not source_text:
            continue
        collected.extend(assess_naive_timezone(file_path, source_text))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _check_near_duplicate_code(ctx) -> list[GateFinding]:
    """Run ``assess_near_duplicate_code`` on each snapshot that carries text.

    Each file is passed to the assessor on its own (path plus text).  Scanning
    stops once the total reaches ``_MAX_FINDINGS_PER_CLUSTER`` and the
    accumulated list is returned without trimming.
    """
    results: list[GateFinding] = []
    file_snapshots = ctx.file_snapshots or {}
    for name, snap in file_snapshots.items():
        if hasattr(snap, "text") and snap.text:
            results.extend(assess_near_duplicate_code(name, snap.text))
            if len(results) >= _MAX_FINDINGS_PER_CLUSTER:
                break
    return results
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _check_missing_null_check(ctx) -> list[GateFinding]:
    """Collect ``assess_missing_null_check`` findings across the context's snapshots.

    Snapshots without a non-empty ``text`` attribute are skipped.  Iteration
    stops once the running total reaches ``_MAX_FINDINGS_PER_CLUSTER``; the
    last batch is kept in full.
    """
    collected: list[GateFinding] = []
    for file_path, snapshot in (ctx.file_snapshots or {}).items():
        source_text = getattr(snapshot, "text", None)
        if not source_text:
            continue
        collected.extend(assess_missing_null_check(file_path, source_text))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _check_path_concatenation(ctx) -> list[GateFinding]:
    """Run ``assess_path_concatenation`` on each snapshot that carries text.

    Returns the findings gathered up to the early exit, which triggers once
    the total reaches ``_MAX_FINDINGS_PER_CLUSTER``.  Nothing is trimmed.
    """
    results: list[GateFinding] = []
    file_snapshots = ctx.file_snapshots or {}
    for name, snap in file_snapshots.items():
        if hasattr(snap, "text") and snap.text:
            results.extend(assess_path_concatenation(name, snap.text))
            if len(results) >= _MAX_FINDINGS_PER_CLUSTER:
                break
    return results
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _check_log_without_context(ctx) -> list[GateFinding]:
    """Collect ``assess_log_without_context`` findings across the context's snapshots.

    Snapshots with a missing or empty ``text`` are ignored.  The loop exits
    once the collected count reaches ``_MAX_FINDINGS_PER_CLUSTER``; the
    returned list is whatever had been accumulated at that point.
    """
    collected: list[GateFinding] = []
    for file_path, snapshot in (ctx.file_snapshots or {}).items():
        source_text = getattr(snapshot, "text", None)
        if not source_text:
            continue
        collected.extend(assess_log_without_context(file_path, source_text))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _check_test_secrets(ctx) -> list[GateFinding]:
    """Run ``assess_test_secrets`` on each snapshot that carries text.

    Every snapshot with non-empty ``text`` is handed to the assessor; this
    wrapper applies no path filtering of its own.  Scanning stops once the
    total reaches ``_MAX_FINDINGS_PER_CLUSTER``; the list is not truncated.
    """
    results: list[GateFinding] = []
    file_snapshots = ctx.file_snapshots or {}
    for name, snap in file_snapshots.items():
        if hasattr(snap, "text") and snap.text:
            results.extend(assess_test_secrets(name, snap.text))
            if len(results) >= _MAX_FINDINGS_PER_CLUSTER:
                break
    return results
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _check_unpinned_dependencies(ctx) -> list[GateFinding]:
    """Collect ``assess_unpinned_dependencies`` findings across the context's snapshots.

    Every snapshot with non-empty ``text`` is passed to the assessor; this
    wrapper applies no path filtering of its own.  Iteration stops once the
    running total reaches ``_MAX_FINDINGS_PER_CLUSTER`` (last batch kept whole).
    """
    collected: list[GateFinding] = []
    for file_path, snapshot in (ctx.file_snapshots or {}).items():
        source_text = getattr(snapshot, "text", None)
        if not source_text:
            continue
        collected.extend(assess_unpinned_dependencies(file_path, source_text))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# ---------------------------------------------------------------------------
|
|
269
|
+
# Cluster 53: Legacy Compatibility Debt
|
|
270
|
+
# ---------------------------------------------------------------------------
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _check_legacy_compat_debt(ctx) -> list[GateFinding]:
    """C53: Run the four legacy-debt checks over every snapshot with text.

    For each file the forwarding-wrapper, unused-shim-module,
    stale-migration-marker and shape-adapter-without-producer checks are run
    in that order.  Returns ``[]`` when the context has no snapshots.  The
    per-file loop stops once the total reaches ``_MAX_FINDINGS_PER_CLUSTER``;
    the accumulated list is returned without truncation.
    """
    snapshots = ctx.file_snapshots or {}
    if not snapshots:
        return []

    # Imported only after the empty-snapshot guard, as in the original layout.
    from ...gate_checks.forensic_clusters.legacy_debt import (
        build_import_index,
        check_forwarding_wrapper,
        check_shape_adapter_without_producer,
        check_stale_migration_marker,
        check_unused_shim_module,
    )

    # path -> text for the snapshots that actually carry content.
    corpus = {}
    for file_path, snapshot in snapshots.items():
        if hasattr(snapshot, "text") and snapshot.text:
            corpus[file_path] = snapshot.text

    # The import index is built once up front and handed to the unused-shim
    # check for every file, so that check need not rescan the whole corpus
    # per module (the original note cites hangs on large monorepos).
    importers_index = build_import_index(corpus)

    collected: list[GateFinding] = []
    for file_path, source_text in corpus.items():
        collected.extend(check_forwarding_wrapper(file_path, source_text))
        collected.extend(check_unused_shim_module(file_path, source_text, importers_index))
        collected.extend(check_stale_migration_marker(file_path, source_text))
        # The shape-adapter check still receives the full corpus; per the
        # original note its trigger pattern is rare, so it returns early for
        # almost every file.
        collected.extend(check_shape_adapter_without_producer(file_path, source_text, corpus))
        if len(collected) >= _MAX_FINDINGS_PER_CLUSTER:
            break
    return collected
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
# ---------------------------------------------------------------------------
|
|
308
|
+
# Cluster 52: Shared Logic Fragmentation
|
|
309
|
+
# ---------------------------------------------------------------------------
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _check_shared_logic_fragmentation(ctx) -> list[GateFinding]:
    """C52: Delegate to ``assess_shared_logic_fragmentation`` for the whole snapshot set.

    The assessor receives the snapshot mapping together with the context's
    ``project_dir`` and ``source_package_roots``.  When the context has no
    snapshots the assessor is neither imported nor called and ``[]`` is
    returned.  No findings cap is applied by this wrapper.
    """
    snapshots = ctx.file_snapshots or {}
    if snapshots:
        from ...gate_checks.forensic_clusters.structural_quality import (
            assess_shared_logic_fragmentation,
        )

        return assess_shared_logic_fragmentation(
            snapshots,
            project_dir=ctx.project_dir,
            source_package_roots=ctx.source_package_roots,
        )
    return []
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""Core orchestrator: run_forensic_cluster_checks.
|
|
2
|
+
|
|
3
|
+
Imports all _check_* wrappers from sibling modules and runs them via
|
|
4
|
+
_safe_run(), merging results into a single GateCheckResult.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import concurrent.futures
|
|
9
|
+
import logging
|
|
10
|
+
import time
|
|
11
|
+
from typing import List
|
|
12
|
+
|
|
13
|
+
from ...gate_models import GateCategory, GateCheckResult, GateFinding, PostExecGateContext
|
|
14
|
+
from ..common import build_check_result
|
|
15
|
+
|
|
16
|
+
from ._helpers import _safe_run
|
|
17
|
+
from .integrity_checks import (
|
|
18
|
+
_check_config_applied,
|
|
19
|
+
_check_config_general,
|
|
20
|
+
_check_fallback_transparency,
|
|
21
|
+
_check_proxy_as_truth,
|
|
22
|
+
_check_state_divergence,
|
|
23
|
+
_check_success_proof,
|
|
24
|
+
)
|
|
25
|
+
from .quality_checks import (
|
|
26
|
+
_check_dead_code,
|
|
27
|
+
_check_dependency_vulnerabilities,
|
|
28
|
+
_check_edit_consistency,
|
|
29
|
+
_check_embedded_code_syntax,
|
|
30
|
+
_check_encoding_consistency,
|
|
31
|
+
_check_error_message_quality,
|
|
32
|
+
_check_exception_swallowing,
|
|
33
|
+
_check_http_method_consistency,
|
|
34
|
+
_check_import_cycles,
|
|
35
|
+
_check_js_surface_coverage,
|
|
36
|
+
_check_log_level_quality,
|
|
37
|
+
_check_magic_numbers,
|
|
38
|
+
_check_mutation_verified,
|
|
39
|
+
_check_naming_consistency,
|
|
40
|
+
_check_response_shape_drift,
|
|
41
|
+
_check_roundtrip_consistency,
|
|
42
|
+
_check_secrets_in_code,
|
|
43
|
+
_check_security_patterns,
|
|
44
|
+
_check_shared_mutable_state,
|
|
45
|
+
_check_test_quality,
|
|
46
|
+
_check_todo_debt,
|
|
47
|
+
_check_unused_imports,
|
|
48
|
+
)
|
|
49
|
+
from .advanced_checks import (
|
|
50
|
+
_check_boundary_validation,
|
|
51
|
+
_check_broad_catch_no_reraise,
|
|
52
|
+
_check_commented_code,
|
|
53
|
+
_check_debug_prints,
|
|
54
|
+
_check_docstring_params,
|
|
55
|
+
_check_hardcoded_paths,
|
|
56
|
+
_check_legacy_compat_debt,
|
|
57
|
+
_check_log_without_context,
|
|
58
|
+
_check_missing_await,
|
|
59
|
+
_check_missing_null_check,
|
|
60
|
+
_check_mutable_defaults,
|
|
61
|
+
_check_naive_timezone,
|
|
62
|
+
_check_near_duplicate_code,
|
|
63
|
+
_check_path_concatenation,
|
|
64
|
+
_check_resource_leaks,
|
|
65
|
+
_check_shadowed_builtins,
|
|
66
|
+
_check_shared_logic_fragmentation,
|
|
67
|
+
_check_test_secrets,
|
|
68
|
+
_check_unchecked_response,
|
|
69
|
+
_check_unpinned_dependencies,
|
|
70
|
+
_check_unreachable_code,
|
|
71
|
+
)
|
|
72
|
+
_log = logging.getLogger(__name__)
|
|
73
|
+
|
|
74
|
+
# Overall time budget, in seconds, for one run of the forensic cluster checks.
# It is the timeout used when waiting on the worker future, and (minus a 10 s
# safety margin) the elapsed-time threshold past which the check loop stops
# starting further checks.
_FORENSIC_CLUSTERS_TIMEOUT = 90
|
|
75
|
+
|
|
76
|
+
# Cluster runners that depend on RUNTIME / verification context (artifact_refs,
|
|
77
|
+
# transport_mode, reported-vs-observed changes, validation-contract proofs, or a
|
|
78
|
+
# disk re-read compared against an expected hash). They are meaningful ONLY when
|
|
79
|
+
# the gate runs against a real post-execution context. In *static mode* — where
|
|
80
|
+
# the context is synthesised from on-disk file_snapshots alone (no artifact_refs,
|
|
81
|
+
# empty transport_mode, session_number == 0) — they either trivially self-skip OR
|
|
82
|
+
# emit false positives, so they are filtered out entirely.
|
|
83
|
+
#
|
|
84
|
+
# The worst offender is cluster11_mutation_verified: the runner hashes the
|
|
85
|
+
# *decoded* snapshot text (LF, BOM-stripped) while the assessor hashes the *raw*
|
|
86
|
+
# disk bytes (CRLF / BOM on Windows). On any CRLF or BOM file those hashes differ
|
|
87
|
+
# → a bogus "File content DIVERGED from expected" HIGH on otherwise-clean code.
|
|
88
|
+
# That check exists to catch "edit applied in memory but not on disk" and has no
|
|
89
|
+
# meaning when the snapshot WAS read from disk, as it is in static mode.
|
|
90
|
+
# Labels of the cluster runners that are filtered out of the check list when
# the gate runs in static mode; each entry notes the runtime input it needs.
_RUNTIME_ONLY_CLUSTERS: frozenset[str] = frozenset(
    (
        "cluster2_success_without_proof",  # needs session_number + artifact_refs
        "cluster4_config_accepted_ignored_proofs",  # needs validation_contract proofs + artifact_refs
        "cluster6_state_divergence",  # reported vs observed changed files
        "cluster7_fallback_hides_truth",  # transport_mode + artifact_refs
        "cluster3_proxy_as_truth",  # transport_mode + artifact_refs
        "cluster4_config_accepted_ignored_general",  # transport_mode / project_mode / task_intent
        "cluster10_edit_consistency",  # operator_api runtime-edit consistency
        "cluster11_mutation_verified",  # disk re-read vs expected hash -> CRLF/BOM false positive
    )
)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _is_static_mode(ctx: PostExecGateContext) -> bool:
    """Report whether *ctx* lacks every runtime/verification signal.

    The context counts as static when ``artifact_refs``, ``transport_mode``
    and ``session_number`` are each either absent or falsy (empty, ``None``,
    ``0``).  If any one of them is populated the context is treated as a real
    post-execution context and ``False`` is returned.

    Per the original documentation, static mode is how ``run_forensic_audit``
    invokes the pack, with a context synthesised from on-disk
    ``file_snapshots`` only.
    """
    runtime_signals = ("artifact_refs", "transport_mode", "session_number")
    return not any(getattr(ctx, attr, None) for attr in runtime_signals)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def run_forensic_cluster_checks(ctx: PostExecGateContext) -> GateCheckResult:
|
|
118
|
+
"""Run universal forensic cluster checks against the real gate context.
|
|
119
|
+
|
|
120
|
+
Universal (project-agnostic) integrity clusters:
|
|
121
|
+
- C2: Success Without Proof (artifact_refs check)
|
|
122
|
+
- C3: Proxy as Truth (remote truth labeling)
|
|
123
|
+
- C4: Config Accepted Ignored (proofs + transport + classification)
|
|
124
|
+
- C6: State Divergence (reported vs observed files)
|
|
125
|
+
- C7: Fallback Hides Truth (remote mode without proof)
|
|
126
|
+
|
|
127
|
+
(C1 declared/C5 rendered/C8 dead-surface/C9 phantom were Vigil-specific and
|
|
128
|
+
removed -- they depended on INTERFACE.operator / INTERFACE.UI modules.)
|
|
129
|
+
"""
|
|
130
|
+
all_findings: List[GateFinding] = []
|
|
131
|
+
error_notes: List[str] = []
|
|
132
|
+
|
|
133
|
+
def _run_all_checks() -> tuple[List[GateFinding], List[str]]:
|
|
134
|
+
_results: List[GateFinding] = []
|
|
135
|
+
_notes: List[str] = []
|
|
136
|
+
_start = time.monotonic()
|
|
137
|
+
|
|
138
|
+
_checks = [
|
|
139
|
+
# Phase 1 runners (existing)
|
|
140
|
+
("cluster2_success_without_proof", lambda: _check_success_proof(ctx)),
|
|
141
|
+
("cluster4_config_accepted_ignored_proofs", lambda: _check_config_applied(ctx)),
|
|
142
|
+
("cluster6_state_divergence", lambda: _check_state_divergence(ctx)),
|
|
143
|
+
("cluster7_fallback_hides_truth", lambda: _check_fallback_transparency(ctx)),
|
|
144
|
+
# clusters 1/5/8/9 (declared_capability, rendered_vs_live, dead_surface,
|
|
145
|
+
# phantom_capability) were REMOVED: they hardcoded Vigil's INTERFACE.operator
|
|
146
|
+
# / INTERFACE.UI modules and only ever produced false findings (ImportError /
|
|
147
|
+
# empty) on any non-Vigil project. Gone for a clean standalone package.
|
|
148
|
+
# Phase 3 runners (new)
|
|
149
|
+
("cluster3_proxy_as_truth", lambda: _check_proxy_as_truth(ctx)),
|
|
150
|
+
("cluster4_config_accepted_ignored_general", lambda: _check_config_general(ctx)),
|
|
151
|
+
# Phase 4 runners
|
|
152
|
+
("cluster10_edit_consistency", lambda: _check_edit_consistency(ctx)),
|
|
153
|
+
("cluster11_mutation_verified", lambda: _check_mutation_verified(ctx)),
|
|
154
|
+
# Phase 5 runners (universal clusters)
|
|
155
|
+
("cluster12_security", lambda: _check_security_patterns(ctx)),
|
|
156
|
+
("cluster13_test_quality", lambda: _check_test_quality(ctx)),
|
|
157
|
+
("cluster14_import_cycles", lambda: _check_import_cycles(ctx)),
|
|
158
|
+
("cluster15_roundtrip", lambda: _check_roundtrip_consistency(ctx)),
|
|
159
|
+
("cluster16_mutable_state", lambda: _check_shared_mutable_state(ctx)),
|
|
160
|
+
# Phase 6 runners (security + code quality)
|
|
161
|
+
("cluster17_dependency_cves", lambda: _check_dependency_vulnerabilities(ctx)),
|
|
162
|
+
("cluster20_dead_code", lambda: _check_dead_code(ctx)),
|
|
163
|
+
("cluster23_unused_imports", lambda: _check_unused_imports(ctx)),
|
|
164
|
+
("cluster25_secrets", lambda: _check_secrets_in_code(ctx)),
|
|
165
|
+
# Phase 7 runners (code style + encoding)
|
|
166
|
+
("cluster21_magic_numbers", lambda: _check_magic_numbers(ctx)),
|
|
167
|
+
("cluster22_error_messages", lambda: _check_error_message_quality(ctx)),
|
|
168
|
+
("cluster24_naming", lambda: _check_naming_consistency(ctx)),
|
|
169
|
+
("cluster26_todo_debt", lambda: _check_todo_debt(ctx)),
|
|
170
|
+
("cluster28_log_levels", lambda: _check_log_level_quality(ctx)),
|
|
171
|
+
("cluster29_encoding", lambda: _check_encoding_consistency(ctx)),
|
|
172
|
+
# Phase 8 runners (JS/API contract drift)
|
|
173
|
+
("cluster27_embedded_syntax", lambda: _check_embedded_code_syntax(ctx)),
|
|
174
|
+
("cluster28b_response_shape", lambda: _check_response_shape_drift(ctx)),
|
|
175
|
+
("cluster29b_method_consistency", lambda: _check_http_method_consistency(ctx)),
|
|
176
|
+
("cluster30_js_coverage", lambda: _check_js_surface_coverage(ctx)),
|
|
177
|
+
# Phase 9 runners (fail-loud, portability, security boundaries)
|
|
178
|
+
("cluster31_exception_swallowing", lambda: _check_exception_swallowing(ctx)),
|
|
179
|
+
("cluster32_hardcoded_paths", lambda: _check_hardcoded_paths(ctx)),
|
|
180
|
+
("cluster33_boundary_validation", lambda: _check_boundary_validation(ctx)),
|
|
181
|
+
# Phase 10 runners (deep code quality + language-agnostic)
|
|
182
|
+
("cluster34_unreachable_code", lambda: _check_unreachable_code(ctx)),
|
|
183
|
+
("cluster35_shadowed_builtins", lambda: _check_shadowed_builtins(ctx)),
|
|
184
|
+
("cluster36_mutable_defaults", lambda: _check_mutable_defaults(ctx)),
|
|
185
|
+
("cluster37_resource_leaks", lambda: _check_resource_leaks(ctx)),
|
|
186
|
+
("cluster38_docstring_drift", lambda: _check_docstring_params(ctx)),
|
|
187
|
+
("cluster39_broad_catch", lambda: _check_broad_catch_no_reraise(ctx)),
|
|
188
|
+
("cluster40_debug_prints", lambda: _check_debug_prints(ctx)),
|
|
189
|
+
("cluster41_commented_code", lambda: _check_commented_code(ctx)),
|
|
190
|
+
# Phase 11 runners (async, API safety, dependencies)
|
|
191
|
+
("cluster42_missing_await", lambda: _check_missing_await(ctx)),
|
|
192
|
+
("cluster43_unchecked_response", lambda: _check_unchecked_response(ctx)),
|
|
193
|
+
("cluster44_naive_timezone", lambda: _check_naive_timezone(ctx)),
|
|
194
|
+
("cluster45_near_duplicate", lambda: _check_near_duplicate_code(ctx)),
|
|
195
|
+
("cluster46_null_check", lambda: _check_missing_null_check(ctx)),
|
|
196
|
+
("cluster47_path_concat", lambda: _check_path_concatenation(ctx)),
|
|
197
|
+
("cluster48_log_context", lambda: _check_log_without_context(ctx)),
|
|
198
|
+
("cluster49_test_secrets", lambda: _check_test_secrets(ctx)),
|
|
199
|
+
("cluster50_unpinned_deps", lambda: _check_unpinned_dependencies(ctx)),
|
|
200
|
+
# Phase 12 runners (C52: structural quality)
|
|
201
|
+
("cluster52_shared_logic_fragmentation", lambda: _check_shared_logic_fragmentation(ctx)),
|
|
202
|
+
# Phase 13 runners (C53: legacy compatibility debt)
|
|
203
|
+
("cluster53_legacy_compat_debt", lambda: _check_legacy_compat_debt(ctx)),
|
|
204
|
+
]
|
|
205
|
+
|
|
206
|
+
# In static mode (no runtime/verification context) drop the runtime-only
|
|
207
|
+
# clusters so the static-safe subset (security, secrets, mutable defaults,
|
|
208
|
+
# resource leaks, dead code, …) still runs without emitting runtime FPs.
|
|
209
|
+
if _is_static_mode(ctx):
|
|
210
|
+
_dropped = [lbl for lbl, _ in _checks if lbl in _RUNTIME_ONLY_CLUSTERS]
|
|
211
|
+
_checks = [(lbl, fn) for lbl, fn in _checks if lbl not in _RUNTIME_ONLY_CLUSTERS]
|
|
212
|
+
if _dropped:
|
|
213
|
+
_log.debug(
|
|
214
|
+
"forensic_clusters: static mode — skipped %d runtime-only "
|
|
215
|
+
"cluster(s): %s", len(_dropped), ", ".join(_dropped),
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
from vigil_forensic.self_audit import get_cancel_event
|
|
219
|
+
for label, fn in _checks:
|
|
220
|
+
_cancel = get_cancel_event()
|
|
221
|
+
if _cancel is not None and _cancel.is_set():
|
|
222
|
+
_log.info("forensic_clusters: cancel_event set, stopping at %s", label)
|
|
223
|
+
break
|
|
224
|
+
_elapsed = time.monotonic() - _start
|
|
225
|
+
if _elapsed > _FORENSIC_CLUSTERS_TIMEOUT - 10: # 10s safety margin
|
|
226
|
+
_log.warning(
|
|
227
|
+
"post_exec_gate: forensic_clusters timeout threshold approaching "
|
|
228
|
+
"(%.1fs / %ds), stopping early",
|
|
229
|
+
_elapsed, _FORENSIC_CLUSTERS_TIMEOUT,
|
|
230
|
+
)
|
|
231
|
+
break
|
|
232
|
+
_safe_run(label, fn, _results, _notes)
|
|
233
|
+
|
|
234
|
+
return _results, _notes
|
|
235
|
+
|
|
236
|
+
executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
|
237
|
+
try:
|
|
238
|
+
all_findings, error_notes = executor.submit(_run_all_checks).result(
|
|
239
|
+
timeout=_FORENSIC_CLUSTERS_TIMEOUT
|
|
240
|
+
)
|
|
241
|
+
except concurrent.futures.TimeoutError:
|
|
242
|
+
_log.error(
|
|
243
|
+
"post_exec_gate: forensic_clusters total execution timeout (%ds reached)",
|
|
244
|
+
_FORENSIC_CLUSTERS_TIMEOUT,
|
|
245
|
+
)
|
|
246
|
+
all_findings = []
|
|
247
|
+
error_notes = []
|
|
248
|
+
finally:
|
|
249
|
+
executor.shutdown(wait=False)
|
|
250
|
+
|
|
251
|
+
# Apply false positive allowlist with revalidation
|
|
252
|
+
from ..forensic_clusters import load_allowlist, filter_by_allowlist
|
|
253
|
+
try:
|
|
254
|
+
allowlist = load_allowlist(ctx.project_dir)
|
|
255
|
+
if allowlist:
|
|
256
|
+
all_findings, filtered, revalidation_notes = filter_by_allowlist(
|
|
257
|
+
all_findings, allowlist, project_dir=ctx.project_dir,
|
|
258
|
+
)
|
|
259
|
+
error_notes.extend(revalidation_notes)
|
|
260
|
+
if filtered:
|
|
261
|
+
error_notes.append(
|
|
262
|
+
f"[allowlist] Filtered {len(filtered)} finding(s) via false_positive_allowlist.json "
|
|
263
|
+
f"({sum(1 for e in allowlist if e.is_valid())} valid entries)"
|
|
264
|
+
)
|
|
265
|
+
except (OSError, KeyError, ValueError, TypeError):
|
|
266
|
+
pass # allowlist failure must not block forensics
|
|
267
|
+
|
|
268
|
+
return build_check_result(
|
|
269
|
+
check_id="forensic_clusters",
|
|
270
|
+
category=GateCategory.CONTRACT,
|
|
271
|
+
findings=all_findings,
|
|
272
|
+
notes=error_notes,
|
|
273
|
+
)
|