vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""C52: Shared Logic Fragmentation / Duplicate Module Proliferation.
|
|
2
|
+
|
|
3
|
+
Four patterns detected on the touched-file set:
|
|
4
|
+
P1 — Abstraction Bypass: new shared-family file that doesn't import the canonical
|
|
5
|
+
P2 — Provider Parallel Flow: 2+ provider files share flow markers without a common base
|
|
6
|
+
P3 — Responsibility Family Proliferation: 3+ touched files share (dir, responsibility_suffix)
|
|
7
|
+
P4 — Generic Shared Fork: new generic-stem file has a same-stem sibling it ignores
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from ...gate_models import EvidenceReference, GateCategory, GateImpact, GateSeverity, GateFileSnapshot, RepairKind
|
|
16
|
+
from ..common import build_finding, normalize_path
|
|
17
|
+
import logging
|
|
18
|
+
_log = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
# Suffixes suggesting "provider-like" roles.
# P2 treats a touched file as a provider when its path ends with one of these
# suffixes immediately followed by ".py", ".ts" or ".js".
_PROVIDER_SUFFIXES = frozenset({
    "_provider", "_adapter", "_service", "_client", "_backend", "_engine",
})
|
|
24
|
+
|
|
25
|
+
# Responsibility suffixes that should stay consolidated, not proliferated.
# P3 groups touched files by (parent directory, matching suffix) and reports a
# group once it holds three or more files.
_RESPONSIBILITY_SUFFIXES = frozenset({
    "_checks", "_check", "_utils", "_helpers", "_handlers",
    "_runners", "_providers", "_adapters", "_validators",
})
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _stem(path: str) -> str:
|
|
33
|
+
"""Filename without extension, lowercased."""
|
|
34
|
+
basename = os.path.basename(path)
|
|
35
|
+
return basename.rsplit(".", 1)[0].lower() if "." in basename else basename.lower()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _parent_dir(path: str) -> str:
    """Return the directory part of *path* after passing it through ``normalize_path``."""
    normalized = normalize_path(path)
    return os.path.dirname(normalized)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _has_import_from(text: str, module_stem: str) -> bool:
|
|
43
|
+
"""True if *text* contains an import statement referencing *module_stem*."""
|
|
44
|
+
pattern = re.compile(
|
|
45
|
+
r"(?:from|import)\s+[\w.]*\b" + re.escape(module_stem) + r"\b",
|
|
46
|
+
re.MULTILINE,
|
|
47
|
+
)
|
|
48
|
+
return bool(pattern.search(text))
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _flow_markers_found(text: str, patterns: tuple[str, ...]) -> frozenset[str]:
|
|
52
|
+
"""Return the subset of flow marker regex patterns that match *text*."""
|
|
53
|
+
found: set[str] = set()
|
|
54
|
+
for pat in patterns:
|
|
55
|
+
try:
|
|
56
|
+
if re.search(pat, text, re.MULTILINE):
|
|
57
|
+
found.add(pat)
|
|
58
|
+
except re.error:
|
|
59
|
+
pass
|
|
60
|
+
return frozenset(found)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def assess_shared_logic_fragmentation(
    snapshots: dict[str, GateFileSnapshot],
    *,
    project_dir: Path,
    source_package_roots: tuple[str, ...],
) -> list:
    """Run the four C52 pattern detectors and return their findings in P1..P4 order."""
    bypass = _pattern1_abstraction_bypass(snapshots, project_dir, source_package_roots)
    parallel = _pattern2_provider_parallel_flow(snapshots)
    proliferation = _pattern3_responsibility_proliferation(snapshots)
    forks = _pattern4_generic_shared_fork(snapshots, project_dir, source_package_roots)
    return [*bypass, *parallel, *proliferation, *forks]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# ---------------------------------------------------------------------------
|
|
78
|
+
# P1 — Abstraction Bypass
|
|
79
|
+
# ---------------------------------------------------------------------------
|
|
80
|
+
|
|
81
|
+
def _pattern1_abstraction_bypass(
    snapshots: dict[str, GateFileSnapshot],
    project_dir: Path,
    source_package_roots: tuple[str, ...],
) -> list:
    """P1: flag shared-family files that do not import an existing same-stem file.

    For every touched, non-empty snapshot whose stem is listed by
    ``get_shared_families(path)``, each package root under *project_dir* is
    searched for other files with the same stem. When at least one exists and
    the snapshot text contains no import mentioning that stem, one finding is
    emitted.

    Args:
        snapshots: Touched files keyed by path.
        project_dir: Project root; package roots and reported paths are
            relative to it.
        source_package_roots: Directory names under *project_dir* to search.

    Returns:
        A list of findings built with ``build_finding`` (possibly empty).
    """
    from ...source_analysis import get_shared_families

    findings = []
    for path, snap in snapshots.items():
        if not snap.exists or not snap.text.strip():
            continue
        file_stem = _stem(path)
        families = get_shared_families(path)
        if file_stem not in families:
            continue
        # Invariant for the whole search below, so normalize it only once.
        normalized_self = normalize_path(path)
        # Look for a canonical file with the same stem in any package root
        canonical_candidates: list[str] = []
        for root_name in source_package_roots:
            root = project_dir / root_name
            if not root.is_dir():
                continue
            for existing in root.rglob(f"*{file_stem}*"):
                if not existing.is_file():
                    continue
                rel = str(existing.relative_to(project_dir)).replace("\\", "/")
                if normalize_path(rel) == normalized_self:
                    continue  # the touched file itself
                if _stem(rel) == file_stem:
                    canonical_candidates.append(rel)
        if not canonical_candidates:
            continue
        # Directory traversal order is filesystem-dependent; pick the
        # lexicographically smallest candidate so the same tree always
        # reports the same canonical file.
        canonical = min(canonical_candidates)
        if _has_import_from(snap.text, file_stem):
            continue
        findings.append(build_finding(
            check_id="c52.abstraction_bypass",
            category=GateCategory.DUPLICATION,
            title=f"Shared-family file {path!r} ignores canonical {canonical!r}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=(
                f"{path} has stem {file_stem!r} (shared family) but does not import "
                f"from the existing canonical {canonical}. "
                "Logic likely duplicated rather than extended."
            ),
            recommendation=f"Import from {canonical} and extend it instead of creating a parallel copy.",
            evidence=[
                EvidenceReference(kind="file", path=path, detail="new"),
                EvidenceReference(kind="file", path=canonical, detail="canonical"),
            ],
            repair_kind=RepairKind.EDIT_CANONICAL.value,
            executor_action=f"Import from {canonical} in {path}; merge any new logic there instead",
            proof_required="new file imports canonical; no duplicate logic",
            allowlist_allowed=False,
        ))
    return findings
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
# P2 — Provider Parallel Flow
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
def _pattern2_provider_parallel_flow(snapshots: dict[str, GateFileSnapshot]) -> list:
    """P2: flag pairs of provider files that share flow markers but not an import.

    A touched file counts as a provider when it exists and its path ends with
    one of ``_PROVIDER_SUFFIXES`` followed by ``.py``, ``.ts`` or ``.js``.
    For each unordered pair of providers, a finding is emitted when at least
    three flow-marker patterns match both files and neither file imports the
    other's stem.

    Args:
        snapshots: Touched files keyed by path.

    Returns:
        A list of findings built with ``build_finding`` (possibly empty).
    """
    from ...source_analysis import get_flow_markers

    findings = []
    provider_endings = tuple(
        suf + ext
        for suf in _PROVIDER_SUFFIXES
        for ext in (".py", ".ts", ".js")
    )
    provider_files = [
        (path, snap)
        for path, snap in snapshots.items()
        if snap.exists and path.endswith(provider_endings)
    ]
    if len(provider_files) < 2:
        return findings
    # Scan each provider once up front; the result does not depend on which
    # other file it is later paired with.
    scanned = [
        (path, snap, _flow_markers_found(snap.text, get_flow_markers(path)))
        for path, snap in provider_files
    ]
    # Compare each pair. Paths are unique dict keys and the inner loop only
    # looks forward, so every unordered pair is visited exactly once.
    for i, (path_a, snap_a, found_a) in enumerate(scanned):
        for path_b, snap_b, found_b in scanned[i + 1:]:
            shared = found_a & found_b
            if len(shared) < 3:
                continue
            if _has_import_from(snap_a.text, _stem(path_b)) or _has_import_from(snap_b.text, _stem(path_a)):
                continue  # already connected
            findings.append(build_finding(
                check_id="c52.provider_parallel_flow",
                category=GateCategory.DUPLICATION,
                title=f"Provider files share {len(shared)} flow steps without a common base",
                severity=GateSeverity.MEDIUM,
                impact=GateImpact.REVISE,
                summary=(
                    f"{path_a} and {path_b} both implement {len(shared)} shared flow steps "
                    f"with no common imported base. Extract the shared flow into a base class or mixin."
                ),
                recommendation=(
                    "Extract the shared flow steps into a common base module. "
                    "Both providers should import from it."
                ),
                evidence=[
                    EvidenceReference(kind="file", path=path_a),
                    EvidenceReference(kind="file", path=path_b),
                ],
                repair_kind=RepairKind.EXTRACT_SHARED.value,
                executor_action=f"Extract shared flow steps from {path_a} and {path_b} into a common base; both import it",
                proof_required="shared base exists; both providers import it; tests pass",
            ))
    return findings
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
# P3 — Responsibility Family Proliferation
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
def _pattern3_responsibility_proliferation(snapshots: dict[str, GateFileSnapshot]) -> list:
|
|
200
|
+
findings = []
|
|
201
|
+
groups: dict[tuple[str, str], list[str]] = {}
|
|
202
|
+
for path in snapshots:
|
|
203
|
+
if not snapshots[path].exists:
|
|
204
|
+
continue
|
|
205
|
+
file_stem = _stem(path)
|
|
206
|
+
parent = _parent_dir(path)
|
|
207
|
+
for suf in _RESPONSIBILITY_SUFFIXES:
|
|
208
|
+
if file_stem.endswith(suf):
|
|
209
|
+
groups.setdefault((parent, suf), []).append(path)
|
|
210
|
+
break
|
|
211
|
+
for (parent, suf), paths in groups.items():
|
|
212
|
+
if len(paths) < 3:
|
|
213
|
+
continue
|
|
214
|
+
findings.append(build_finding(
|
|
215
|
+
check_id="c52.responsibility_proliferation",
|
|
216
|
+
category=GateCategory.DUPLICATION,
|
|
217
|
+
title=f"{len(paths)} {suf!r}-suffixed files in {parent or '.'!r}",
|
|
218
|
+
severity=GateSeverity.MEDIUM,
|
|
219
|
+
impact=GateImpact.REVISE,
|
|
220
|
+
summary=(
|
|
221
|
+
f"{len(paths)} files with suffix {suf!r} in {parent or '.'!r}: "
|
|
222
|
+
f"{', '.join(sorted(paths)[:5])}. "
|
|
223
|
+
"This many responsibility-scoped files in one directory suggests fragmentation."
|
|
224
|
+
),
|
|
225
|
+
recommendation=(
|
|
226
|
+
f"Consider consolidating {suf}-suffixed logic into fewer modules. "
|
|
227
|
+
"If the separation is intentional, document the boundary clearly."
|
|
228
|
+
),
|
|
229
|
+
evidence=[EvidenceReference(kind="file", path=p) for p in sorted(paths)[:5]],
|
|
230
|
+
repair_kind=RepairKind.CONSOLIDATE.value,
|
|
231
|
+
executor_action=f"Consolidate {len(paths)} {suf!r} files in {parent or '.'}; merge related logic into fewer modules",
|
|
232
|
+
))
|
|
233
|
+
return findings
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# ---------------------------------------------------------------------------
|
|
237
|
+
# P4 — Generic Shared Fork
|
|
238
|
+
# ---------------------------------------------------------------------------
|
|
239
|
+
|
|
240
|
+
def _pattern4_generic_shared_fork(
    snapshots: dict[str, GateFileSnapshot],
    project_dir: Path,
    source_package_roots: tuple[str, ...],
) -> list:
    """P4: flag generic-stem files that have a same-stem sibling in another directory.

    For every touched, non-empty snapshot whose stem is listed by
    ``get_generic_stems(path)``, each package root under *project_dir* is
    searched for files with the same stem located in a different directory.
    One sibling is selected; a finding is emitted when that sibling can be
    read and both files contain at least 200 characters.

    Args:
        snapshots: Touched files keyed by path.
        project_dir: Project root; package roots and reported paths are
            relative to it.
        source_package_roots: Directory names under *project_dir* to search.

    Returns:
        A list of findings built with ``build_finding`` (possibly empty).
    """
    from ...source_analysis import get_generic_stems

    findings = []
    for path, snap in snapshots.items():
        if not snap.exists or not snap.text.strip():
            continue
        file_stem = _stem(path)
        stems = get_generic_stems(path)
        if file_stem not in stems:
            continue
        # Invariants for the whole search below, so compute them only once.
        normalized_self = normalize_path(path)
        own_dir = _parent_dir(path)
        # Look for siblings with the same stem in different dirs
        siblings: list[str] = []
        for root_name in source_package_roots:
            root = project_dir / root_name
            if not root.is_dir():
                continue
            for existing in root.rglob(f"{file_stem}.*"):
                if not existing.is_file():
                    continue
                rel = str(existing.relative_to(project_dir)).replace("\\", "/")
                if normalize_path(rel) == normalized_self:
                    continue  # the touched file itself
                if _stem(rel) == file_stem and _parent_dir(rel) != own_dir:
                    siblings.append(rel)
        if not siblings:
            continue
        # Directory traversal order is filesystem-dependent; pick the
        # lexicographically smallest sibling so the same tree always yields
        # the same finding.
        sibling = min(siblings)
        try:
            sib_text = (project_dir / sibling).read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        # Size gate only (heuristic): no content comparison is performed, the
        # pair is simply ignored when either file is very small.
        min_len = min(len(snap.text), len(sib_text))
        if min_len < 200:
            continue  # too small to flag
        findings.append(build_finding(
            check_id="c52.generic_shared_fork",
            category=GateCategory.DUPLICATION,
            title=f"Generic-stem file {path!r} forks {sibling!r}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=(
                f"Touched file {path} has generic stem {file_stem!r} and a same-stem sibling "
                f"at {sibling}. If they serve the same purpose, merge into one canonical module."
            ),
            recommendation=f"Verify {path} vs {sibling}. If same purpose — consolidate; if different purpose — rename to be semantically distinct.",
            evidence=[
                EvidenceReference(kind="file", path=path, detail="new"),
                EvidenceReference(kind="file", path=sibling, detail="sibling"),
            ],
            repair_kind=RepairKind.EDIT_CANONICAL.value,
            executor_action=f"Merge {path} into {sibling} or rename {path} with a specific semantic name",
            proof_required="single canonical generic module; or both have distinct semantic names",
            allowlist_allowed=False,
        ))
    return findings
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""Gate G.6 -- god_object_zones: detects responsibility-zone inflation in Python files.
|
|
2
|
+
|
|
3
|
+
A file is flagged when it exposes >=3 distinct function-name zones (everything
|
|
4
|
+
before the first underscore in the name maps to a zone in KNOWN_ZONES) AND the
|
|
5
|
+
file is at least MIN_FILE_LINES lines long. Compact utility modules with fewer
|
|
6
|
+
than MIN_FILE_LINES lines are excluded to reduce false positives.
|
|
7
|
+
|
|
8
|
+
Fail-open: SyntaxError or missing file -> no finding, no exception.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import ast
|
|
13
|
+
import logging
|
|
14
|
+
|
|
15
|
+
from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity
|
|
16
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
17
|
+
from vigil_forensic.source_analysis import is_source_file
|
|
18
|
+
from .common import build_check_result, build_finding, is_generated_file, normalize_path
|
|
19
|
+
|
|
20
|
+
_log = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# Minimum file length (lines) required before a zone-count finding is emitted.
|
|
23
|
+
MIN_FILE_LINES: int = 150
|
|
24
|
+
|
|
25
|
+
# Minimum number of distinct zones that must be present to trigger a finding.
|
|
26
|
+
MIN_ZONE_COUNT: int = 3
|
|
27
|
+
|
|
28
|
+
# Canonical set of concern-indicating prefixes.
# _extract_zone() only reports a function-name prefix as a zone when the
# prefix is a member of this set.
KNOWN_ZONES: frozenset[str] = frozenset({
    "write",
    "save",
    "read",
    "load",
    "build",
    "compute",
    "render",
    "parse",
    "dispatch",
    "handle",
    "validate",
    "run",
    "start",
    "stop",
    "close",
    "open",
    "fetch",
    "send",
    "commit",
    "rollback",
    "acquire",
    "release",
})
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _extract_zone(name: str) -> str | None:
    """Map a function name to its zone prefix, or ``None`` when it has no known zone.

    All leading underscores are removed first, so ``_compute_hash`` and
    ``__compute_hash`` both give ``compute``. The candidate is the text before
    the next underscore, or the whole remaining name when there is none. It is
    returned only if it is a member of ``KNOWN_ZONES``.
    """
    bare = name.lstrip("_")
    if not bare:
        return None
    # ``bare`` never starts with "_", so the part before the first "_" is non-empty.
    candidate, _sep, _rest = bare.partition("_")
    if candidate in KNOWN_ZONES:
        return candidate
    return None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _collect_zones(text: str) -> set[str]:
|
|
73
|
+
"""Parse *text* as Python source and return the set of known zone prefixes found.
|
|
74
|
+
|
|
75
|
+
Returns an empty set on SyntaxError (fail-open).
|
|
76
|
+
"""
|
|
77
|
+
try:
|
|
78
|
+
tree = ast.parse(text)
|
|
79
|
+
except SyntaxError:
|
|
80
|
+
return set()
|
|
81
|
+
|
|
82
|
+
zones: set[str] = set()
|
|
83
|
+
for node in ast.walk(tree):
|
|
84
|
+
if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
85
|
+
continue
|
|
86
|
+
zone = _extract_zone(node.name)
|
|
87
|
+
if zone is not None:
|
|
88
|
+
zones.add(zone)
|
|
89
|
+
return zones
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def run_god_object_zones_checks(ctx: PostExecGateContext):
    """Scan the observed changed files and report those spanning several zones.

    A file is skipped when ``is_source_file`` rejects it, when it cannot be
    read, when ``is_generated_file`` accepts its text, when it has fewer than
    ``MIN_FILE_LINES`` lines, or when fewer than ``MIN_ZONE_COUNT`` zones are
    found in it. Every remaining file yields one finding. The findings are
    wrapped with ``build_check_result``.
    """
    findings = []

    for raw_path in ctx.changed_files_observed:
        normalized = normalize_path(raw_path)
        if not is_source_file(normalized):
            continue

        try:
            text = (ctx.project_dir / normalized).read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            _log.debug("god_object_zones: cannot read %s: %s", normalized, exc)
            continue

        # F16d: skip auto-generated files and sanctioned asset bundles.
        if is_generated_file(text):
            _log.debug("god_object_zones: skipping generated/sanctioned file %s", normalized)
            continue

        line_count = len(text.splitlines())
        if line_count < MIN_FILE_LINES:
            _log.debug(
                "god_object_zones: skipping %s (%d lines < %d threshold)",
                normalized, line_count, MIN_FILE_LINES,
            )
            continue

        zones = _collect_zones(text)
        zone_total = len(zones)
        if zone_total < MIN_ZONE_COUNT:
            _log.debug(
                "god_object_zones: %s has %d zone(s) -- below threshold",
                normalized, zone_total,
            )
            continue

        sorted_zones = sorted(zones)
        _log.info("god_object_zones: %s triggers with zones %s", normalized, sorted_zones)

        summary_text = (
            f"{normalized} ({line_count} lines) exposes "
            f"{zone_total} distinct zones: {sorted_zones}. "
            "Split into focused modules or extract shared helpers."
        )
        recommendation_text = (
            "Split this file by responsibility zone. "
            "If zones share common helpers — move them to `<package>/shared.py` or `<package>/utils.py` "
            "and import from there. "
            "If zones are unrelated — move each into its own domain module. "
            "A single module should own exactly one concern."
        )
        finding = build_finding(
            check_id="god_object_zones.zone_inflation",
            category=GateCategory.DRIFT,
            title="File owns multiple responsibility zones",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=summary_text,
            recommendation=recommendation_text,
            evidence=[EvidenceReference(kind="file", path=normalized)],
            repair_kind="split_module",
            executor_action="Split file into modules",
            proof_required="Below complexity threshold",
            allowlist_allowed=False,
        )
        findings.append(finding)

    return build_check_result(
        check_id="god_object_zones",
        category=GateCategory.DRIFT,
        findings=findings,
    )
|