vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity
|
|
7
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
8
|
+
from vigil_forensic.source_analysis import is_source_file
|
|
9
|
+
from .common import build_check_result, build_finding, normalize_path
|
|
10
|
+
from vigil_forensic._git_utils import git_show as _git_show_init
|
|
11
|
+
|
|
12
|
+
_log = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
# Matches top-level import statements (both `import X` and `from X import Y`)
|
|
15
|
+
IMPORT_LINE_RE = re.compile(r"^(?:from\s+\S+\s+)?import\s+\S.*$", re.MULTILINE)
|
|
16
|
+
|
|
17
|
+
_MAX_DELETED_FILES = 5
|
|
18
|
+
_MAX_FILES_PER_ROOT = 500
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _derive_module_path(file_path: str) -> str:
    """Convert a source file path into a dotted module path.

    Example: ``'SYSTEM/runtime/foo.py'`` becomes ``'SYSTEM.runtime.foo'``.

    Args:
        file_path: Path to convert; it is passed through ``normalize_path``
            before anything else happens.

    Returns:
        The normalized path with every ``/`` replaced by ``.``.  When
        ``is_source_file`` accepts the path, the extension of the final path
        component is removed first.
    """
    normalized = normalize_path(file_path)
    if is_source_file(normalized):
        directory, slash, filename = normalized.rpartition("/")
        stem, dot, _extension = filename.rpartition(".")
        # Remove the actual extension instead of a fixed three characters, so
        # a suffix of any other length cannot leave stray characters behind.
        # NOTE(review): which extensions is_source_file accepts is not visible
        # here; for ``.py`` the result is identical to the old ``[:-3]``.
        if dot and stem:
            normalized = f"{directory}{slash}{stem}"
    return normalized.replace("/", ".")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _find_deleted_py_files(ctx: PostExecGateContext) -> list[str]:
    """List reported source files that are missing from the project directory.

    Each entry of ``ctx.changed_files_reported`` is normalized; entries that
    ``is_source_file`` rejects are ignored.  A remaining entry is collected
    when ``ctx.project_dir / <path>`` does not exist.  Collection stops as
    soon as ``_MAX_DELETED_FILES`` paths have been gathered.

    Returns:
        The normalized paths of the missing files, in reported order.
    """
    missing: list[str] = []
    for reported in ctx.changed_files_reported:
        rel_path = normalize_path(reported)
        if is_source_file(rel_path) and not (ctx.project_dir / rel_path).exists():
            missing.append(rel_path)
            # Bound the amount of work done by the caller's scan.
            if len(missing) >= _MAX_DELETED_FILES:
                break
    return missing
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _module_import_pattern(module_dotpath: str) -> re.Pattern[str]:
    """Compile a pattern matching ``from <module>`` or ``import <module>``.

    The dotted path is escaped and must not be followed by another word
    character, so ``pkg.foo`` does not match ``pkg.foobar`` or
    ``pkg.foo_utils`` while ``pkg.foo.sub`` is still matched.
    """
    return re.compile(rf"\b(?:from|import)[ \t]+{re.escape(module_dotpath)}(?!\w)")


def _build_broken_import_finding(rel_importer: str, deleted_path: str, module_dotpath: str):
    """Build the finding for one importer that still references a deleted module."""
    return build_finding(
        check_id="import_integrity.broken_import",
        category=GateCategory.CONTRACT,
        title=f"Broken import: {rel_importer} imports deleted {deleted_path}",
        severity=GateSeverity.HIGH,
        impact=GateImpact.REVISE,
        summary=(
            f"File {rel_importer} imports from module '{module_dotpath}' "
            f"but {deleted_path} was deleted in this session. "
            "This will cause an ImportError at runtime."
        ),
        recommendation=(
            "Update the import in the affected file to use the new module "
            "path, or delete the orphaned file if it is no longer needed."
        ),
        evidence=[
            EvidenceReference(kind="file", path=deleted_path, detail="deleted"),
            EvidenceReference(kind="file", path=rel_importer, detail=f"imports:{module_dotpath}"),
        ],
        repair_kind='fix_contract',
        executor_action='Fix import',
        proof_required='Import resolved',
        allowlist_allowed=False,
    )


def run_import_integrity_checks(ctx: PostExecGateContext):
    """Report files that still import a module whose source file was deleted.

    Deleted files come from ``_find_deleted_py_files``; each is converted to a
    dotted module path with ``_derive_module_path``.  Every ``*.py`` file
    under the scan roots (at most ``_MAX_FILES_PER_ROOT`` per root) is read
    and searched for a ``from <module>`` / ``import <module>`` reference.

    The search uses an escaped, boundary-checked regular expression rather
    than a plain substring test, so deleting ``pkg/foo.py`` does not flag a
    file that imports ``pkg.foobar``.

    Args:
        ctx: Gate context supplying ``project_dir``, ``source_package_roots``
            and ``changed_files_reported``.

    Returns:
        The value of ``build_check_result`` for check id ``import_integrity``,
        carrying one finding per (importer, deleted file) pair found.
    """
    deleted_files = _find_deleted_py_files(ctx)
    if not deleted_files:
        return build_check_result(check_id="import_integrity", category=GateCategory.CONTRACT)

    # Compile one pattern per deleted module up front; it is reused for every
    # scanned file.
    targets = []
    for deleted_path in deleted_files:
        module_dotpath = _derive_module_path(deleted_path)
        targets.append((deleted_path, module_dotpath, _module_import_pattern(module_dotpath)))

    # Use the configured source roots that exist; otherwise scan the whole
    # project directory.
    scan_roots = [ctx.project_dir / r for r in ctx.source_package_roots if (ctx.project_dir / r).is_dir()]
    if not scan_roots:
        scan_roots = [ctx.project_dir]

    findings = []
    for root_dir in scan_roots:
        for file_count, py_file in enumerate(root_dir.rglob("*.py"), start=1):
            if file_count > _MAX_FILES_PER_ROOT:
                break
            try:
                text = py_file.read_text(encoding="utf-8", errors="replace")
            except OSError:
                # Unreadable files are skipped; they still count toward the cap.
                continue
            rel_importer = str(py_file.relative_to(ctx.project_dir)).replace("\\", "/")
            for deleted_path, module_dotpath, pattern in targets:
                if rel_importer == deleted_path:
                    continue  # Don't flag the deleted file itself
                if pattern.search(text):
                    findings.append(
                        _build_broken_import_finding(rel_importer, deleted_path, module_dotpath)
                    )

    return build_check_result(check_id="import_integrity", category=GateCategory.CONTRACT, findings=findings)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# ---------------------------------------------------------------------------
|
|
107
|
+
# Finding 6.3: init_order_regression
|
|
108
|
+
# ---------------------------------------------------------------------------
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _extract_import_order(content: str) -> list[str]:
    """Collect the distinct import lines of *content* in first-seen order.

    Every match of ``IMPORT_LINE_RE`` is stripped of surrounding whitespace.
    Empty results are dropped, and a line that occurs more than once keeps
    only its first position, so duplicated import lines do not look like a
    reordering.  Because the pattern is anchored at the start of a line, only
    imports that begin in column 0 are collected.
    """
    stripped_matches = (match.strip() for match in IMPORT_LINE_RE.findall(content))
    # dict preserves insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(line for line in stripped_matches if line))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _compare_import_order(before: list[str], after: list[str]) -> tuple[int, int]:
|
|
130
|
+
"""Return (reordered_count, removed_count).
|
|
131
|
+
|
|
132
|
+
- removed_count: imports present in `before` but absent in `after`.
|
|
133
|
+
- reordered_count: imports present in both but their relative order changed.
|
|
134
|
+
Additive-only changes (new imports in `after`) are NOT flagged.
|
|
135
|
+
"""
|
|
136
|
+
before_set = set(before)
|
|
137
|
+
after_set = set(after)
|
|
138
|
+
|
|
139
|
+
removed = [s for s in before if s not in after_set]
|
|
140
|
+
removed_count = len(removed)
|
|
141
|
+
|
|
142
|
+
# Common imports in the order they appear in each sequence
|
|
143
|
+
common_before = [s for s in before if s in after_set]
|
|
144
|
+
common_after = [s for s in after if s in before_set]
|
|
145
|
+
|
|
146
|
+
reordered_count = 0 if common_before == common_after else len(common_before)
|
|
147
|
+
|
|
148
|
+
return reordered_count, removed_count
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _build_init_order_finding(normalized: str, detail_str: str):
    """Build the finding that reports changed import order in one ``__init__.py``."""
    return build_finding(
        check_id="init_order_regression.import_order_changed",
        category=GateCategory.CONTRACT,
        title=f"Import order regression in {normalized}",
        severity=GateSeverity.MEDIUM,
        impact=GateImpact.REVISE,
        summary=(
            f"{normalized} has {detail_str} compared to HEAD~1. "
            "Reordering or removing top-level imports in __init__.py "
            "can alter module initialization side effects and change "
            "circular import resolution order."
        ),
        recommendation=(
            "Restore the original import order in __init__.py unless "
            "the change is intentional and the circular-import / "
            "side-effect impact has been verified. "
            "If intentional, document the reason in the commit message."
        ),
        evidence=[
            EvidenceReference(
                kind="file",
                path=normalized,
                detail=detail_str,
            )
        ],
        repair_kind='refactor',
        executor_action='Fix import order',
        proof_required='Import order stable',
        allowlist_allowed=False,
    )


def run_init_order_regression_checks(ctx: PostExecGateContext):
    """Emit findings when an ``__init__.py`` had imports reordered or removed.

    Reordering top-level imports can alter module side effects and circular
    import resolution, so this gate surfaces such changes.

    Rules:
    - Only files in ``ctx.changed_files_observed`` whose final path component
      is exactly ``__init__.py`` are inspected (a name such as
      ``foo__init__.py`` is not a package initializer and is skipped).
    - A file for which ``_git_show_init`` returns ``None`` is skipped; the
      revision it compares against is decided by that helper, which is not
      visible here.
    - A current file that cannot be read or decoded is logged at DEBUG and
      skipped.
    - Imports that were only added do not produce a finding.

    Args:
        ctx: Gate context supplying ``project_dir`` and
            ``changed_files_observed``.

    Returns:
        The value of ``build_check_result`` for check id
        ``init_order_regression`` with the collected findings.
    """
    findings = []

    for raw_path in ctx.changed_files_observed:
        normalized = normalize_path(raw_path)
        # Compare the basename exactly; a suffix test would also accept file
        # names that merely end in "__init__.py".
        if normalized.replace("\\", "/").rsplit("/", 1)[-1] != "__init__.py":
            continue

        prior_content = _git_show_init(normalized)
        if prior_content is None:
            # New file or git unavailable — not a regression
            continue

        abs_path = ctx.project_dir / normalized
        try:
            current_content = abs_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            _log.debug("init_order_regression: cannot read current file %s: %s", normalized, exc)
            continue

        reordered_count, removed_count = _compare_import_order(
            _extract_import_order(prior_content),
            _extract_import_order(current_content),
        )
        if reordered_count <= 0 and removed_count <= 0:
            continue

        parts = []
        if reordered_count > 0:
            parts.append(f"{reordered_count} import(s) reordered")
        if removed_count > 0:
            parts.append(f"{removed_count} import(s) removed")

        findings.append(_build_init_order_finding(normalized, ", ".join(parts)))

    return build_check_result(
        check_id="init_order_regression",
        category=GateCategory.CONTRACT,
        findings=findings,
    )
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""Detect imports inside function bodies — should be at module top.
|
|
2
|
+
|
|
3
|
+
Pattern caught:
|
|
4
|
+
|
|
5
|
+
def foo():
|
|
6
|
+
import json # <-- module-level import buried inside a function
|
|
7
|
+
return json.dumps(...)
|
|
8
|
+
|
|
9
|
+
Move stdlib imports to module top unless a legitimate reason (circular import,
|
|
10
|
+
deferred load of an optional heavy dep) is documented inline via a recognized
|
|
11
|
+
comment marker.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import ast
|
|
16
|
+
import json as _json
|
|
17
|
+
import logging
|
|
18
|
+
import time
|
|
19
|
+
|
|
20
|
+
from vigil_forensic._shared import (
|
|
21
|
+
EvidenceReference,
|
|
22
|
+
GateCategory,
|
|
23
|
+
GateImpact,
|
|
24
|
+
GateSeverity,
|
|
25
|
+
RepairKind,
|
|
26
|
+
)
|
|
27
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
28
|
+
from vigil_forensic.gate_checks.common import (
|
|
29
|
+
build_check_result,
|
|
30
|
+
build_finding,
|
|
31
|
+
normalize_path,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
_log = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# W4.BQ: relative path (under project_dir) where this gate persists its
|
|
38
|
+
# findings each run. Downstream MEDIUM-executor repair flow reads this file
|
|
39
|
+
# via the path stored on adapter._imports_lift_findings_path.
|
|
40
|
+
PERSISTED_FINDINGS_RELPATH = ".cortex/forensics/imports_in_function_findings.json"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Stdlib modules: local-inside-function imports of these are almost always smell.
|
|
44
|
+
# (We deliberately stay conservative — narrow set of standard, lightweight
|
|
45
|
+
# modules. Third-party / heavyweight modules are commonly lazy-loaded and
|
|
46
|
+
# would produce noisy findings if listed.)
|
|
47
|
+
_STDLIB_LOCAL_IMPORT_SMELLS: frozenset[str] = frozenset({
|
|
48
|
+
"json",
|
|
49
|
+
"os",
|
|
50
|
+
"sys",
|
|
51
|
+
"re",
|
|
52
|
+
"logging",
|
|
53
|
+
"hashlib",
|
|
54
|
+
"uuid",
|
|
55
|
+
"time",
|
|
56
|
+
"datetime",
|
|
57
|
+
"pathlib",
|
|
58
|
+
"subprocess",
|
|
59
|
+
"threading",
|
|
60
|
+
"collections",
|
|
61
|
+
"itertools",
|
|
62
|
+
"functools",
|
|
63
|
+
"typing",
|
|
64
|
+
"math",
|
|
65
|
+
"io",
|
|
66
|
+
"string",
|
|
67
|
+
"enum",
|
|
68
|
+
})
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Inline comment markers that legitimize a local import. Matched
|
|
72
|
+
# case-insensitively as substrings of the import line's trailing comment.
|
|
73
|
+
_LEGITIMATE_REASON_MARKERS: tuple[str, ...] = (
|
|
74
|
+
"# circular",
|
|
75
|
+
"# lazy",
|
|
76
|
+
"# defer",
|
|
77
|
+
"# type_checking",
|
|
78
|
+
"# noqa: imports_in_function",
|
|
79
|
+
"# autoforensics-skip: imports_in_function",
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _is_legitimate(line_text: str) -> bool:
|
|
84
|
+
"""Return True if the import line carries a recognized legitimacy marker."""
|
|
85
|
+
lowered = line_text.lower()
|
|
86
|
+
return any(marker in lowered for marker in _LEGITIMATE_REASON_MARKERS)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _iter_imports_within(func: ast.FunctionDef | ast.AsyncFunctionDef):
|
|
90
|
+
"""Yield (Import|ImportFrom, enclosing_function_name) for nodes inside *func*.
|
|
91
|
+
|
|
92
|
+
A nested function definition gets its own enclosing scope reported; the
|
|
93
|
+
walker descends into the nested def so the user sees the closest function.
|
|
94
|
+
"""
|
|
95
|
+
stack: list[tuple[ast.AST, str]] = [(func, func.name)]
|
|
96
|
+
while stack:
|
|
97
|
+
node, enclosing_name = stack.pop()
|
|
98
|
+
for child in ast.iter_child_nodes(node):
|
|
99
|
+
if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
100
|
+
# Descend into nested function with updated enclosing name.
|
|
101
|
+
stack.append((child, child.name))
|
|
102
|
+
continue
|
|
103
|
+
if isinstance(child, (ast.Import, ast.ImportFrom)):
|
|
104
|
+
yield child, enclosing_name
|
|
105
|
+
else:
|
|
106
|
+
# Descend into other constructs (If/Try/For/With/...) keeping
|
|
107
|
+
# the same enclosing function name.
|
|
108
|
+
stack.append((child, enclosing_name))
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _module_names(node: ast.Import | ast.ImportFrom) -> list[str]:
|
|
112
|
+
if isinstance(node, ast.Import):
|
|
113
|
+
return [alias.name for alias in node.names]
|
|
114
|
+
# ImportFrom: report the source module (or "" for relative-only imports).
|
|
115
|
+
return [node.module or ""]
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _collect_entry_functions(tree: ast.AST) -> list[ast.FunctionDef | ast.AsyncFunctionDef]:
    """Return every function definition that is not nested inside another function.

    Methods count as entry points because a class body is not a function.
    Nested functions are left out: ``_iter_imports_within`` descends into them
    itself, so listing them here would visit their imports twice.
    """
    entry_funcs: list[ast.FunctionDef | ast.AsyncFunctionDef] = []

    def _collect(node: ast.AST, inside_func: bool) -> None:
        for child in ast.iter_child_nodes(node):
            if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if not inside_func:
                    entry_funcs.append(child)
                _collect(child, True)
            else:
                _collect(child, inside_func)

    _collect(tree, False)
    return entry_funcs


def _persist_import_findings(project_dir, persisted_records: list[dict]) -> None:
    """Write the structured findings file, or remove it when there is nothing to report.

    Best-effort: any ``OSError`` is logged as a warning and swallowed so the
    gate itself never fails because of the artifact.
    """
    findings_path = project_dir / PERSISTED_FINDINGS_RELPATH
    try:
        if not persisted_records:
            # A file left by an earlier run would otherwise be read as if it
            # described the current one.
            try:
                findings_path.unlink()
            except FileNotFoundError:
                pass
            else:
                _log.debug("imports_in_function: removed stale findings file %s", findings_path)
            return

        findings_path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "schema_version": "1.0",
            "generated_at": time.time(),
            "count": len(persisted_records),
            "findings": persisted_records,
        }
        # Write to a sibling file first and then swap it in, so a reader never
        # observes a partially written document.
        tmp_path = findings_path.with_name(findings_path.name + ".tmp")
        tmp_path.write_text(
            _json.dumps(payload, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )
        tmp_path.replace(findings_path)
        _log.info(
            "imports_in_function: persisted %d findings to %s",
            len(persisted_records), findings_path,
        )
    except OSError as exc:
        _log.warning(
            "imports_in_function: failed to persist findings: %s", exc,
        )


def run_imports_in_function_checks(ctx: PostExecGateContext):
    """Scan changed Python files for stdlib imports inside function bodies.

    The files come from ``ctx.changed_files_reported`` or, when that is empty,
    ``ctx.touched_files``.  Non-``.py`` paths, paths under ``SYSTEM/libs``,
    missing files, unreadable files and files that do not parse are skipped
    (read and parse failures are logged at DEBUG).

    An import is reported when the first dotted component of any imported
    module is in ``_STDLIB_LOCAL_IMPORT_SMELLS`` and no legitimacy marker
    appears on any source line of the import statement (a marker on the
    closing line of a parenthesized import counts as well).

    Besides the gate findings, a flat record per finding is persisted to
    ``PERSISTED_FINDINGS_RELPATH`` under the project directory.  When a run
    produces no findings, a file left by an earlier run is removed.

    Args:
        ctx: Gate context supplying ``project_dir``, ``changed_files_reported``
            and ``touched_files``.

    Returns:
        The value of ``build_check_result`` for check id
        ``imports_in_function`` with the collected findings.
    """
    findings: list = []
    # Flat, JSON-friendly mirror of the findings; its keys are the contract
    # with whatever reads the persisted file.
    persisted_records: list[dict] = []

    for raw_path in ctx.changed_files_reported or ctx.touched_files:
        normalized = normalize_path(raw_path)

        if not normalized.endswith(".py"):
            continue
        # Skip vendor / libs tree — third-party code, not ours to police.
        if "SYSTEM/libs/" in normalized or normalized.startswith("SYSTEM/libs"):
            continue

        abs_path = ctx.project_dir / normalized
        if not abs_path.exists() or not abs_path.is_file():
            continue

        try:
            source = abs_path.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            _log.debug("imports_in_function: cannot read %s: %s", normalized, exc)
            continue

        try:
            tree = ast.parse(source, filename=normalized)
        except SyntaxError as exc:
            _log.debug("imports_in_function: cannot parse %s: %s", normalized, exc)
            continue

        source_lines = source.splitlines()

        for top_node in _collect_entry_functions(tree):
            for import_node, enclosing_name in _iter_imports_within(top_node):
                names = _module_names(import_node)
                if not names:
                    continue

                # Only flag imports whose first dotted component matches our
                # stdlib smell set; third-party lazy imports are not flagged.
                top_levels = [n.split(".")[0] for n in names if n]
                if not any(t in _STDLIB_LOCAL_IMPORT_SMELLS for t in top_levels):
                    continue

                lineno = int(getattr(import_node, "lineno", 0) or 0)
                end_lineno = int(getattr(import_node, "end_lineno", None) or lineno)
                line_text = (
                    source_lines[lineno - 1] if 1 <= lineno <= len(source_lines) else ""
                )
                # Look at every line of the statement so that a marker placed
                # after the closing parenthesis of a multi-line import is seen.
                statement_text = (
                    "\n".join(source_lines[lineno - 1:end_lineno])
                    if 1 <= lineno <= len(source_lines)
                    else ""
                )

                if _is_legitimate(statement_text):
                    continue

                joined = ", ".join(n for n in names if n)
                persisted_records.append({
                    "file": str(abs_path),
                    "file_relpath": normalized,
                    "line": lineno,
                    "function": enclosing_name,
                    "imported": [n for n in names if n],
                    "line_text": line_text,
                    "suggestion": (
                        f"Move 'import {joined}' from function "
                        f"{enclosing_name}() to module top."
                    ),
                })
                findings.append(
                    build_finding(
                        check_id="imports_in_function.stdlib",
                        category=GateCategory.DRIFT,
                        title=(
                            f"Stdlib import inside function '{enclosing_name}' in "
                            f"{normalized}:{lineno}"
                        ),
                        severity=GateSeverity.MEDIUM,
                        impact=GateImpact.REVISE,
                        summary=(
                            f"Function {enclosing_name}() in {normalized} imports "
                            f"'{joined}' inside its body. Stdlib imports belong at "
                            "module top so the dependency is visible to readers, "
                            "static analysis, and import graph tools."
                        ),
                        recommendation=(
                            f"Move 'import {joined}' from {enclosing_name}() to the "
                            "module top. If the local import is intentional "
                            "(circular import, lazy load), add a trailing comment "
                            "such as '# lazy: ...', '# circular: ...', or "
                            "'# noqa: imports_in_function' on the import line."
                        ),
                        evidence=[
                            EvidenceReference(
                                kind="file",
                                path=normalized,
                                detail=f"line:{lineno} function:{enclosing_name}",
                            )
                        ],
                        repair_kind=RepairKind.FIX_CONTRACT.value,
                        executor_action=(
                            "Hoist the import to module top, or add a legitimacy "
                            "marker comment on the import line."
                        ),
                        proof_required=(
                            "Import is at module top OR import line carries a "
                            "recognized legitimacy marker."
                        ),
                        allowlist_allowed=True,
                    )
                )

    _persist_import_findings(ctx.project_dir, persisted_records)

    return build_check_result(
        check_id="imports_in_function",
        category=GateCategory.DRIFT,
        findings=findings,
    )
|