vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,566 @@
|
|
|
1
|
+
"""Gate check: detect AI-hallucinated symbols in modified Python files.
|
|
2
|
+
|
|
3
|
+
Two sub-checks:
|
|
4
|
+
A. hallucination.import_not_found -- imported name doesn't exist in the source module
|
|
5
|
+
B. hallucination.undefined_call -- function called without import or local definition
|
|
6
|
+
|
|
7
|
+
Only resolver-uncertain import findings are WARN-impact (LOW severity); confident findings from both checks are emitted as MEDIUM severity with REVISE impact.
|
|
8
|
+
Files with `from X import *` skip check B (star imports make scope unknowable).
|
|
9
|
+
TYPE_CHECKING blocks are skipped in both checks.
|
|
10
|
+
|
|
11
|
+
Sprint B1 (2026-04-23): check A migrated to the tri-state ``PythonModuleIndex``
|
|
12
|
+
resolver. When the module layout is uncertain (src-layout, PEP 420 namespace,
|
|
13
|
+
custom PYTHONPATH) findings are emitted with ``applicability="unknown"``
|
|
14
|
+
instead of being silently skipped — the reviewer sees the detector's
|
|
15
|
+
uncertainty rather than losing signal.
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import ast
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
from vigil_forensic._shared import (
|
|
24
|
+
EvidenceReference,
|
|
25
|
+
GateCategory,
|
|
26
|
+
GateImpact,
|
|
27
|
+
GateSeverity,
|
|
28
|
+
)
|
|
29
|
+
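# standalone: the PythonModuleIndex resolver is unavailable here, so both names are stubbed to None and module resolution falls back to a direct filesystem check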
|
|
30
|
+
PythonModuleIndex = None # type: ignore[assignment,misc]
|
|
31
|
+
ResolveOutcome = None # type: ignore[assignment,misc]
|
|
32
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
33
|
+
from vigil_forensic.gate_checks.common import build_check_result, build_finding, iter_touched_snapshots
|
|
34
|
+
from vigil_forensic.source_analysis import is_source_file
|
|
35
|
+
import logging
|
|
36
|
+
_log = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Built-in names that are always in scope without any import
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
# Names that resolve in any module without an import.  Used to seed the
# "known names" set for the undefined-call check, so every name missing from
# here turns a legitimate call into a false "undefined call" finding.
PYTHON_BUILTINS: frozenset[str] = frozenset({
    # Built-in functions
    "abs", "aiter", "all", "anext", "any", "ascii", "bin", "breakpoint",
    "callable", "chr", "compile", "delattr", "dir", "divmod", "enumerate",
    "eval", "exec", "filter", "format", "frozenset", "getattr", "globals",
    "hasattr", "hash", "help", "hex", "id", "input", "isinstance",
    "issubclass", "iter", "len", "list", "locals", "map", "max",
    "memoryview", "min", "next", "object", "oct", "open", "ord",
    "pow", "print", "property", "range", "repr", "reversed", "round",
    "set", "setattr", "slice", "sorted", "staticmethod", "str", "sum",
    "super", "tuple", "type", "vars", "zip",
    # Injected into builtins by the ``site`` module (absent only under -S)
    "exit", "quit",
    # Built-in types
    "bool", "bytearray", "bytes", "classmethod", "complex", "dict", "float", "int",
    # Exceptions
    "ArithmeticError", "AssertionError", "AttributeError", "BaseException",
    "BaseExceptionGroup", "BlockingIOError", "BrokenPipeError", "BufferError",
    "BytesWarning", "ChildProcessError", "ConnectionAbortedError",
    "ConnectionError", "ConnectionRefusedError", "ConnectionResetError",
    "DeprecationWarning", "EOFError", "EncodingWarning", "EnvironmentError",
    "Exception", "ExceptionGroup", "FileExistsError",
    "FileNotFoundError", "FloatingPointError", "FutureWarning",
    "GeneratorExit", "IOError", "ImportError", "ImportWarning",
    "IndentationError", "IndexError", "InterruptedError", "IsADirectoryError",
    "KeyError", "KeyboardInterrupt", "LookupError", "MemoryError",
    "ModuleNotFoundError", "NameError", "NotADirectoryError",
    "NotImplementedError", "OSError", "OverflowError",
    "PendingDeprecationWarning", "PermissionError",
    "ProcessLookupError", "RecursionError", "ReferenceError",
    "ResourceWarning", "RuntimeError", "RuntimeWarning", "StopAsyncIteration",
    "StopIteration", "SyntaxError", "SyntaxWarning", "SystemError", "SystemExit",
    "TabError", "TimeoutError", "TypeError", "UnboundLocalError",
    "UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError",
    "UnicodeTranslateError", "UnicodeWarning", "UserWarning",
    "ValueError", "Warning", "ZeroDivisionError",
    # Special constants
    "None", "True", "False", "NotImplemented", "Ellipsis", "__debug__",
    # Module-level dunders always present
    "__name__", "__file__", "__doc__", "__package__", "__spec__",
    "__all__", "__annotations__", "__builtins__", "__loader__",
    # Import hook callable exposed by the builtins module
    "__import__",
})
|
|
80
|
+
|
|
81
|
+
# ---------------------------------------------------------------------------
|
|
82
|
+
# Helpers
|
|
83
|
+
# ---------------------------------------------------------------------------
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _is_project_module(module: str, roots: tuple[str, ...]) -> bool:
|
|
87
|
+
return any(module.startswith(r) for r in roots)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _resolve_module_path(module: str, project_dir: Path) -> Path | None:
|
|
91
|
+
"""Legacy filesystem-only resolver (kept as fallback for callers without ctx).
|
|
92
|
+
|
|
93
|
+
Convert 'SYSTEM.runtime.foo' -> absolute path to foo.py (or package
|
|
94
|
+
__init__.py). Handles cluster topology: SYSTEM, BRAIN, INTERFACE,
|
|
95
|
+
STORAGE, and any roots in ctx.source_package_roots.
|
|
96
|
+
|
|
97
|
+
Sprint B1: the primary path is now ``_resolve_module_with_context``
|
|
98
|
+
which returns a tri-state ``ResolveOutcome`` via ``PythonModuleIndex``.
|
|
99
|
+
This function is only used when ``ctx.project_context`` is None (older
|
|
100
|
+
callers that have not yet plumbed the context through).
|
|
101
|
+
"""
|
|
102
|
+
rel = module.replace(".", "/")
|
|
103
|
+
candidate = project_dir / (rel + ".py")
|
|
104
|
+
if candidate.exists():
|
|
105
|
+
return candidate
|
|
106
|
+
init = project_dir / rel / "__init__.py"
|
|
107
|
+
if init.exists():
|
|
108
|
+
return init
|
|
109
|
+
return None
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _resolve_module_with_context(
    module: str, ctx: PostExecGateContext
) -> ResolveOutcome:
    """Resolve *module* to an outcome object with a tri-state ``status``.

    The returned object exposes ``status``, ``path``, ``confidence`` and
    ``reason``. Three paths exist:

    * Standalone mode (``PythonModuleIndex`` or ``ResolveOutcome`` is
      ``None``): a direct filesystem probe under ``ctx.project_dir``. A hit
      gives ``"resolved"`` (confidence 0.9); a miss gives
      ``"resolver_uncertain"`` (confidence 0.4). The result is a local stub
      object, not a real ``ResolveOutcome``.
    * ``ctx.project_context.python_module_index`` is a ``PythonModuleIndex``:
      its ``resolve(module)`` result is returned unchanged.
    * Otherwise: the same filesystem probe, wrapped in ``ResolveOutcome``.

    The filesystem paths never return ``"missing_confident"``: without an
    index a miss cannot be told apart from an incomplete resolver, so it is
    always reported as uncertain.

    Args:
        module: Dotted module name to resolve.
        ctx: Gate context; ``project_dir`` is read directly and
            ``project_context`` is read if present.

    Returns:
        An outcome object carrying the fields described above.
    """
    index_types_available = (
        PythonModuleIndex is not None and ResolveOutcome is not None
    )
    if not index_types_available:
        # Minimal stand-in exposing the attributes callers read.
        class _StubOutcome:
            def __init__(self, status: str, path: object, confidence: float, reason: str) -> None:
                self.status = status
                self.path = path
                self.confidence = confidence
                self.reason = reason

        fs_hit = _resolve_module_path(module, ctx.project_dir)
        if fs_hit is None:
            return _StubOutcome("resolver_uncertain", None, 0.4, reason="module index unavailable in standalone mode")  # type: ignore[return-value]
        return _StubOutcome("resolved", fs_hit, 0.9, reason="")  # type: ignore[return-value]

    # Prefer the project's module index when the context carries one.
    project_ctx = getattr(ctx, "project_context", None)
    candidate_index = (
        None
        if project_ctx is None
        else getattr(project_ctx, "python_module_index", None)
    )
    if isinstance(candidate_index, PythonModuleIndex):
        return candidate_index.resolve(module)

    # Legacy fallback for callers without a full project context.
    fs_hit = _resolve_module_path(module, ctx.project_dir)
    if fs_hit is None:
        # A miss here may be a hallucination or an incomplete resolver;
        # stay uncertain to avoid false positives.
        uncertain_reason = (
            f"module index unavailable; {module!r} not found under "
            f"project_dir via direct fs check"
        )
        return ResolveOutcome("resolver_uncertain", None, 0.4, reason=uncertain_reason)
    return ResolveOutcome("resolved", fs_hit, 0.9, reason="")
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _extract_defined_names(source_text: str) -> frozenset[str]:
|
|
170
|
+
"""Return all names exported by a module (top-level defs, assignments, __all__)."""
|
|
171
|
+
try:
|
|
172
|
+
tree = ast.parse(source_text)
|
|
173
|
+
except SyntaxError:
|
|
174
|
+
return frozenset()
|
|
175
|
+
|
|
176
|
+
names: set[str] = set()
|
|
177
|
+
all_list: list[str] | None = None
|
|
178
|
+
|
|
179
|
+
for node in ast.iter_child_nodes(tree):
|
|
180
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
|
|
181
|
+
names.add(node.name)
|
|
182
|
+
elif isinstance(node, ast.Assign):
|
|
183
|
+
for target in node.targets:
|
|
184
|
+
if isinstance(target, ast.Name):
|
|
185
|
+
names.add(target.id)
|
|
186
|
+
if target.id == "__all__" and isinstance(node.value, (ast.List, ast.Tuple)):
|
|
187
|
+
all_list = [
|
|
188
|
+
elt.value
|
|
189
|
+
for elt in node.value.elts
|
|
190
|
+
if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
|
|
191
|
+
]
|
|
192
|
+
elif isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
|
|
193
|
+
names.add(node.target.id)
|
|
194
|
+
elif isinstance(node, ast.Import):
|
|
195
|
+
for alias in node.names:
|
|
196
|
+
names.add(alias.asname or alias.name.split(".")[0])
|
|
197
|
+
elif isinstance(node, ast.ImportFrom) and node.names:
|
|
198
|
+
for alias in node.names:
|
|
199
|
+
if alias.name != "*":
|
|
200
|
+
names.add(alias.asname or alias.name)
|
|
201
|
+
|
|
202
|
+
if all_list is not None:
|
|
203
|
+
return frozenset(all_list) | names
|
|
204
|
+
return frozenset(names)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _type_checking_lines(tree: ast.Module) -> frozenset[int]:
|
|
208
|
+
"""Return line numbers that live inside `if TYPE_CHECKING:` blocks."""
|
|
209
|
+
lines: set[int] = set()
|
|
210
|
+
for node in ast.iter_child_nodes(tree):
|
|
211
|
+
if not isinstance(node, ast.If):
|
|
212
|
+
continue
|
|
213
|
+
test = node.test
|
|
214
|
+
is_tc = (
|
|
215
|
+
(isinstance(test, ast.Name) and test.id == "TYPE_CHECKING")
|
|
216
|
+
or (isinstance(test, ast.Attribute) and test.attr == "TYPE_CHECKING")
|
|
217
|
+
)
|
|
218
|
+
if is_tc:
|
|
219
|
+
for child in ast.walk(node):
|
|
220
|
+
ln = getattr(child, "lineno", None)
|
|
221
|
+
if ln is not None:
|
|
222
|
+
lines.add(ln)
|
|
223
|
+
return frozenset(lines)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _collect_known_names(tree: ast.Module) -> frozenset[str]:
    """Return every name that may be bound somewhere in this file.

    The result is deliberately scope-insensitive: a name bound anywhere in
    the file counts as known everywhere. It is seeded with
    ``PYTHON_BUILTINS`` and extended with:

    * function, async function and class names,
    * ``import`` / ``from ... import`` bindings (``*`` is ignored),
    * function and lambda parameters,
    * every name in a store context: assignment targets (plain, annotated,
      augmented, nested unpacking), ``for`` and comprehension targets,
      ``with ... as`` targets and walrus targets,
    * ``except ... as`` names,
    * ``match`` statement capture names.

    Args:
        tree: Parsed module.

    Returns:
        The set of known names.
    """
    # Pattern node class -> attribute holding the captured name. Looked up by
    # class name so interpreters without ``match`` support do not fail.
    match_capture_attr = {"MatchAs": "name", "MatchStar": "name", "MatchMapping": "rest"}

    names: set[str] = set(PYTHON_BUILTINS)
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            names.add(node.name)
        elif isinstance(node, ast.Import):
            for alias in node.names:
                names.add(alias.asname or alias.name.split(".")[0])
        elif isinstance(node, ast.ImportFrom):
            for alias in node.names:
                if alias.name != "*":
                    names.add(alias.asname or alias.name)
        elif isinstance(node, ast.arg):
            # Function / method / lambda parameters (cls, self, fn, ...).
            names.add(node.arg)
        elif isinstance(node, ast.Name):
            # A Store context covers every binding form that targets a plain
            # name, including comprehension variables and nested unpacking.
            if isinstance(node.ctx, ast.Store):
                names.add(node.id)
        elif isinstance(node, ast.ExceptHandler):
            if node.name:
                names.add(node.name)
        else:
            attr = match_capture_attr.get(type(node).__name__)
            captured = getattr(node, attr, None) if attr else None
            if isinstance(captured, str):
                names.add(captured)
    return frozenset(names)
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _has_star_import(tree: ast.Module) -> bool:
|
|
275
|
+
for node in ast.walk(tree):
|
|
276
|
+
if isinstance(node, ast.ImportFrom):
|
|
277
|
+
for alias in node.names:
|
|
278
|
+
if alias.name == "*":
|
|
279
|
+
return True
|
|
280
|
+
return False
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
# ---------------------------------------------------------------------------
|
|
284
|
+
# Check A: imported name doesn't exist in source module
|
|
285
|
+
# ---------------------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _check_imports(
    snapshot, ctx: PostExecGateContext, tree: ast.Module
) -> list:
    """Check A: flag ``from <module> import <name>`` statements that look invented.

    Only absolute imports of project modules (per ``ctx.source_package_roots``)
    outside top-level ``if TYPE_CHECKING:`` bodies are examined. Each one is
    resolved and handled by ``outcome.status``; every finding uses check id
    ``hallucination.import_not_found``:

    * ``resolver_uncertain``: one LOW/WARN finding per imported name with
      ``applicability="unknown"``, so the reviewer sees the uncertainty.
    * ``missing_confident``: one MEDIUM/REVISE finding per imported name with
      ``applicability="applicable"``.
    * anything else (``resolved``): the resolved file is read and each
      imported name is compared with the names the module defines; a
      MEDIUM/REVISE finding is emitted for each missing name that is not a
      submodule on disk.

    The resolved branch is skipped when the target's exports cannot be
    enumerated statically: unreadable or unparseable file, a
    ``from x import *`` re-export, or a module-level ``__getattr__``.
    Flagging in those cases would be a guess reported at confidence 0.9.

    Args:
        snapshot: Touched-file snapshot; only ``snapshot.path`` is read here.
        ctx: Gate context used for package roots and module resolution.
        tree: Parsed AST of the snapshot's source.

    Returns:
        List of findings produced by ``build_finding`` (possibly empty).
    """
    findings = []
    tc_lines = _type_checking_lines(tree)

    for node in ast.walk(tree):
        if not isinstance(node, ast.ImportFrom) or not node.module:
            continue
        if node.level:
            # Relative import: node.module is relative to the importing
            # package, so resolving it as an absolute name would look in the
            # wrong place.
            continue
        lineno = getattr(node, "lineno", 0)
        if lineno in tc_lines:
            continue
        if not _is_project_module(node.module, ctx.source_package_roots):
            continue

        outcome = _resolve_module_with_context(node.module, ctx)

        if outcome.status == "resolver_uncertain":
            # Layout uncertainty: the module may still resolve at runtime
            # (e.g. custom PYTHONPATH), so report "unknown" rather than
            # silently dropping the signal.
            for alias in node.names:
                if alias.name in ("*", "_"):
                    continue
                findings.append(
                    build_finding(
                        check_id="hallucination.import_not_found",
                        category=GateCategory.CONTRACT,
                        title=f"Unresolved import '{alias.name}' from {node.module}",
                        severity=GateSeverity.LOW,
                        impact=GateImpact.WARN,
                        summary=(
                            f"{snapshot.path} line {lineno}: imports "
                            f"'{alias.name}' from '{node.module}'. Resolver "
                            f"cannot determine whether this module resolves "
                            f"at runtime (uncertain layout: "
                            f"{outcome.reason})."
                        ),
                        recommendation=(
                            f"Inspect manually: does '{node.module}' resolve "
                            f"at runtime and does it export '{alias.name}'? "
                            f"If yes, consider marking the module index "
                            f"aware of this path."
                        ),
                        evidence=[
                            EvidenceReference(
                                kind="file",
                                path=snapshot.path,
                                detail=f"line {lineno}: from {node.module} import {alias.name}",
                            )
                        ],
                        repair_kind='validate_boundary',
                        executor_action='Address finding details',
                        proof_required='No hallucination',
                        allowlist_allowed=True,
                        confidence=outcome.confidence,
                        applicability="unknown",
                        analysis_mode="ast",
                        applicability_reason=(
                            outcome.reason
                            or "resolver incomplete for this project layout"
                        ),
                    )
                )
            continue

        if outcome.status == "missing_confident":
            # The module itself is absent. One finding per imported name so
            # reviewers see which symbols are affected.
            for alias in node.names:
                if alias.name in ("*", "_"):
                    continue
                findings.append(
                    build_finding(
                        check_id="hallucination.import_not_found",
                        category=GateCategory.CONTRACT,
                        title=f"Module '{node.module}' not found for import '{alias.name}'",
                        severity=GateSeverity.MEDIUM,
                        impact=GateImpact.REVISE,
                        summary=(
                            f"{snapshot.path} line {lineno}: imports "
                            f"'{alias.name}' from '{node.module}' but the "
                            f"module is not present in the project tree. "
                            f"{outcome.reason}"
                        ),
                        recommendation=(
                            f"Verify '{node.module}' exists. If renamed, "
                            f"update the import; if missing, create the "
                            f"module or remove the stale import."
                        ),
                        evidence=[
                            EvidenceReference(
                                kind="file",
                                path=snapshot.path,
                                detail=f"line {lineno}: from {node.module} import {alias.name}",
                            )
                        ],
                        repair_kind='validate_boundary',
                        executor_action='Address finding details',
                        proof_required='No hallucination',
                        allowlist_allowed=False,
                        confidence=outcome.confidence,
                        applicability="applicable",
                        analysis_mode="ast",
                    )
                )
            continue

        # Resolved: verify each imported name exists in the resolved module.
        source_path = outcome.path
        if source_path is None:
            # Defensive: a resolved outcome should carry a path.
            continue

        try:
            source_text = source_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue

        try:
            target_tree = ast.parse(source_text)
        except (SyntaxError, ValueError):
            # Unparseable target: its names cannot be listed, and an empty
            # name set would flag every import from it.
            continue
        if _has_star_import(target_tree):
            # Star re-exports make the target's namespace unknowable.
            continue

        defined = _extract_defined_names(source_text)
        if "__getattr__" in defined:
            # A module-level __getattr__ can supply any attribute on demand.
            continue

        # Directory to probe for submodules. Derived from the resolved file
        # so src-layout resolutions see the correct directory.
        if source_path.name == "__init__.py":
            pkg_dir = source_path.parent
        else:
            pkg_dir = source_path.parent / source_path.stem
            if not pkg_dir.is_dir():
                pkg_dir = source_path.parent

        for alias in node.names:
            if alias.name in ("*", "_"):
                continue
            # ``from pkg import submod`` is valid when pkg/submod.py or
            # pkg/submod/__init__.py exists, even though __init__.py never
            # mentions the name.
            if (pkg_dir / (alias.name + ".py")).exists() or (pkg_dir / alias.name / "__init__.py").exists():
                continue
            if alias.name not in defined:
                findings.append(
                    build_finding(
                        check_id="hallucination.import_not_found",
                        category=GateCategory.CONTRACT,
                        title=f"Imported '{alias.name}' not found in {node.module}",
                        severity=GateSeverity.MEDIUM,
                        impact=GateImpact.REVISE,
                        summary=(
                            f"{snapshot.path} line {lineno}: imports '{alias.name}' "
                            f"from '{node.module}' but that name is not defined there. "
                            f"Likely an AI hallucination -- the symbol was invented."
                        ),
                        recommendation=(
                            f"Check that '{alias.name}' exists in {node.module}. "
                            f"If added in this edit, ensure the definition is also present."
                        ),
                        evidence=[
                            EvidenceReference(
                                kind="file",
                                path=snapshot.path,
                                detail=f"line {lineno}: from {node.module} import {alias.name}",
                            )
                        ],
                        repair_kind='validate_boundary',
                        executor_action='Address finding details',
                        proof_required='No hallucination',
                        allowlist_allowed=False,
                        confidence=0.9,
                        applicability="applicable",
                        analysis_mode="ast",
                    )
                )
    return findings
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
# ---------------------------------------------------------------------------
|
|
479
|
+
# Check B: function called without being imported or locally defined
|
|
480
|
+
# ---------------------------------------------------------------------------
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def _check_undefined_calls(
    snapshot, tree: ast.Module
) -> list:
    """Check B: flag calls to bare names that nothing in the file binds.

    Only ``name(...)`` calls are inspected; attribute calls such as
    ``obj.method()`` are skipped because they need type inference. A call is
    ignored when it sits on a line inside a top-level ``if TYPE_CHECKING:``
    block, when the name starts with ``__``, or when the name is in the
    file's known-name set. Files with a star import produce no findings,
    since their scope is unknowable.

    Args:
        snapshot: Touched-file snapshot; only ``snapshot.path`` is read here.
        tree: Parsed AST of the snapshot's source.

    Returns:
        List of ``hallucination.undefined_call`` findings (possibly empty).
    """
    if _has_star_import(tree):
        # Star imports make scope unknowable; skip conservatively.
        return []

    reported = []
    in_scope = _collect_known_names(tree)
    guarded_lines = _type_checking_lines(tree)

    for call in ast.walk(tree):
        bare_name_call = isinstance(call, ast.Call) and isinstance(call.func, ast.Name)
        if not bare_name_call:
            continue

        callee = call.func.id
        line = getattr(call, "lineno", 0)

        if line in guarded_lines:
            continue
        if callee.startswith("__"):
            # Dunder names are always special.
            continue
        if callee in in_scope:
            continue

        reported.append(
            build_finding(
                check_id="hallucination.undefined_call",
                category=GateCategory.CONTRACT,
                title=f"Call to undefined '{callee}' in {snapshot.path}",
                severity=GateSeverity.MEDIUM,
                impact=GateImpact.REVISE,
                summary=(
                    f"{snapshot.path} line {line}: calls '{callee}()' "
                    f"but this name is not imported or defined in the file. "
                    f"Likely an AI hallucination -- the function was invented."
                ),
                recommendation=(
                    f"Add an import for '{callee}' or define it before use. "
                    f"If it should come from another module, add the import statement."
                ),
                evidence=[
                    EvidenceReference(
                        kind="file",
                        path=snapshot.path,
                        detail=f"line {line}: {callee}(...)",
                    )
                ],
                repair_kind='validate_boundary',
                executor_action='Address finding details',
                proof_required='No hallucination',
                allowlist_allowed=False,
            )
        )
    return reported
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
# ---------------------------------------------------------------------------
|
|
544
|
+
# Entry point
|
|
545
|
+
# ---------------------------------------------------------------------------
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def run_hallucination_checks(ctx: PostExecGateContext):
    """Run both hallucination sub-checks over every touched source file.

    Snapshots are skipped when they do not exist, are not source files per
    ``is_source_file``, are blank, or cannot be parsed as Python. Each
    remaining file is parsed once and passed to the import check and the
    undefined-call check.

    Args:
        ctx: Gate context supplying the touched snapshots.

    Returns:
        The result of ``build_check_result`` for check id ``hallucination``
        with all collected findings.
    """
    findings = []
    for snapshot in iter_touched_snapshots(ctx):
        if not snapshot.exists or not is_source_file(snapshot.path):
            continue
        if not snapshot.text.strip():
            continue
        try:
            tree = ast.parse(snapshot.text, filename=snapshot.path)
        except (SyntaxError, ValueError):
            # Syntax errors are handled by syntax_validity_checks. ValueError
            # is what interpreters before 3.12 raise for NUL bytes in the
            # source; one such file must not abort the whole gate.
            continue
        findings.extend(_check_imports(snapshot, ctx, tree))
        findings.extend(_check_undefined_calls(snapshot, tree))
    return build_check_result(
        check_id="hallucination",
        category=GateCategory.CONTRACT,
        findings=findings,
    )
|