vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,834 @@
|
|
|
1
|
+
"""Static code analysis: unreachable code, shadowed builtins, mutable defaults,
|
|
2
|
+
resource leaks, docstring drift. Clusters 34-38.
|
|
3
|
+
|
|
4
|
+
Clusters:
|
|
5
|
+
34 - Unreachable Code
|
|
6
|
+
35 - Shadowed Builtins
|
|
7
|
+
36 - Mutable Default Arguments
|
|
8
|
+
37 - Resource Leaks
|
|
9
|
+
38 - Docstring/Signature Parameter Drift
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from .core import detect_language
|
|
14
|
+
from ...gate_models import (
|
|
15
|
+
EvidenceReference,
|
|
16
|
+
GateCategory,
|
|
17
|
+
GateFinding,
|
|
18
|
+
GateImpact,
|
|
19
|
+
GateSeverity,
|
|
20
|
+
RepairKind,
|
|
21
|
+
)
|
|
22
|
+
from ..common import build_finding
|
|
23
|
+
from .._ast_helpers import collect_string_constant_line_ranges
|
|
24
|
+
import logging
|
|
25
|
+
_log = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# Cluster 34: Unreachable Code
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
# Terminator keywords per language
|
|
33
|
+
# Keyed by the language name that detect_language() returns for a file.
# Each value is the set of leading tokens that assess_unreachable_code()
# treats as ending control flow in the current block. A language that is
# missing from this table is not scanned for unreachable code at all.
_TERMINATORS = {
    "python": {"return", "raise", "break", "continue"},
    "javascript": {"return", "throw", "break", "continue"},
    "typescript": {"return", "throw", "break", "continue"},
    "go": {"return", "panic", "break", "continue"},
    "rust": {"return", "panic!", "break", "continue"},
    "java": {"return", "throw", "break", "continue"},
    "csharp": {"return", "throw", "break", "continue"},
    "kotlin": {"return", "throw", "break", "continue"},
    "ruby": {"return", "raise", "break", "next"},
    "swift": {"return", "throw", "break", "continue"},
    "php": {"return", "throw", "break", "continue"},
}
|
|
46
|
+
|
|
47
|
+
# Lines that legitimately follow a terminator at the same/less indent
|
|
48
|
+
# Compared against the lower-cased first token of the line that follows a
# terminator. assess_unreachable_code() strips trailing ":" and ";" from that
# token before the lookup, so the colon-suffixed spellings below appear to be
# redundant with their bare forms — NOTE(review): confirm before pruning.
_POST_TERMINATOR_OK = {
    "except", "except:", "elif", "else", "else:", "finally", "finally:",
    "catch", "case", "default", "default:", "}", "end", "rescue", "ensure",
    "elif:", "elseif", "elsif",
}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def assess_unreachable_code(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 34: Detect code after return/raise/throw/break in same block.

    This is a line-based heuristic, not a control-flow analysis. For every
    line whose first token is a terminator keyword for the file's language
    (see ``_TERMINATORS``), up to four following lines are inspected:

    - blank lines, comment/annotation lines and (for Python) lines that sit
      inside a string constant are skipped;
    - a line at a *different* indentation ends the inspection: deeper means
      the terminator statement continues on that line, shallower means the
      terminator's block has been closed;
    - a line at the *same* indentation that starts with a block keyword from
      ``_POST_TERMINATOR_OK`` (``else``, ``case``, ``}``, ``end`` ...) also
      ends the inspection, because it closes or switches the block;
    - any other line at the same indentation is reported as unreachable.

    Args:
        file_path: Path of the file being analysed; used for language
            detection and in the finding text.
        content: Full text of the file.

    Returns:
        A list of findings. Scanning stops once ten findings have been
        collected. The list is empty for blank content and for languages
        without an entry in ``_TERMINATORS``.
    """
    if not content.strip():
        return []

    lang = detect_language(file_path)
    terminators = _TERMINATORS.get(lang)
    if not terminators:
        return []

    lines = content.splitlines()
    findings: list[GateFinding] = []

    # F14a: for Python, skip lines that live inside a string constant
    # (test fixtures containing `return x\n    dead_line()` etc.). Other
    # languages get an empty set, so every line is considered.
    string_literal_lines: frozenset[int] = (
        collect_string_constant_line_ranges(content) if lang == "python" else frozenset()
    )

    def _indent(line: str) -> int:
        return len(line) - len(line.lstrip())

    i = 0
    while i < len(lines) - 1:
        # F14a: skip terminator candidate lines that are inside a string literal.
        # string_literal_lines is 1-based, `i` is 0-based.
        if (i + 1) in string_literal_lines:
            i += 1
            continue

        stripped = lines[i].strip()

        # First token of the line, with any call parenthesis / trailing ';' removed
        # so that `panic("x")`, `break;` and `return foo(` are all recognised.
        first_word = stripped.split("(")[0].split(" ")[0].rstrip(";")
        if first_word in terminators and not stripped.startswith(("#", "//")):
            term_indent = _indent(lines[i])
            for j in range(i + 1, min(i + 5, len(lines))):
                next_line = lines[j]
                next_stripped = next_line.strip()
                if not next_stripped:
                    continue
                # F14a: also skip the follow-up line if it's inside a string literal.
                if (j + 1) in string_literal_lines:
                    continue
                if _indent(next_line) != term_indent:
                    # Deeper: continuation of the terminator statement.
                    # Shallower: the enclosing block resumed, so nothing after
                    # this point belongs to the terminator's block any more.
                    # (Previously a shallower line was skipped and the scan
                    # went on into sibling blocks, flagging reachable code.)
                    break
                if next_stripped.startswith(("#", "//", "/*", "*", "@")):
                    # Comments / annotations say nothing about reachability.
                    continue
                first_next = next_stripped.split("(")[0].split(" ")[0].rstrip(":;")
                if first_next.lower() in _POST_TERMINATOR_OK:
                    # A block keyword closes or switches the block; code after
                    # it is reachable through another path, so stop here.
                    break
                detail = f"Unreachable code after '{first_word}' at line {i + 1}: {next_stripped[:60]}"
                findings.append(build_finding(
                    check_id="unreachable_scan",
                    category=GateCategory.DRIFT,
                    title=f"[unreachable_code] {file_path}:{j + 1}",
                    severity=GateSeverity.MEDIUM,
                    impact=GateImpact.REVISE,
                    summary=detail,
                    recommendation="Remove or restructure the unreachable code block.",
                    evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                    repair_kind=RepairKind.REMOVE_DUPLICATE.value,
                    executor_action=f"Remove unreachable code at {file_path}:{j + 1}",
                ))
                break
        i += 1
        if len(findings) >= 10:
            break

    return findings
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Cluster 35: Shadowed Builtins
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
# Names that assess_shadowed_builtins() treats as builtins, keyed by the
# language name returned by detect_language(). A language without an entry
# produces no shadowed-builtin findings.
_BUILTINS_BY_LANG: dict[str, set[str]] = {
    "python": {
        "list", "dict", "set", "tuple", "str", "int", "float", "bool",
        "type", "id", "input", "print", "len", "range", "map", "filter",
        "open", "hash", "any", "all", "min", "max", "sum", "sorted",
        "next", "iter", "super", "format", "zip", "enumerate", "abs",
        "round", "bytes", "object", "dir", "vars", "chr", "ord", "hex",
        "oct", "bin", "pow", "repr", "callable", "isinstance", "issubclass",
        "getattr", "setattr", "hasattr", "property", "classmethod",
        "staticmethod", "frozenset", "compile", "eval", "exec", "globals",
        "locals", "breakpoint", "complex",
        "bytearray", "memoryview", "slice", "reversed",
    },
    "javascript": {
        "Array", "Object", "String", "Number", "Boolean", "Function",
        "Symbol", "Map", "Set", "Promise", "Error", "Date", "RegExp",
        "JSON", "Math", "parseInt", "parseFloat", "isNaN", "Infinity",
        "NaN", "undefined", "console", "window", "document", "fetch",
        "setTimeout", "setInterval", "eval", "alert",
    },
    "go": {
        "error", "string", "int", "float64", "bool", "byte", "rune",
        "append", "cap", "close", "copy", "delete", "len", "make",
        "new", "panic", "recover", "print", "println", "true", "false",
        "nil", "iota", "complex64", "complex128",
    },
}
# TypeScript reuses the very same set object as JavaScript (an alias, not a
# copy), so mutating one entry mutates the other.
_BUILTINS_BY_LANG["typescript"] = _BUILTINS_BY_LANG["javascript"]
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _ast_shadowed_builtins_python(
    content: str,
    builtins: set[str],
) -> list[tuple[int, str]] | None:
    """Use AST to find Python names that genuinely shadow builtins.

    Args:
        content: Python source text to analyse.
        builtins: Names that count as builtins.

    Returns:
        A list of ``(lineno, name)`` tuples sorted by line number with exact
        duplicates removed, or ``None`` when the content cannot be parsed
        (SyntaxError) so the caller can fall back to regex.

    Skipped (not real shadowing):
    - ``ast.AnnAssign`` directly inside a class body (dataclass/Pydantic
      field annotation)
    - Function parameter names (``def f(id: str)`` — legit API surface);
      parameters are ``ast.arg`` nodes, which no branch below inspects
    - Names suppressed with ``# noqa: shadowed_builtin`` on the line the
      statement starts on

    Flagged (real shadowing):
    - Module-level plain assignment: ``id = foo()``
    - Function-local plain assignment: ``def f(): id = 42``
    - Augmented assignment and walrus targets: ``id += 1``, ``(id := 1)``
    - Unpacking targets: ``id, type = pair``, ``first, *list = items``
    - ``for`` loop target at any scope, including unpacking:
      ``for list in items``, ``for id, row in rows``
    - Import binding: ``from x import list``, ``import x as list``,
      ``import format.utils`` (binds ``format``)
    - Function / class definition whose name shadows a builtin:
      ``def list():``
    """
    import ast

    try:
        tree = ast.parse(content)
    except SyntaxError:
        return None  # caller falls back to regex

    source_lines = content.splitlines()

    def _noqa(lineno: int) -> bool:
        """Return True if the line carries # noqa: shadowed_builtin."""
        if lineno < 1 or lineno > len(source_lines):
            return False
        return "noqa: shadowed_builtin" in source_lines[lineno - 1]

    def _bound_names(target):
        """Yield every plain name bound by an assignment or loop target.

        Attribute and subscript targets (``obj.id = 1``, ``d["id"] = 1``)
        bind no local name and therefore yield nothing.
        """
        if isinstance(target, ast.Name):
            yield target.id
        elif isinstance(target, (ast.Tuple, ast.List)):
            for element in target.elts:
                yield from _bound_names(element)
        elif isinstance(target, ast.Starred):
            yield from _bound_names(target.value)

    # Identity of every statement that sits directly in a class body, so that
    # annotated fields there can be told apart from ordinary annotated
    # assignments.
    class_body_ids = {
        id(child)
        for node in ast.walk(tree)
        if isinstance(node, ast.ClassDef)
        for child in node.body
    }

    hits: list[tuple[int, str]] = []

    def _record(lineno: int, names) -> None:
        for name in names:
            if name in builtins and not _noqa(lineno):
                hits.append((lineno, name))

    for node in ast.walk(tree):
        if isinstance(node, ast.AnnAssign):
            # dataclass / Pydantic style field — not real shadowing
            if id(node) not in class_body_ids:
                _record(node.lineno, _bound_names(node.target))
        elif isinstance(node, ast.Assign):
            for target in node.targets:
                _record(node.lineno, _bound_names(target))
        elif isinstance(node, (ast.AugAssign, ast.NamedExpr, ast.For, ast.AsyncFor)):
            _record(node.lineno, _bound_names(node.target))
        elif isinstance(node, ast.Import):
            # `import a.b` binds only the top-level package name `a`.
            _record(
                node.lineno,
                (alias.asname or alias.name.split(".", 1)[0] for alias in node.names),
            )
        elif isinstance(node, ast.ImportFrom):
            _record(node.lineno, (alias.asname or alias.name for alias in node.names))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            # Only the definition's own name; parameters are deliberately ignored.
            _record(node.lineno, (node.name,))

    # ast.walk is breadth-first, so hits are not in source order. A stable sort
    # by line number followed by an order-preserving de-duplication collapses
    # repeats such as `id = id = 1`.
    return list(dict.fromkeys(sorted(hits, key=lambda hit: hit[0])))
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _shadowed_builtin_finding(
    file_path: str,
    lineno: int,
    name: str,
    lang_label: str,
) -> GateFinding:
    """Build the single finding shape shared by every shadowed-builtin hit.

    Args:
        file_path: File the hit was found in.
        lineno: 1-based line number of the hit.
        name: The identifier that shadows a builtin.
        lang_label: Language label used in the message text
            (``"Python"``, ``"JS/TS"`` or ``"Go"``).
    """
    detail = f"Variable '{name}' shadows {lang_label} builtin (line {lineno})"
    return build_finding(
        check_id="shadowed_builtin_scan",
        category=GateCategory.RUNTIME_BEHAVIOR,
        title=f"[shadowed_builtins] {file_path}:{lineno}",
        severity=GateSeverity.LOW,
        impact=GateImpact.WARN,
        summary=detail,
        recommendation=f"Rename '{name}' to avoid shadowing the {lang_label} builtin.",
        evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
        repair_kind=RepairKind.FIX_CONTRACT.value,
        executor_action=f"Rename shadowed builtin '{name}' at {file_path}:{lineno}",
    )


def assess_shadowed_builtins(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 35: Detect variable names that shadow language builtins.

    Python files are analysed through the AST helper; if the file does not
    parse, a best-effort line regex is used instead. JavaScript/TypeScript
    and Go are always analysed with line regexes.

    Args:
        file_path: Path of the file being analysed; used for language
            detection, the test-file exemption and the finding text.
        content: Full text of the file.

    Returns:
        At most ten findings. The list is empty for blank content, for
        languages without an entry in ``_BUILTINS_BY_LANG`` and for files
        whose basename starts with ``test_`` or ``conftest``.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    builtins = _BUILTINS_BY_LANG.get(lang)
    if not builtins:
        return []

    # rsplit already returns the whole string when there is no separator.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    max_findings = 10
    findings: list[GateFinding] = []

    if lang == "python":
        # Prefer AST-based detection — precise, no FPs on dataclass fields /
        # function parameters. Returns None on SyntaxError → regex fallback.
        ast_hits = _ast_shadowed_builtins_python(content, builtins)
        if ast_hits is not None:
            # AST parse succeeded; ast_hits is the authoritative list (may be empty).
            for lineno, name in ast_hits[:max_findings]:
                findings.append(_shadowed_builtin_finding(file_path, lineno, name, "Python"))
        else:
            # SyntaxError in file — best-effort regex fallback. The patterns
            # are tried in order and the first one that matches names the line.
            py_patterns = (
                re.compile(r'^\s*(\w+)\s*=\s*(?!=)'),
                re.compile(r'^\s*for\s+(\w+)\s+in\b'),
                re.compile(r'^\s*def\s+(\w+)\s*\('),
            )
            for i, line in enumerate(content.splitlines(), 1):
                if line.strip().startswith("#"):
                    continue
                if "noqa: shadowed_builtin" in line:
                    continue
                name = None
                for pattern in py_patterns:
                    m = pattern.match(line)
                    if m:
                        name = m.group(1)
                        break
                if name and name in builtins:
                    findings.append(_shadowed_builtin_finding(file_path, i, name, "Python"))
                if len(findings) >= max_findings:
                    break

    elif lang in ("javascript", "typescript"):
        js_patterns = (
            re.compile(r'^\s*(?:var|let|const)\s+(\w+)\s*='),
            re.compile(r'^\s*function\s+(\w+)\s*\('),
        )

        for i, line in enumerate(content.splitlines(), 1):
            if line.strip().startswith("//"):
                continue
            for pattern in js_patterns:
                m = pattern.match(line)
                if m and m.group(1) in builtins:
                    findings.append(_shadowed_builtin_finding(file_path, i, m.group(1), "JS/TS"))
                    break  # one finding per line
            if len(findings) >= max_findings:
                break

    elif lang == "go":
        # `(?!=)` keeps `name == value` comparisons from being read as
        # assignments to `name`.
        go_re = re.compile(r'^\s*(?:var\s+)?(\w+)\s*:?=(?!=)')
        for i, line in enumerate(content.splitlines(), 1):
            if line.strip().startswith("//"):
                continue
            m = go_re.match(line)
            if m and m.group(1) in builtins:
                findings.append(_shadowed_builtin_finding(file_path, i, m.group(1), "Go"))
            if len(findings) >= max_findings:
                break

    return findings
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
# ---------------------------------------------------------------------------
|
|
434
|
+
# Cluster 36: Mutable Default Arguments
|
|
435
|
+
# ---------------------------------------------------------------------------
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def assess_mutable_defaults(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 36: Detect mutable default arguments in function signatures.

    Python: every line starting with ``def`` or ``async def`` opens a
    signature; while its parentheses are unbalanced, up to ten following
    lines are joined on, and each ``name=[]`` / ``{}`` / ``set()`` /
    ``list()`` / ``dict()`` / ``bytearray()`` in the joined text is reported
    at the line of the ``def``.

    JavaScript/TypeScript: every ``name = []`` / ``name = {}`` on a line that
    contains ``function`` or ``=>``, or that starts with ``(``, is reported.

    Args:
        file_path: Path of the file being analysed; used for language
            detection, the test-file exemption and the finding text.
        content: Full text of the file.

    Returns:
        A list of findings. Scanning stops after the first line that brings
        the total to ten or more, so a single signature with several hits can
        push the count slightly above ten. The list is empty for blank
        content, unsupported languages and files whose basename starts with
        ``test_`` or ``conftest``.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []

    # rsplit already returns the whole string when there is no separator.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    # Split once; both branches iterate it and the Python branch also indexes
    # into it while joining multi-line signatures.
    all_lines = content.splitlines()
    findings: list[GateFinding] = []

    def _report(lineno: int, detail: str, recommendation: str) -> None:
        findings.append(build_finding(
            check_id="mutable_default_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[mutable_defaults] {file_path}:{lineno}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Fix mutable default at {file_path}:{lineno}",
        ))

    if lang == "python":
        mutable_re = re.compile(
            r'(\w+)\s*(?::\s*\w[^=]*)?\s*=\s*(\[\]|\{\}|set\(\)|list\(\)|dict\(\)|bytearray\(\))'
        )
        for i, line in enumerate(all_lines, 1):
            # Coroutine signatures share the mutable-default pitfall, so
            # `async def` is scanned as well as plain `def`.
            if not line.strip().startswith(("def ", "async def ")):
                continue
            # Join continuation lines until the parentheses balance (bounded
            # to ten extra lines). `i`/`j` are 1-based, `all_lines` is 0-based.
            sig = line
            j = i
            while sig.count("(") > sig.count(")") and j < min(i + 10, len(all_lines)):
                j += 1
                sig += " " + all_lines[j - 1]
            for m in mutable_re.finditer(sig):
                _report(
                    i,
                    f"Mutable default argument '{m.group(1)}={m.group(2)}' (line {i})",
                    f"Use None as default and initialize inside the function: `if {m.group(1)} is None: {m.group(1)} = {m.group(2)}`",
                )
            if len(findings) >= 10:
                break

    elif lang in ("javascript", "typescript"):
        # NOTE(review): JS/TS default parameter expressions are evaluated on
        # every call, so `= []` is not shared between calls the way a Python
        # default is — confirm that reporting these is intended.
        js_mutable_re = re.compile(r'(\w+)\s*=\s*(\[\]|\{\})')
        for i, line in enumerate(all_lines, 1):
            stripped = line.strip()
            if "function" in stripped or "=>" in stripped or stripped.startswith("("):
                for m in js_mutable_re.finditer(line):
                    _report(
                        i,
                        f"Mutable default argument '{m.group(1)} = {m.group(2)}' (line {i})",
                        "Use null/undefined as default and initialize inside the function.",
                    )
            if len(findings) >= 10:
                break

    return findings
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
# ---------------------------------------------------------------------------
|
|
514
|
+
# Cluster 37: Resource Leaks
|
|
515
|
+
# ---------------------------------------------------------------------------
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def assess_resource_leaks(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 37: Detect unclosed resources (file handles, connections).

    This is a line-based heuristic scan, not a data-flow analysis. Each
    supported language has its own pattern:

    * python -- ``name = open(...)`` and ``name = sqlite3.connect(...)`` /
      ``socket.socket(...)`` / ``urllib.<mod>.urlopen(...)`` assignments on a
      line that does not start with ``with``.
    * go -- ``x, err := os.Open|Create|OpenFile(...)`` with no
      ``defer x.Close()`` on any of the five following lines.
    * java -- ``new FileInputStream|FileOutputStream|BufferedReader|
      FileReader|FileWriter|Socket(...)`` on a line without the ``try``
      keyword.

    Args:
        file_path: Path of the scanned file; used for language detection,
            the test-file skip and the finding titles.
        content: Full text of the file.

    Returns:
        The findings collected so far; scanning stops after the line on
        which the tenth finding is reached. Empty for blank content, for
        unsupported languages and for files whose basename starts with
        ``test_`` or ``conftest``.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "go", "java"):
        return []

    # Normalise Windows separators so the basename test works on both styles.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    max_findings = 10
    findings: list[GateFinding] = []

    def _report(line_no: int, detail: str, recommendation: str) -> None:
        """Append one resource-leak finding for ``line_no``."""
        findings.append(build_finding(
            check_id="resource_leak_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[resource_leaks] {file_path}:{line_no}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Fix resource leak at {file_path}:{line_no}",
        ))

    lines = content.splitlines()

    if lang == "python":
        open_re = re.compile(r'\w+\s*=\s*open\s*\(')
        resource_re = re.compile(
            r'\w+\s*=\s*(?:sqlite3\.connect|socket\.socket|urllib\.\w+\.urlopen)\s*\('
        )
        for line_no, raw in enumerate(lines, 1):
            stripped = raw.strip()
            # Comment lines and ``with`` statements are never reported.
            if stripped.startswith("#") or stripped.startswith("with "):
                continue
            if open_re.search(stripped):
                _report(
                    line_no,
                    f"open() without `with` statement (line {line_no}): use `with open(...) as f:` instead",
                    "Use `with open(...) as f:` to ensure the file is closed.",
                )
            if resource_re.search(stripped):
                _report(
                    line_no,
                    f"Resource opened without `with` statement (line {line_no})",
                    "Use a context manager (`with` statement) to ensure the resource is closed.",
                )
            if len(findings) >= max_findings:
                break

    elif lang == "go":
        open_re = re.compile(r'(\w+)\s*,\s*\w+\s*:?=\s*os\.(Open|Create|OpenFile)\s*\(')
        for line_no, raw in enumerate(lines, 1):
            # Commented-out code is skipped, matching the Python branch.
            if raw.strip().startswith("//"):
                continue
            m = open_re.search(raw)
            if not m:
                continue
            var_name = m.group(1)
            # ``line_no`` is 1-based, so ``lines[line_no:]`` starts at the line
            # AFTER the open call; look at the next five lines only.
            needle = f"defer {var_name}.Close()"
            if any(needle in following for following in lines[line_no:line_no + 5]):
                continue
            _report(
                line_no,
                f"os.{m.group(2)}() without `defer {var_name}.Close()` (line {line_no})",
                f"Add `defer {var_name}.Close()` immediately after opening.",
            )
            if len(findings) >= max_findings:
                break

    elif lang == "java":
        new_re = re.compile(
            r'new\s+(?:FileInputStream|FileOutputStream|BufferedReader|FileReader|FileWriter|Socket)\s*\('
        )
        # Match ``try`` as a keyword only: a plain substring test is also
        # satisfied by identifiers such as ``entry`` or ``retry`` and would
        # wrongly suppress the finding.
        try_re = re.compile(r'\btry\b')
        for line_no, raw in enumerate(lines, 1):
            stripped = raw.strip()
            if stripped.startswith("//"):
                continue
            if new_re.search(stripped) and not try_re.search(stripped):
                _report(
                    line_no,
                    f"Resource created without try-with-resources (line {line_no})",
                    "Use try-with-resources to ensure the resource is closed.",
                )
            if len(findings) >= max_findings:
                break

    return findings
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
# ---------------------------------------------------------------------------
|
|
627
|
+
# Cluster 38: Docstring/Signature Parameter Drift
|
|
628
|
+
# ---------------------------------------------------------------------------
|
|
629
|
+
|
|
630
|
+
# FP-round2-C (2026-06-28): Google-style section headers that are NOT params.
|
|
631
|
+
# The old ``Args:\s*\n((?:\s+\w+.*\n)*)`` capture ran past the Args block into
|
|
632
|
+
# the following ``Returns:`` / ``Raises:`` / ``Yields:`` sections (separated by a
|
|
633
|
+
# blank line that the greedy ``.*\n`` re-absorbed), so section headers like
|
|
634
|
+
# ``Returns`` / ``Raises`` / ``RuntimeError`` were mis-parsed as documented
|
|
635
|
+
# params and reported as drift on every Google-style docstring (mcp). We now
|
|
636
|
+
# parse the Args block line-by-line, stopping at a blank line or the next
|
|
637
|
+
# section header, and filter these keywords defensively.
|
|
638
|
+
_DOCSTRING_SECTION_HEADERS = frozenset({
|
|
639
|
+
"Args", "Arguments", "Parameters", "Returns", "Return", "Yields", "Yield",
|
|
640
|
+
"Raises", "Raise", "Examples", "Example", "Note", "Notes", "Warning",
|
|
641
|
+
"Warnings", "See", "References", "Attributes", "Todo",
|
|
642
|
+
})
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def _extract_documented_params(docstring: str) -> list[str]:
|
|
646
|
+
"""Extract the parameter NAMES a docstring documents.
|
|
647
|
+
|
|
648
|
+
Supports reStructuredText ``:param name:`` and Google-style ``Args:``
|
|
649
|
+
blocks. For the Google block we read only the indented lines immediately
|
|
650
|
+
under ``Args:`` and STOP at the first blank line or the next section header
|
|
651
|
+
(``Returns:`` / ``Raises:`` / ...), so section headers are never mistaken
|
|
652
|
+
for parameter names. Section-header keywords are also filtered defensively.
|
|
653
|
+
"""
|
|
654
|
+
import re
|
|
655
|
+
|
|
656
|
+
# reStructuredText form. Supports both ``:param name:`` and the
|
|
657
|
+
# type-prefixed ``:param <type> name:`` variant (e.g.
|
|
658
|
+
# ``:param futures.Executor | None value:``) where the NAME is the last
|
|
659
|
+
# identifier before the closing colon — capturing the first token there
|
|
660
|
+
# would misread the type (``futures``) as the param (FP-round2-C).
|
|
661
|
+
rst: list[str] = []
|
|
662
|
+
for body in re.findall(r':param\s+([^:]+):', docstring):
|
|
663
|
+
tokens = re.findall(r'[A-Za-z_]\w*', body)
|
|
664
|
+
if tokens:
|
|
665
|
+
rst.append(tokens[-1])
|
|
666
|
+
if rst:
|
|
667
|
+
return rst
|
|
668
|
+
|
|
669
|
+
# Google-style ``Args:`` block.
|
|
670
|
+
lines = docstring.splitlines()
|
|
671
|
+
params: list[str] = []
|
|
672
|
+
in_args = False
|
|
673
|
+
for raw in lines:
|
|
674
|
+
line = raw.rstrip()
|
|
675
|
+
stripped = line.strip()
|
|
676
|
+
header = stripped[:-1] if stripped.endswith(":") else stripped
|
|
677
|
+
is_section_header = header in _DOCSTRING_SECTION_HEADERS and (
|
|
678
|
+
stripped.endswith(":") or stripped == header
|
|
679
|
+
)
|
|
680
|
+
if not in_args:
|
|
681
|
+
if stripped in ("Args:", "Arguments:", "Parameters:"):
|
|
682
|
+
in_args = True
|
|
683
|
+
continue
|
|
684
|
+
# Inside the Args block.
|
|
685
|
+
if stripped == "":
|
|
686
|
+
break # blank line ends the block
|
|
687
|
+
if is_section_header:
|
|
688
|
+
break # next section (Returns:/Raises:/...) ends the block
|
|
689
|
+
m = re.match(r'(\w+)\s*(?:\([^)]*\))?\s*:', line.strip())
|
|
690
|
+
if m:
|
|
691
|
+
name = m.group(1)
|
|
692
|
+
if name not in _DOCSTRING_SECTION_HEADERS:
|
|
693
|
+
params.append(name)
|
|
694
|
+
return params
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def assess_docstring_params(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 38: Detect mismatch between function parameters and docstring.

    * python -- the file is parsed with ``ast``; for every function with a
      docstring, names documented via ``_extract_documented_params`` that the
      signature does not accept are reported. Undocumented parameters are
      deliberately NOT reported.
    * javascript / typescript -- a regex pairs each ``/** ... */`` block with
      the ``function name(...)`` or ``const name = (...)`` that directly
      follows it and compares ``@param`` names with the signature.

    Args:
        file_path: Path of the scanned file; used for language detection,
            the test-file skip and the finding titles.
        content: Full text of the file.

    Returns:
        At most ten findings. Empty for blank content, unsupported languages,
        files whose basename starts with ``test_`` or ``conftest``, and
        Python sources that cannot be parsed.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []

    # Normalise Windows separators so the basename test works on both styles.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    max_findings = 10
    findings: list[GateFinding] = []

    if lang == "python":
        # AST-based parameter extraction: a regex over ``def name(...)`` stops
        # at the first ``)`` and so breaks on annotated or multi-line
        # signatures. Every name the signature accepts is collected --
        # including ``self`` / ``cls`` and the ``*args`` / ``**kwargs`` names --
        # and only the actionable drift direction is flagged: a parameter
        # DOCUMENTED in the docstring that the function does NOT accept
        # (renamed / removed / typo'd). "Present but undocumented" is not
        # flagged, because partial parameter docs are common and not a defect.
        import ast as _ast

        try:
            _tree = _ast.parse(content)
        except (SyntaxError, ValueError):
            # ValueError: source containing NUL bytes on older interpreters.
            return findings

        def _sig_param_names(fn) -> set[str]:
            """Return EVERY name the signature accepts.

            ``self`` / ``cls`` are kept (a free function may legitimately
            have a parameter named ``cls``), and so are the var-positional /
            var-keyword names, because docstrings document them by their bare
            name (``:param param_decls:`` for ``*param_decls``). An
            over-inclusive accepted set can only suppress findings here,
            never create them.
            """
            a = fn.args
            names = {arg.arg for arg in (*a.posonlyargs, *a.args, *a.kwonlyargs)}
            if a.vararg is not None:
                names.add(a.vararg.arg)
            if a.kwarg is not None:
                names.add(a.kwarg.arg)
            return names

        for fn in _ast.walk(_tree):
            if not isinstance(fn, (_ast.FunctionDef, _ast.AsyncFunctionDef)):
                continue

            docstring = _ast.get_docstring(fn, clean=False)
            if not docstring:
                continue
            doc_params = _extract_documented_params(docstring)
            if not doc_params:
                continue

            extra_in_doc = set(doc_params) - _sig_param_names(fn)
            if not extra_in_doc:
                continue

            func_name = fn.name
            line_num = fn.lineno
            detail = (
                f"Docstring/signature mismatch in {func_name}(): "
                f"documented but not a parameter: {', '.join(sorted(extra_in_doc))}"
            )
            findings.append(build_finding(
                check_id="docstring_param_scan",
                category=GateCategory.REPORTING,
                title=f"[docstring_drift] {file_path}:{line_num}:{func_name}",
                severity=GateSeverity.LOW,
                impact=GateImpact.WARN,
                summary=detail,
                recommendation=f"Update docstring for {func_name}() to match actual parameters.",
                evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                repair_kind=RepairKind.ADD_PROOF.value,
                executor_action=f"Fix docstring drift in {func_name}() at {file_path}:{line_num}",
            ))
            if len(findings) >= max_findings:
                break

    elif lang in ("javascript", "typescript"):
        # The comment body is matched with a tempered pattern that cannot run
        # past the first ``*/``. A lazy ``.*?`` under DOTALL may backtrack
        # across several comment blocks until one is followed by a function,
        # mixing ``@param`` tags from unrelated JSDoc into this match.
        jsdoc_fn_re = re.compile(
            r'/\*\*((?:(?!\*/).)*)\*/\s*'
            r'(?:(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)'
            r'|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\))',
            re.DOTALL,
        )
        # ``\[?`` accepts the JSDoc optional forms ``[name]`` / ``[name=default]``.
        doc_param_re = re.compile(r'@param\s+(?:\{[^}]*\}\s+)?\[?(\w+)')

        for m in jsdoc_fn_re.finditer(content):
            jsdoc = m.group(1)
            func_name = m.group(2) or m.group(4) or "anonymous"
            params_str = m.group(3) or m.group(5) or ""

            doc_params = set(doc_param_re.findall(jsdoc))

            named: set[str] = set()  # ordinary parameters
            rest: set[str] = set()   # ``...rest`` parameters, by bare name
            for piece in params_str.split(","):
                # Drop default values (``= x``) and type annotations (``: T``).
                token = piece.strip().split("=")[0].split(":")[0].strip()
                if token.startswith("..."):
                    token = token[3:].strip()
                    if token:
                        rest.add(token)
                else:
                    token = token.rstrip("?").strip()  # TypeScript optional marker
                    if token:
                        named.add(token)

            if not doc_params or not named:
                continue

            # A rest parameter may be documented or not; either is accepted.
            documented_unknown = doc_params - named - rest
            undocumented = named - doc_params
            if not documented_unknown and not undocumented:
                continue

            line_num = content[:m.start()].count("\n") + 1
            detail = f"JSDoc/signature mismatch in {func_name}()"
            findings.append(build_finding(
                check_id="docstring_param_scan",
                category=GateCategory.REPORTING,
                title=f"[docstring_drift] {file_path}:{line_num}:{func_name}",
                severity=GateSeverity.LOW,
                impact=GateImpact.WARN,
                summary=detail,
                recommendation=f"Update JSDoc for {func_name}() to match actual parameters.",
                evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                repair_kind=RepairKind.ADD_PROOF.value,
                executor_action=f"Fix JSDoc drift in {func_name}() at {file_path}:{line_num}",
            ))
            if len(findings) >= max_findings:
                break

    return findings
|