vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,834 @@
1
+ """Static code analysis: unreachable code, shadowed builtins, mutable defaults,
2
+ resource leaks, docstring drift. Clusters 34-38.
3
+
4
+ Clusters:
5
+ 34 - Unreachable Code
6
+ 35 - Shadowed Builtins
7
+ 36 - Mutable Default Arguments
8
+ 37 - Resource Leaks
9
+ 38 - Docstring/Signature Parameter Drift
10
+ """
11
+ from __future__ import annotations
12
+
13
+ from .core import detect_language
14
+ from ...gate_models import (
15
+ EvidenceReference,
16
+ GateCategory,
17
+ GateFinding,
18
+ GateImpact,
19
+ GateSeverity,
20
+ RepairKind,
21
+ )
22
+ from ..common import build_finding
23
+ from .._ast_helpers import collect_string_constant_line_ranges
24
+ import logging
25
+ _log = logging.getLogger(__name__)
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Cluster 34: Unreachable Code
30
+ # ---------------------------------------------------------------------------
31
+
32
+ # Terminator keywords per language
33
+ _TERMINATORS = {
34
+ "python": {"return", "raise", "break", "continue"},
35
+ "javascript": {"return", "throw", "break", "continue"},
36
+ "typescript": {"return", "throw", "break", "continue"},
37
+ "go": {"return", "panic", "break", "continue"},
38
+ "rust": {"return", "panic!", "break", "continue"},
39
+ "java": {"return", "throw", "break", "continue"},
40
+ "csharp": {"return", "throw", "break", "continue"},
41
+ "kotlin": {"return", "throw", "break", "continue"},
42
+ "ruby": {"return", "raise", "break", "next"},
43
+ "swift": {"return", "throw", "break", "continue"},
44
+ "php": {"return", "throw", "break", "continue"},
45
+ }
46
+
47
+ # Lines that legitimately follow a terminator at the same/less indent
48
+ _POST_TERMINATOR_OK = {
49
+ "except", "except:", "elif", "else", "else:", "finally", "finally:",
50
+ "catch", "case", "default", "default:", "}", "end", "rescue", "ensure",
51
+ "elif:", "elseif", "elsif",
52
+ }
53
+
54
+
55
def assess_unreachable_code(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 34: Detect code after return/raise/throw/break in same block.

    This is a line-based heuristic, not a control-flow analysis. For every
    line whose first word is a terminator keyword of the detected language,
    the first significant line within the next four lines is inspected. A
    line is significant when it is non-blank and, for Python, not part of a
    string constant. That line is reported when it sits at exactly the
    terminator's indentation and is neither a continuation/closing keyword
    (see ``_POST_TERMINATOR_OK``) nor a comment or decorator.

    Args:
        file_path: Path used for language detection and in finding titles.
        content: Full text of the file to scan.

    Returns:
        At most 10 findings. Empty when ``content`` is blank or the language
        has no entry in ``_TERMINATORS``.
    """
    if not content.strip():
        return []

    lang = detect_language(file_path)
    terminators = _TERMINATORS.get(lang)
    if not terminators:
        return []

    lines = content.splitlines()
    findings: list[GateFinding] = []

    # F14a: for Python, ignore lines that live inside a string constant (test
    # fixtures embedding `return x\n dead_line()` and the like). The values
    # are compared against 1-based line numbers below. Other languages get an
    # empty set, so nothing is skipped for them.
    string_literal_lines: frozenset[int] = (
        collect_string_constant_line_ranges(content) if lang == "python" else frozenset()
    )

    def _indent(line: str) -> int:
        return len(line) - len(line.lstrip())

    # The last line cannot be followed by anything, hence len(lines) - 1.
    for idx in range(len(lines) - 1):
        if len(findings) >= 10:
            break
        if (idx + 1) in string_literal_lines:
            continue

        stripped = lines[idx].strip()
        # "return(x)", "return x" and "return;" all reduce to "return". Comment
        # lines can never match: their first word starts with "#" or "/".
        first_word = stripped.split("(")[0].split(" ")[0].rstrip(";")
        if first_word not in terminators:
            continue

        term_indent = _indent(lines[idx])
        for nxt in range(idx + 1, min(idx + 5, len(lines))):
            candidate = lines[nxt]
            candidate_stripped = candidate.strip()
            if not candidate_stripped or (nxt + 1) in string_literal_lines:
                continue

            if _indent(candidate) == term_indent:
                first_next = candidate_stripped.split("(")[0].split(" ")[0].rstrip(":;")
                is_continuation = first_next.lower() in _POST_TERMINATOR_OK
                is_annotation = candidate_stripped.startswith(("#", "//", "/*", "*", "@"))
                if not is_continuation and not is_annotation:
                    detail = f"Unreachable code after '{first_word}' at line {idx + 1}: {candidate_stripped[:60]}"
                    findings.append(build_finding(
                        check_id="unreachable_scan",
                        category=GateCategory.DRIFT,
                        title=f"[unreachable_code] {file_path}:{nxt + 1}",
                        severity=GateSeverity.MEDIUM,
                        impact=GateImpact.REVISE,
                        summary=detail,
                        recommendation="Remove or restructure the unreachable code block.",
                        evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                        repair_kind=RepairKind.REMOVE_DUPLICATE.value,
                        executor_action=f"Remove unreachable code at {file_path}:{nxt + 1}",
                    ))
            # The first significant line settles the question whatever its
            # indent: deeper means the terminator statement continues,
            # shallower means the block has closed. Scanning past a dedented
            # line would report the body of a sibling branch (e.g. the
            # `return` under an `else:`) as unreachable.
            break

    return findings
127
+
128
+
129
+ # ---------------------------------------------------------------------------
130
+ # Cluster 35: Shadowed Builtins
131
+ # ---------------------------------------------------------------------------
132
+
133
+ _BUILTINS_BY_LANG: dict[str, set[str]] = {
134
+ "python": {
135
+ "list", "dict", "set", "tuple", "str", "int", "float", "bool",
136
+ "type", "id", "input", "print", "len", "range", "map", "filter",
137
+ "open", "hash", "any", "all", "min", "max", "sum", "sorted",
138
+ "next", "iter", "super", "format", "zip", "enumerate", "abs",
139
+ "round", "bytes", "object", "dir", "vars", "chr", "ord", "hex",
140
+ "oct", "bin", "pow", "repr", "callable", "isinstance", "issubclass",
141
+ "getattr", "setattr", "hasattr", "property", "classmethod",
142
+ "staticmethod", "frozenset", "compile", "eval", "exec", "globals",
143
+ "locals", "breakpoint", "complex",
144
+ "bytearray", "memoryview", "slice", "reversed",
145
+ },
146
+ "javascript": {
147
+ "Array", "Object", "String", "Number", "Boolean", "Function",
148
+ "Symbol", "Map", "Set", "Promise", "Error", "Date", "RegExp",
149
+ "JSON", "Math", "parseInt", "parseFloat", "isNaN", "Infinity",
150
+ "NaN", "undefined", "console", "window", "document", "fetch",
151
+ "setTimeout", "setInterval", "eval", "alert",
152
+ },
153
+ "go": {
154
+ "error", "string", "int", "float64", "bool", "byte", "rune",
155
+ "append", "cap", "close", "copy", "delete", "len", "make",
156
+ "new", "panic", "recover", "print", "println", "true", "false",
157
+ "nil", "iota", "complex64", "complex128",
158
+ },
159
+ }
160
+ _BUILTINS_BY_LANG["typescript"] = _BUILTINS_BY_LANG["javascript"]
161
+
162
+
163
+ def _ast_shadowed_builtins_python(
164
+ content: str,
165
+ builtins: set[str],
166
+ ) -> list[tuple[int, str]] | None:
167
+ """Use AST to find Python names that genuinely shadow builtins.
168
+
169
+ Returns a list of (lineno, name) tuples, or ``None`` when the content
170
+ cannot be parsed (SyntaxError) so the caller can fall back to regex.
171
+
172
+ Skipped (not real shadowing):
173
+ - ``ast.AnnAssign`` inside a class body (dataclass/Pydantic field annotation)
174
+ - Function parameter names (``def f(id: str)`` — legit API surface)
175
+ - Names suppressed with ``# noqa: shadowed_builtin`` on the same line
176
+
177
+ Flagged (real shadowing):
178
+ - Module-level plain assignment: ``id = foo()``
179
+ - Function-local plain assignment: ``def f(): id = 42``
180
+ - ``for`` loop target at any scope: ``for list in items``
181
+ - Import alias: ``from x import list``
182
+ - Function definition whose name shadows a builtin: ``def list():``
183
+ """
184
+ import ast
185
+
186
+ try:
187
+ tree = ast.parse(content)
188
+ except SyntaxError:
189
+ return None # caller falls back to regex
190
+
191
+ source_lines = content.splitlines()
192
+
193
+ def _noqa(lineno: int) -> bool:
194
+ """Return True if the line carries # noqa: shadowed_builtin."""
195
+ if lineno < 1 or lineno > len(source_lines):
196
+ return False
197
+ line = source_lines[lineno - 1]
198
+ return "noqa: shadowed_builtin" in line
199
+
200
+ hits: list[tuple[int, str]] = []
201
+
202
+ # Collect the set of class body node-ids so we can skip AnnAssign inside them.
203
+ class_body_ids: set[int] = set()
204
+ for node in ast.walk(tree):
205
+ if isinstance(node, ast.ClassDef):
206
+ for child in node.body:
207
+ class_body_ids.add(id(child))
208
+
209
+ # Collect function arg names to skip (parameter annotations are not shadowing).
210
+ param_names_by_funcdef: dict[int, set[str]] = {}
211
+ for node in ast.walk(tree):
212
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
213
+ args = node.args
214
+ all_args = (
215
+ args.args
216
+ + args.posonlyargs
217
+ + args.kwonlyargs
218
+ + ([args.vararg] if args.vararg else [])
219
+ + ([args.kwarg] if args.kwarg else [])
220
+ )
221
+ param_names_by_funcdef[id(node)] = {a.arg for a in all_args}
222
+
223
+ for node in ast.walk(tree):
224
+ # --- AnnAssign: skip if it's inside a class body ---
225
+ if isinstance(node, ast.AnnAssign):
226
+ if id(node) in class_body_ids:
227
+ continue # dataclass / Pydantic field — not real shadowing
228
+ if isinstance(node.target, ast.Name):
229
+ name = node.target.id
230
+ lineno = node.lineno
231
+ if name in builtins and not _noqa(lineno):
232
+ hits.append((lineno, name))
233
+ continue
234
+
235
+ # --- Plain assignment (Assign / AugAssign / NamedExpr) ---
236
+ if isinstance(node, ast.Assign):
237
+ for target in node.targets:
238
+ if isinstance(target, ast.Name):
239
+ name = target.id
240
+ lineno = node.lineno
241
+ if name in builtins and not _noqa(lineno):
242
+ hits.append((lineno, name))
243
+ continue
244
+
245
+ if isinstance(node, ast.AugAssign):
246
+ if isinstance(node.target, ast.Name):
247
+ name = node.target.id
248
+ lineno = node.lineno
249
+ if name in builtins and not _noqa(lineno):
250
+ hits.append((lineno, name))
251
+ continue
252
+
253
+ if isinstance(node, ast.NamedExpr):
254
+ if isinstance(node.target, ast.Name):
255
+ name = node.target.id
256
+ lineno = node.lineno
257
+ if name in builtins and not _noqa(lineno):
258
+ hits.append((lineno, name))
259
+ continue
260
+
261
+ # --- For-loop target ---
262
+ if isinstance(node, (ast.For, ast.AsyncFor)):
263
+ if isinstance(node.target, ast.Name):
264
+ name = node.target.id
265
+ lineno = node.lineno
266
+ if name in builtins and not _noqa(lineno):
267
+ hits.append((lineno, name))
268
+ continue
269
+
270
+ # --- Import alias ---
271
+ if isinstance(node, (ast.Import, ast.ImportFrom)):
272
+ for alias in node.names:
273
+ bound = alias.asname if alias.asname else alias.name
274
+ lineno = node.lineno
275
+ if bound in builtins and not _noqa(lineno):
276
+ hits.append((lineno, bound))
277
+ continue
278
+
279
+ # --- Function / class definition name ---
280
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
281
+ name = node.name
282
+ lineno = node.lineno
283
+ if name in builtins and not _noqa(lineno):
284
+ hits.append((lineno, name))
285
+ # Note: we do NOT flag the parameter names — those are legit API surface.
286
+ continue
287
+
288
+ # Deduplicate and sort by line number (ast.walk may visit some nodes twice
289
+ # in edge cases with nested comprehensions).
290
+ seen: set[tuple[int, str]] = set()
291
+ result: list[tuple[int, str]] = []
292
+ for item in sorted(hits, key=lambda x: x[0]):
293
+ if item not in seen:
294
+ seen.add(item)
295
+ result.append(item)
296
+ return result
297
+
298
+
299
def assess_shadowed_builtins(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 35: Detect variable names that shadow language builtins.

    Python is analysed through ``_ast_shadowed_builtins_python``. Only when
    that returns ``None`` (unparseable source) does a best-effort regex scan
    run instead. JavaScript/TypeScript and Go are always scanned with
    line-anchored regexes.

    Args:
        file_path: Path used for language detection, the test-file exemption
            and finding titles.
        content: Full text of the file to scan.

    Returns:
        At most 10 findings. Empty for blank content, languages without an
        entry in ``_BUILTINS_BY_LANG``, and files whose basename starts with
        ``test_`` or ``conftest``.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    builtins = _BUILTINS_BY_LANG.get(lang)
    if not builtins:
        return []

    # rsplit returns the whole string when there is no separator, so no
    # explicit "/" pre-check is required.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    def _scan_lines(patterns, comment_prefix, honor_noqa=False):
        """Yield (lineno, name) where the first matching pattern captures a builtin."""
        for lineno, line in enumerate(content.splitlines(), 1):
            if line.strip().startswith(comment_prefix):
                continue
            if honor_noqa and "noqa: shadowed_builtin" in line:
                continue
            for pattern in patterns:
                match = pattern.match(line)
                if match:
                    if match.group(1) in builtins:
                        yield lineno, match.group(1)
                    break  # patterns are alternatives; one binding per line

    if lang == "python":
        label = "Python"
        # Prefer AST-based detection — precise, no FPs on dataclass fields /
        # function parameters. None means the file did not parse.
        hits = _ast_shadowed_builtins_python(content, builtins)
        if hits is None:
            hits = _scan_lines(
                (
                    re.compile(r'^\s*(\w+)\s*=\s*(?!=)'),
                    re.compile(r'^\s*for\s+(\w+)\s+in\b'),
                    re.compile(r'^\s*def\s+(\w+)\s*\('),
                ),
                "#",
                honor_noqa=True,
            )
    elif lang in ("javascript", "typescript"):
        label = "JS/TS"
        hits = _scan_lines(
            (
                re.compile(r'^\s*(?:var|let|const)\s+(\w+)\s*='),
                re.compile(r'^\s*function\s+(\w+)\s*\('),
            ),
            "//",
        )
    elif lang == "go":
        label = "Go"
        # (?!=) keeps a leading `name == value` comparison (continuation of a
        # multi-line condition) from being read as an assignment, matching
        # the guard the Python fallback pattern already has.
        hits = _scan_lines((re.compile(r'^\s*(?:var\s+)?(\w+)\s*:?=(?!=)'),), "//")
    else:
        return []

    findings: list[GateFinding] = []
    for lineno, name in hits:
        detail = f"Variable '{name}' shadows {label} builtin (line {lineno})"
        findings.append(build_finding(
            check_id="shadowed_builtin_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[shadowed_builtins] {file_path}:{lineno}",
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=detail,
            recommendation=f"Rename '{name}' to avoid shadowing the {label} builtin.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Rename shadowed builtin '{name}' at {file_path}:{lineno}",
        ))
        if len(findings) >= 10:
            break

    return findings
431
+
432
+
433
+ # ---------------------------------------------------------------------------
434
+ # Cluster 36: Mutable Default Arguments
435
+ # ---------------------------------------------------------------------------
436
+
437
+
438
def assess_mutable_defaults(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 36: Detect mutable default arguments in function signatures.

    Regex heuristic. For Python, each ``def`` / ``async def`` line is joined
    with up to ten following lines until its parentheses balance, and the
    joined text is searched for parameters defaulting to an empty
    list/dict/set literal or constructor call. For JavaScript/TypeScript,
    any non-comment line that looks like a function header (contains
    ``function`` or ``=>``, or starts with ``(``) is searched for
    ``name = []`` / ``name = {}``.

    Args:
        file_path: Path used for language detection, the test-file exemption
            and finding titles.
        content: Full text of the file to scan.

    Returns:
        Findings in line order. Scanning stops after the line on which the
        count reaches 10, so one signature with several hits may push the
        total slightly above 10. Empty for blank content, other languages,
        and files whose basename starts with ``test_`` or ``conftest``.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []

    # rsplit returns the whole string when there is no separator.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    all_lines = content.splitlines()  # split once, not once per signature
    findings: list[GateFinding] = []

    def _report(lineno: int, detail: str, recommendation: str) -> None:
        findings.append(build_finding(
            check_id="mutable_default_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[mutable_defaults] {file_path}:{lineno}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Fix mutable default at {file_path}:{lineno}",
        ))

    if lang == "python":
        mutable_re = re.compile(
            r'(\w+)\s*(?::\s*\w[^=]*)?\s*=\s*(\[\]|\{\}|set\(\)|list\(\)|dict\(\)|bytearray\(\))'
        )
        for lineno, line in enumerate(all_lines, 1):
            # Coroutines share the pitfall, so `async def` is scanned as well.
            if not line.strip().startswith(("def ", "async def ")):
                continue
            # Pull in continuation lines until the parameter list closes,
            # bounded to ten lines so a stray "(" cannot swallow the file.
            sig = line
            last = lineno
            while sig.count("(") > sig.count(")") and last < min(lineno + 10, len(all_lines)):
                last += 1
                sig += " " + all_lines[last - 1]
            for m in mutable_re.finditer(sig):
                param, default = m.group(1), m.group(2)
                _report(
                    lineno,
                    f"Mutable default argument '{param}={default}' (line {lineno})",
                    f"Use None as default and initialize inside the function: `if {param} is None: {param} = {default}`",
                )
            if len(findings) >= 10:
                break

    else:  # javascript / typescript
        js_mutable_re = re.compile(r'(\w+)\s*=\s*(\[\]|\{\})')
        for lineno, line in enumerate(all_lines, 1):
            stripped = line.strip()
            # Commented-out code is not a live signature.
            if stripped.startswith("//"):
                continue
            if "function" in stripped or "=>" in stripped or stripped.startswith("("):
                for m in js_mutable_re.finditer(line):
                    _report(
                        lineno,
                        f"Mutable default argument '{m.group(1)} = {m.group(2)}' (line {lineno})",
                        "Use null/undefined as default and initialize inside the function.",
                    )
            if len(findings) >= 10:
                break

    return findings
511
+
512
+
513
+ # ---------------------------------------------------------------------------
514
+ # Cluster 37: Resource Leaks
515
+ # ---------------------------------------------------------------------------
516
+
517
+
518
def assess_resource_leaks(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 37: Detect unclosed resources (file handles, connections).

    Per-line regex heuristics:

    - Python: ``name = open(...)`` and ``name = sqlite3.connect(...)`` /
      ``socket.socket(...)`` / ``urllib.<mod>.urlopen(...)`` on a line that
      does not start with ``with``.
    - Go: ``x, err := os.Open/Create/OpenFile(...)`` with no
      ``defer x.Close()`` on any of the five following lines.
    - Java: ``new FileInputStream/FileOutputStream/BufferedReader/FileReader/
      FileWriter/Socket(...)`` on a line that does not contain ``try``.

    Comment lines are skipped in all three languages.

    Args:
        file_path: Path used for language detection, the test-file exemption
            and finding titles.
        content: Full text of the file to scan.

    Returns:
        Findings in line order; scanning stops once 10 have been collected.
        Empty for blank content, other languages, and files whose basename
        starts with ``test_`` or ``conftest``.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "go", "java"):
        return []

    # rsplit returns the whole string when there is no separator.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    lines = content.splitlines()
    findings: list[GateFinding] = []

    def _report(lineno: int, detail: str, recommendation: str) -> None:
        findings.append(build_finding(
            check_id="resource_leak_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[resource_leaks] {file_path}:{lineno}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Fix resource leak at {file_path}:{lineno}",
        ))

    if lang == "python":
        open_re = re.compile(r'\w+\s*=\s*open\s*\(')
        handle_re = re.compile(
            r'\w+\s*=\s*(?:sqlite3\.connect|socket\.socket|urllib\.\w+\.urlopen)\s*\('
        )
        for lineno, line in enumerate(lines, 1):
            stripped = line.strip()
            if stripped.startswith("#"):
                continue
            if not stripped.startswith("with "):
                if open_re.search(stripped):
                    _report(
                        lineno,
                        f"open() without `with` statement (line {lineno}): use `with open(...) as f:` instead",
                        "Use `with open(...) as f:` to ensure the file is closed.",
                    )
                if handle_re.search(stripped):
                    _report(
                        lineno,
                        f"Resource opened without `with` statement (line {lineno})",
                        "Use a context manager (`with` statement) to ensure the resource is closed.",
                    )
            if len(findings) >= 10:
                break

    elif lang == "go":
        open_re = re.compile(r'(\w+)\s*,\s*\w+\s*:?=\s*os\.(Open|Create|OpenFile)\s*\(')
        for lineno, line in enumerate(lines, 1):
            # Commented-out code holds no live handle.
            if line.strip().startswith("//"):
                continue
            m = open_re.search(line)
            if m:
                var_name = m.group(1)
                # lineno is 1-based, so lines[lineno:] starts at the line
                # after the open call; the window leaves room for the usual
                # `if err != nil { ... }` block before the defer.
                window = lines[lineno:lineno + 5]
                if not any(f"defer {var_name}.Close()" in following for following in window):
                    _report(
                        lineno,
                        f"os.{m.group(2)}() without `defer {var_name}.Close()` (line {lineno})",
                        f"Add `defer {var_name}.Close()` immediately after opening.",
                    )
            if len(findings) >= 10:
                break

    elif lang == "java":
        ctor_re = re.compile(
            r'new\s+(?:FileInputStream|FileOutputStream|BufferedReader|FileReader|FileWriter|Socket)\s*\('
        )
        for lineno, line in enumerate(lines, 1):
            stripped = line.strip()
            # Skip line comments and block-comment / Javadoc lines.
            if stripped.startswith(("//", "/*", "*")):
                continue
            # "try" on the same line is taken as try-with-resources.
            if ctor_re.search(stripped) and "try" not in stripped:
                _report(
                    lineno,
                    f"Resource created without try-with-resources (line {lineno})",
                    "Use try-with-resources to ensure the resource is closed.",
                )
            if len(findings) >= 10:
                break

    return findings
624
+
625
+
626
+ # ---------------------------------------------------------------------------
627
+ # Cluster 38: Docstring/Signature Parameter Drift
628
+ # ---------------------------------------------------------------------------
629
+
630
+ # FP-round2-C (2026-06-28): Google-style section headers that are NOT params.
631
+ # The old ``Args:\s*\n((?:\s+\w+.*\n)*)`` capture ran past the Args block into
632
+ # the following ``Returns:`` / ``Raises:`` / ``Yields:`` sections (separated by a
633
+ # blank line that the greedy ``.*\n`` re-absorbed), so section headers like
634
+ # ``Returns`` / ``Raises`` / ``RuntimeError`` were mis-parsed as documented
635
+ # params and reported as drift on every Google-style docstring (mcp). We now
636
+ # parse the Args block line-by-line, stopping at a blank line or the next
637
+ # section header, and filter these keywords defensively.
638
+ _DOCSTRING_SECTION_HEADERS = frozenset({
639
+ "Args", "Arguments", "Parameters", "Returns", "Return", "Yields", "Yield",
640
+ "Raises", "Raise", "Examples", "Example", "Note", "Notes", "Warning",
641
+ "Warnings", "See", "References", "Attributes", "Todo",
642
+ })
643
+
644
+
645
+ def _extract_documented_params(docstring: str) -> list[str]:
646
+ """Extract the parameter NAMES a docstring documents.
647
+
648
+ Supports reStructuredText ``:param name:`` and Google-style ``Args:``
649
+ blocks. For the Google block we read only the indented lines immediately
650
+ under ``Args:`` and STOP at the first blank line or the next section header
651
+ (``Returns:`` / ``Raises:`` / ...), so section headers are never mistaken
652
+ for parameter names. Section-header keywords are also filtered defensively.
653
+ """
654
+ import re
655
+
656
+ # reStructuredText form. Supports both ``:param name:`` and the
657
+ # type-prefixed ``:param <type> name:`` variant (e.g.
658
+ # ``:param futures.Executor | None value:``) where the NAME is the last
659
+ # identifier before the closing colon — capturing the first token there
660
+ # would misread the type (``futures``) as the param (FP-round2-C).
661
+ rst: list[str] = []
662
+ for body in re.findall(r':param\s+([^:]+):', docstring):
663
+ tokens = re.findall(r'[A-Za-z_]\w*', body)
664
+ if tokens:
665
+ rst.append(tokens[-1])
666
+ if rst:
667
+ return rst
668
+
669
+ # Google-style ``Args:`` block.
670
+ lines = docstring.splitlines()
671
+ params: list[str] = []
672
+ in_args = False
673
+ for raw in lines:
674
+ line = raw.rstrip()
675
+ stripped = line.strip()
676
+ header = stripped[:-1] if stripped.endswith(":") else stripped
677
+ is_section_header = header in _DOCSTRING_SECTION_HEADERS and (
678
+ stripped.endswith(":") or stripped == header
679
+ )
680
+ if not in_args:
681
+ if stripped in ("Args:", "Arguments:", "Parameters:"):
682
+ in_args = True
683
+ continue
684
+ # Inside the Args block.
685
+ if stripped == "":
686
+ break # blank line ends the block
687
+ if is_section_header:
688
+ break # next section (Returns:/Raises:/...) ends the block
689
+ m = re.match(r'(\w+)\s*(?:\([^)]*\))?\s*:', line.strip())
690
+ if m:
691
+ name = m.group(1)
692
+ if name not in _DOCSTRING_SECTION_HEADERS:
693
+ params.append(name)
694
+ return params
695
+
696
+
697
def assess_docstring_params(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 38: Detect mismatch between function parameters and docstring.

    Python files are parsed with ``ast``; a finding is produced for every
    function whose docstring documents a name the signature does not accept.
    Parameters that exist but are undocumented are deliberately NOT reported.

    JavaScript / TypeScript files are scanned with a regex for a ``/** */``
    block immediately followed by a ``function`` declaration or a
    ``const|let|var name = (...)`` assignment; a finding is produced when the
    set of ``@param`` names differs from the set of signature names.

    Args:
        file_path: Path of the scanned file. Used for language detection,
            for the test-file exemption, and in finding titles.
        content: Full text of the file.

    Returns:
        The findings for this file (scanning stops once 10 have been
        collected). Empty for blank content, unsupported languages, files
        whose basename starts with ``test_`` / ``conftest``, and Python
        sources that cannot be parsed.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []

    # rsplit on a separator-free path returns the path itself, so no
    # explicit "contains a slash" branch is needed.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith("test_") or basename.startswith("conftest"):
        return []

    findings: list[GateFinding] = []

    if lang == "python":
        # FP-round2-C (2026-06-28): AST-based parameter extraction.
        #
        # The old detector used ``def\s+\w+\s*\(([^)]*)\)`` to grab the param
        # text. ``[^)]*`` stops at the FIRST ``)`` -- which is wrong for any
        # type-annotated signature (``f: t.Callable[..., t.Any]``) and any
        # multi-line / overloaded signature. That truncation produced garbage
        # "params" like ``t.Any]`` / ``str]]`` and even leaked inline-comment
        # fragments.
        #
        # We now parse the file with ``ast`` and read real parameter names
        # from ``node.args``. Every accepted name is collected (posonly, args,
        # kwonly, ``*args``, ``**kwargs``, including ``self`` / ``cls``); see
        # ``_sig_param_names`` for why over-inclusion is safe.
        # We only flag the GENUINE, actionable drift direction:
        #   * a parameter DOCUMENTED in the docstring that the function does
        #     NOT accept ("extra in docs" -- a renamed / removed / typo'd
        #     param).
        # We deliberately do NOT flag "param present but undocumented":
        # partial parameter docs are ubiquitous and not a defect.
        import ast as _ast

        try:
            _tree = _ast.parse(content)
        except (SyntaxError, ValueError):
            # ValueError is what compile() documents for source containing
            # NUL bytes; an unparseable file simply yields no findings.
            return findings

        def _sig_param_names(node) -> set[str]:
            """Return EVERY name the signature accepts.

            ``self`` / ``cls`` are included (a real param may legitimately be
            named ``cls``), and so are the ``*args`` / ``**kwargs`` names,
            because idiomatic docstrings document var-positional /
            var-keyword params by their bare name. Since this gate only flags
            the "documented-but-not-a-parameter" direction, an over-inclusive
            accepted set can only SUPPRESS false positives, never create
            them. FP-round2-C (2026-06-28).
            """
            spec = node.args
            names = {
                arg.arg
                for arg in (*spec.posonlyargs, *spec.args, *spec.kwonlyargs)
            }
            if spec.vararg is not None:
                names.add(spec.vararg.arg)
            if spec.kwarg is not None:
                names.add(spec.kwarg.arg)
            return names

        for fn in _ast.walk(_tree):
            if not isinstance(fn, (_ast.FunctionDef, _ast.AsyncFunctionDef)):
                continue
            actual_params = _sig_param_names(fn)

            docstring = _ast.get_docstring(fn, clean=False)
            if not docstring:
                continue
            doc_params = _extract_documented_params(docstring)
            if not doc_params:
                continue

            extra_in_doc = set(doc_params) - actual_params
            if not extra_in_doc:
                continue

            func_name = fn.name
            line_num = fn.lineno
            detail = (
                f"Docstring/signature mismatch in {func_name}(): "
                f"documented but not a parameter: {', '.join(sorted(extra_in_doc))}"
            )
            findings.append(build_finding(
                check_id="docstring_param_scan",
                category=GateCategory.REPORTING,
                title=f"[docstring_drift] {file_path}:{line_num}:{func_name}",
                severity=GateSeverity.LOW,
                impact=GateImpact.WARN,
                summary=detail,
                recommendation=f"Update docstring for {func_name}() to match actual parameters.",
                evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                repair_kind=RepairKind.ADD_PROOF.value,
                executor_action=f"Fix docstring drift in {func_name}() at {file_path}:{line_num}",
            ))
            if len(findings) >= 10:
                break

    elif lang in ("javascript", "typescript"):
        blocks = re.finditer(r'/\*\*(.*?)\*/\s*(?:(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\))', content, re.DOTALL)
        for m in blocks:
            jsdoc = m.group(1)
            func_name = m.group(2) or m.group(4) or "anonymous"
            params_str = m.group(3) or m.group(5) or ""

            # ``\[?`` accepts JSDoc's optional-parameter spelling
            # (``@param {T} [name]`` / ``[name=default]``) so the bare name
            # is still captured.
            doc_params = re.findall(r'@param\s+(?:\{[^}]*\}\s+)?\[?\s*(\w+)', jsdoc)

            actual_params: list[str] = []
            for piece in params_str.split(","):
                # Drop any default value (``= ...``) and type annotation
                # (``: T``) to isolate the declared name.
                name = piece.split("=")[0].split(":")[0].strip()
                # Rest parameters (``...args``) are documented by their bare
                # name and TypeScript optionals (``name?``) without the
                # marker, so normalise both instead of discarding them.
                name = name.lstrip(".").rstrip("?").strip()
                if name:
                    actual_params.append(name)

            if not doc_params or not actual_params:
                continue

            line_num = content[:m.start()].count("\n") + 1
            if set(doc_params) != set(actual_params):
                detail = f"JSDoc/signature mismatch in {func_name}()"
                findings.append(build_finding(
                    check_id="docstring_param_scan",
                    category=GateCategory.REPORTING,
                    title=f"[docstring_drift] {file_path}:{line_num}:{func_name}",
                    severity=GateSeverity.LOW,
                    impact=GateImpact.WARN,
                    summary=detail,
                    recommendation=f"Update JSDoc for {func_name}() to match actual parameters.",
                    evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                    repair_kind=RepairKind.ADD_PROOF.value,
                    executor_action=f"Fix JSDoc drift in {func_name}() at {file_path}:{line_num}",
                ))
                if len(findings) >= 10:
                    break

    return findings