vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,1156 @@
1
+ """Async correctness, debug prints, commented-out code, HTTP response checks.
2
+ Clusters 39-43.
3
+
4
+ Clusters:
5
+ 39 - Broad Catch + Log Without Reraise
6
+ 40 - Debug Prints in Production
7
+ 41 - Commented-Out Code Blocks
8
+ 42 - Missing Await / Unawaited Coroutines
9
+ 43 - API Response Without Status Check
10
+ """
11
+ from __future__ import annotations
12
+
13
+ from .core import detect_language
14
+ from .exception_boundary import _extract_except_body
15
+ from ...gate_models import (
16
+ EvidenceReference,
17
+ GateCategory,
18
+ GateFinding,
19
+ GateImpact,
20
+ GateSeverity,
21
+ RepairKind,
22
+ )
23
+ from ..common import (
24
+ build_finding,
25
+ collect_constant_container_literal_lines,
26
+ collect_main_block_line_ranges,
27
+ has_allowlist_for,
28
+ is_cli_surface_file,
29
+ line_in_ranges,
30
+ )
31
+ from .._ast_helpers import (
32
+ collect_cli_output_func_line_ranges,
33
+ collect_print_call_line_nums,
34
+ collect_string_constant_line_ranges,
35
+ )
36
+ import logging
37
+ import re
38
+ _log = logging.getLogger(__name__)
39
+
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # Cluster 39: Broad Catch + Log Without Reraise
43
+ # ---------------------------------------------------------------------------
44
+
45
+
46
# Upper bound on findings reported per file by the broad-catch scan.
_BROAD_CATCH_MAX_FINDINGS = 10

# Python: ``except Exception:`` / ``except BaseException as exc:`` headers.
_PY_BROAD_EXCEPT_RE = re.compile(r'^except\s+(Exception|BaseException)(\s+as\s+\w+)?\s*:')
# JS/TS: ``catch (`` or binding-less ``catch {``, optionally preceded by the
# closing brace of the ``try`` block (``} catch (err) {``).
_JS_CATCH_RE = re.compile(r'^(?:\}\s*)?catch\s*(?:\(|\{\s*$)')
# Java: broad ``catch (Exception e)`` headers, same optional leading brace.
_JAVA_BROAD_CATCH_RE = re.compile(
    r'^(?:\}\s*)?catch\s*\(\s*(?:Exception|Throwable|RuntimeException)\s+'
)

# Statement prefixes that count as "logging only" inside a handler body.
_PY_LOG_ONLY_PREFIXES: tuple[str, ...] = ("log", "logger", "logging", "print(", "#", "warnings.warn", "traceback")  # noqa: debug_print_scan # gate pattern reference, not a production print call
_JS_LOG_ONLY_PREFIXES: tuple[str, ...] = ("console.", "//", "}")
_JAVA_LOG_ONLY_PREFIXES: tuple[str, ...] = ("log", "logger", "System.err", "System.out", "e.print", "//", "}")


def _brace_catch_body(lines: list[str], catch_idx: int) -> list[str]:
    """Return the stripped, non-blank lines that follow a brace-style catch header.

    Indentation heuristic: scanning covers at most the 14 lines after the
    header and stops at the first non-blank line that is not indented deeper
    than the header, unless that line is a lone ``}`` (which is kept).
    """
    header = lines[catch_idx]
    header_indent = len(header) - len(header.lstrip())
    body: list[str] = []
    for raw in lines[catch_idx + 1:catch_idx + 15]:
        text = raw.strip()
        if not text:
            continue
        if len(raw) - len(raw.lstrip()) <= header_indent and text != "}":
            break
        body.append(text)
    return body


def _broad_catch_finding(
    file_path: str,
    lineno: int,
    detail: str,
    recommendation: str,
    executor_action: str,
) -> GateFinding:
    """Build the ``broad_catch_scan`` finding shared by every language branch."""
    return build_finding(
        check_id="broad_catch_scan",
        category=GateCategory.RUNTIME_BEHAVIOR,
        title=f"[broad_catch_no_reraise] {file_path}:{lineno}",
        severity=GateSeverity.HIGH,
        impact=GateImpact.REVISE,
        summary=detail,
        recommendation=recommendation,
        evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
        repair_kind=RepairKind.REPLACE_WITH_FAIL_LOUD.value,
        executor_action=executor_action,
    )


def assess_broad_catch_no_reraise(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 39: Detect broad exception handlers that only log (no reraise).

    Line-based heuristic for Python, JavaScript/TypeScript and Java. A handler
    is reported when its body has no ``raise``/``throw`` (and, for Python, no
    ``return <value>``) and every body line starts with a logging-style prefix.

    Args:
        file_path: Path used for language detection and in the finding text.
        content: Full source text of the file.

    Returns:
        At most ``_BROAD_CATCH_MAX_FINDINGS`` findings; empty for blank content,
        for ``test_*`` / ``conftest*`` files and for other languages.
    """
    if not content.strip():
        return []

    lang = detect_language(file_path)
    # rsplit already yields the whole string when there is no separator.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    findings: list[GateFinding] = []
    lines = content.splitlines()

    if lang == "python":
        for idx, line in enumerate(lines):
            header = _PY_BROAD_EXCEPT_RE.match(line.strip())
            if not header:
                continue
            body = _extract_except_body(lines, idx)
            body_lines = [text.strip() for text in body.splitlines() if text.strip()]
            if not body_lines:
                continue
            # A handler that re-raises or returns a value is treated as handled.
            if any(text.startswith(("raise", "return ")) for text in body_lines):
                continue
            if not all(text.startswith(_PY_LOG_ONLY_PREFIXES) for text in body_lines):
                continue
            lineno = idx + 1
            detail = f"`except {header.group(1)}` logs but doesn't reraise (line {lineno}) -- error silently consumed"
            findings.append(_broad_catch_finding(
                file_path,
                lineno,
                detail,
                "Add `raise` after logging, or use `logger.exception()` and reraise.",
                f"Fix broad catch without reraise at {file_path}:{lineno}",
            ))
            if len(findings) >= _BROAD_CATCH_MAX_FINDINGS:
                break

    elif lang in ("javascript", "typescript"):
        for idx, line in enumerate(lines):
            if not _JS_CATCH_RE.match(line.strip()):
                continue
            body_lines = _brace_catch_body(lines, idx)
            if any(text.startswith("throw") for text in body_lines):
                continue
            log_only = all(text.startswith(_JS_LOG_ONLY_PREFIXES) for text in body_lines)
            # Require at least one console call so an empty block is not
            # described as "logging".
            if not (log_only and any(text.startswith("console.") for text in body_lines)):
                continue
            lineno = idx + 1
            detail = f"catch block logs but doesn't rethrow (line {lineno})"
            findings.append(_broad_catch_finding(
                file_path,
                lineno,
                detail,
                "Add `throw err` after logging to propagate the error.",
                f"Fix broad catch without rethrow at {file_path}:{lineno}",
            ))
            if len(findings) >= _BROAD_CATCH_MAX_FINDINGS:
                break

    elif lang == "java":
        for idx, line in enumerate(lines):
            if not _JAVA_BROAD_CATCH_RE.match(line.strip()):
                continue
            body_lines = _brace_catch_body(lines, idx)
            if any(text.startswith("throw") for text in body_lines):
                continue
            if not body_lines:
                continue
            if not all(text.startswith(_JAVA_LOG_ONLY_PREFIXES) for text in body_lines):
                continue
            lineno = idx + 1
            detail = f"Broad catch logs but doesn't rethrow (line {lineno})"
            findings.append(_broad_catch_finding(
                file_path,
                lineno,
                detail,
                "Add `throw` after logging to propagate the exception.",
                f"Fix broad catch without rethrow at {file_path}:{lineno}",
            ))
            if len(findings) >= _BROAD_CATCH_MAX_FINDINGS:
                break

    return findings
178
+
179
+
180
+ # ---------------------------------------------------------------------------
181
+ # Cluster 40: Debug Prints in Production
182
+ # ---------------------------------------------------------------------------
183
+
184
# ``console.*`` call pattern used verbatim by both the JavaScript and the
# TypeScript entries below.
_CONSOLE_CALL_PATTERN = r'\bconsole\.(log|debug|info|warn|dir|trace|table)\s*\('

# Per-language regex sources for debug-output calls, keyed by the language name
# returned by ``detect_language``. Languages without an entry are not scanned.
_DEBUG_PRINT_PATTERNS: dict[str, list[str]] = dict(
    python=[r'\bprint\s*\('],
    javascript=[_CONSOLE_CALL_PATTERN],
    typescript=[_CONSOLE_CALL_PATTERN],
    go=[r'\bfmt\.(Print|Println|Printf)\s*\('],
    rust=[r'\b(println|dbg|eprintln)!\s*\('],
    java=[r'\bSystem\.(out|err)\.(print|println)\s*\('],
    kotlin=[r'\bprintln\s*\('],
    ruby=[r'\bputs\s+', r'\bp\s+\w'],
    php=[r'\b(var_dump|print_r|echo)\s*\('],
)
195
+
196
+
197
def assess_debug_prints(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 40: Detect debug print/log statements left in production code.

    Scans ``content`` line by line against the language's entry in
    ``_DEBUG_PRINT_PATTERNS`` and reports at most 10 findings. Test files,
    anything under ``fixtures/`` and Python CLI surface files are skipped
    entirely. For Python, the AST helpers decide which lines hold a genuine
    ``print(...)`` call; a file that does not parse falls back to a
    statement-position regex.
    """
    if not content.strip():
        return []

    lang = detect_language(file_path)
    patterns = _DEBUG_PRINT_PATTERNS.get(lang)
    if not patterns:
        return []

    posix_path = file_path.replace("\\", "/")
    if posix_path.rsplit("/", 1)[-1].startswith(("test_", "conftest")):
        return []
    # Fixture samples are never production code, whatever the language.
    if "fixtures/" in posix_path:
        return []

    is_python = lang == "python"
    # print() is legitimate user-facing output in CLI surface files.
    if is_python and is_cli_surface_file(file_path):
        return []

    # Python-only exemption data; stays empty for every other language.
    main_ranges: list[tuple[int, int]] = []          # __main__ guard / CLI entrypoint bodies
    string_literal_lines: frozenset[int] = frozenset()  # multi-line string constants
    container_lines: frozenset[int] = frozenset()    # constant container literals
    print_call_lines: frozenset[int] = frozenset()   # lines holding a real print(...) call
    cli_output_ranges: list[tuple[int, int]] = []    # user-facing output functions
    python_ast_ok = True
    if is_python:
        main_ranges = collect_main_block_line_ranges(content)
        string_literal_lines = collect_string_constant_line_ranges(content)
        container_lines = collect_constant_container_literal_lines(content)
        print_call_lines = collect_print_call_line_nums(content)
        cli_output_ranges = collect_cli_output_func_line_ranges(content)
        # Every AST helper returns empty on a file that does not parse. Detect
        # that case so a broken file is not silently reported as clean.
        try:
            import ast as _ast
            _ast.parse(content)
        except (SyntaxError, ValueError):
            python_ast_ok = False

    def _is_suppressed(line_text: str, lineno: int) -> bool:
        # ``# noqa: debug_print_scan`` is handled by has_allowlist_for; a bare
        # ``# noqa`` (no ``: check_id``) suppresses everything on the line.
        if has_allowlist_for(content, "debug_print_scan", lineno):
            return True
        trailing = line_text.partition("#")[2].strip().lower()
        return trailing == "noqa" or trailing.startswith("noqa ")

    def _python_line_exempt(lineno: int, text: str) -> bool:
        # True when a Python line must not be reported.
        if '__name__' in text and '__main__' in text:
            return True
        if 'help=' in text or 'parser.add' in text:
            return True
        if main_ranges and line_in_ranges(lineno, main_ranges):
            return True
        if cli_output_ranges and line_in_ranges(lineno, cli_output_ranges):
            return True
        if lineno in string_literal_lines or lineno in container_lines:
            return True
        if python_ast_ok:
            # Authoritative: only a genuine print(...) AST call may be flagged.
            return lineno not in print_call_lines
        # Non-parsing file: the call must sit at statement position.
        return not re.match(r'print\s*\(', text)

    findings: list[GateFinding] = []

    for lineno, raw_line in enumerate(content.splitlines(), 1):
        text = raw_line.strip()
        if text.startswith(("#", "//", "*")):
            continue
        if _is_suppressed(raw_line, lineno):
            continue
        if is_python and _python_line_exempt(lineno, text):
            continue

        if any(re.search(pattern, text) for pattern in patterns):
            detail = f"Debug print in production code (line {lineno}): {text[:60]}"
            findings.append(build_finding(
                check_id="debug_print_scan",
                category=GateCategory.DRIFT,
                title=f"[debug_prints] {file_path}:{lineno}",
                severity=GateSeverity.LOW,
                impact=GateImpact.WARN,
                summary=detail,
                recommendation="Remove debug print or replace with proper logging.",
                evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                repair_kind=RepairKind.REMOVE_DUPLICATE.value,
                executor_action=f"Remove debug print at {file_path}:{lineno}",
            ))
        if len(findings) >= 10:
            break

    return findings
340
+
341
+
342
+ # ---------------------------------------------------------------------------
343
+ # Cluster 41: Commented-Out Code Blocks
344
+ # ---------------------------------------------------------------------------
345
+
346
+
347
def _collect_docstring_line_ranges(content: str) -> list[tuple[int, int]]:
    """Return the line ranges covered by module/class/function docstrings.

    AST-based (F2 reuse). A docstring is the first statement of a module,
    class or (async) function body when that statement is a bare string
    constant.

    Args:
        content: Python source text.

    Returns:
        ``(start_line, end_line)`` pairs, 1-based and inclusive, in
        ``ast.walk`` order. Empty when ``content`` cannot be parsed.
    """
    import ast

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError covers sources that older interpreters reject before
        # parsing (e.g. embedded NUL bytes).
        return []

    owners = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    ranges: list[tuple[int, int]] = []
    for node in ast.walk(tree):
        if not isinstance(node, owners):
            continue
        body = getattr(node, "body", None)
        if not body:
            continue
        first = body[0]
        is_docstring = (
            isinstance(first, ast.Expr)
            and isinstance(first.value, ast.Constant)
            and isinstance(first.value.value, str)
        )
        if is_docstring:
            start = first.lineno
            end = getattr(first, "end_lineno", start) or start
            ranges.append((start, end))
    return ranges
380
+
381
+
382
# F9d: audit-trail allowlist markers. A commented-code block is not reported
# when one of these markers appears in the block's own first 3 lines or in one
# of the 3 nearest non-blank lines above it. Matching is by substring on the
# stripped line.
_AUDIT_TRAIL_MARKERS: tuple[str, ...] = (
    "# ALLOWLIST_AUDIT_TRAIL",
    "# AUDIT_TRAIL:",
)

# F9d: commented-code blocks longer than this threshold (in consecutive comment
# lines) are likely preserved spec / algorithm documentation and are skipped to
# avoid false positives.
_COMMENTED_CODE_LONG_BLOCK_THRESHOLD = 10
392
+
393
+
394
+ # ---------------------------------------------------------------------------
395
+ # F9e: prose-vs-commented-code discrimination.
396
+ #
397
+ # The old decision ("ANY 2 lines whose body matches a permissive code_indicators
398
+ # regex") flagged explanatory PROSE that merely *mentions* a code keyword in an
399
+ # English sentence (e.g. "... a line-only regex cannot tell a swallow from the
400
+ # correct ``except BaseException: <cleanup>; raise`` idiom."). That produced a
401
+ # verified false positive at broad_except_checks.py:21.
402
+ #
403
+ # A de-commented comment block is treated as REAL commented-out code only when:
404
+ # (Python) a contiguous run of >= 2 of its body lines ``ast.parse``-s as valid
405
+ # Python statements (a prose intro line that does not parse is simply
406
+ # trimmed away — the oracle block "legacy impl ...:\n for v in ...:"
407
+ # is still caught via its inner code run), OR
408
+ # (any language, fallback) the block carries >= 2 *distinct strong* code
409
+ # signals: an assignment with an identifier LHS, a def/class/import/
410
+ # func/const/let/var header, a bare ``name(...)`` call statement, or a
411
+ # block-header line (``if ...:``/``for ...:``/``} {``).
412
+ #
413
+ # A single keyword embedded in grammatical English is NOT a strong signal, so
414
+ # prose does not reach the >= 2 bar.
415
+ # ---------------------------------------------------------------------------
416
+
417
# Strong, structural code signals (used for every language; the AST path is the
# primary signal for Python). Each regex anchors at the START of the (stripped)
# body so a keyword mid-sentence does not match.

# Assignment: an identifier (optionally followed by ``.attr`` / ``[index]``
# accessors), an optional augmented-assignment operator, ``=`` and a value.
# NOTE(review): ``x == 1`` also matches, because the character after the first
# ``=`` may itself be ``=`` -- confirm whether comparisons should count.
_STRONG_ASSIGN_RE = re.compile(
    r'^[A-Za-z_]\w*(?:\.[A-Za-z_]\w*|\[[^\]]*\])*\s*(?:[-+*/%|&^@]|//|\*\*|>>|<<)?=\s*\S'
)
# Definition / declaration header: an optional ``async`` followed by a
# declaration keyword and the start of a name.
_STRONG_DEFCLASS_RE = re.compile(
    r'^(?:async\s+)?(?:def|class|import|from|func|public|private|protected|const|let|var)\s+\w'
)
# Bare call statement spanning the whole line: ``name(...)`` or
# ``obj.method(...)`` with an optional trailing ``;``. The argument list may
# not contain a ``)``, so nested calls do not match.
_STRONG_CALL_RE = re.compile(r'^[A-Za-z_][\w.]*\s*\([^)]*\)\s*;?\s*$')
# Block header: a control-flow keyword at the start of a line that ends in
# ``:`` or ``{``, or any line that starts with a closing brace.
_STRONG_BLOCKHEAD_RE = re.compile(
    r'^(?:if|elif|else|for|while|try|except|finally|with|switch|case|do)\b.*[:{]\s*$'
    r'|^\}'
)
431
+
432
+
433
def _largest_parseable_python_run(bodies: list[str], min_run: int = 2) -> int:
    """Return the length of the longest contiguous run of >= ``min_run`` body
    lines that ``ast.parse``-s as valid Python (after dedent), else 0.

    Trims leading/trailing prose: a real commented-out block preceded by a
    one-line prose intro (which alone breaks parsing) is still recognised via
    its inner code run.

    Args:
        bodies: De-commented comment lines, in file order.
        min_run: Smallest window length (in lines) that counts as a run.

    Returns:
        Line count of the longest window that parses to at least one
        statement, or 0 when no window of ``min_run`` or more lines does.
    """
    import ast
    import textwrap

    n = len(bodies)
    if n < min_run:
        return 0
    best = 0
    for start in range(n):
        # No window starting here (or later) can be longer than ``best``.
        if n - start <= best:
            break
        # Longest window first; only lengths that would improve on ``best``
        # (and satisfy ``min_run``) are worth parsing.
        shortest = max(min_run, best + 1)
        for length in range(n - start, shortest - 1, -1):
            text = textwrap.dedent("\n".join(bodies[start:start + length]))
            if not text.strip():
                continue
            try:
                tree = ast.parse(text)
            except (SyntaxError, ValueError):
                continue
            # A window holding only comments parses to an empty body; keep
            # shrinking in that case.
            if tree.body:
                best = length
                break
    return best
465
+
466
+
467
def _count_strong_code_signals(bodies: list[str]) -> int:
    """Count how many DISTINCT kinds of strong code signal occur in the block.

    Each non-blank line is stripped and tested against the four structural
    regexes. Kinds are counted once however many lines match, so one repeated
    construct cannot clear a ">= 2" bar on its own.
    """
    detectors = (
        ("defclass", _STRONG_DEFCLASS_RE),
        ("assign", _STRONG_ASSIGN_RE),
        ("call", _STRONG_CALL_RE),
        ("blockhead", _STRONG_BLOCKHEAD_RE),
    )
    seen: set[str] = set()
    for text in (raw.strip() for raw in bodies):
        if text:
            seen.update(kind for kind, pattern in detectors if pattern.search(text))
    return len(seen)
487
+
488
+
489
def _commented_block_is_code(bodies: list[str], lang: str) -> bool:
    """Decide whether a de-commented block is real commented-out code.

    Python blocks qualify when a run of at least two lines parses as Python.
    Every language (Python included, as a backstop for blocks that no longer
    parse standalone) qualifies when the block shows at least two distinct
    strong structural signals.
    """
    parses_as_python = (
        lang == "python"
        and _largest_parseable_python_run(bodies, min_run=2) >= 2
    )
    return parses_as_python or _count_strong_code_signals(bodies) >= 2
500
+
501
+
502
def assess_commented_code(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 41: Detect blocks of commented-out code (3+ consecutive lines).

    A run of consecutive comment lines is reported when it has at least 3
    lines, at least 2 of them match the permissive ``code_indicators``
    pre-filter, and ``_commented_block_is_code`` confirms it is code rather
    than prose.

    F9d: skip blocks that (a) carry a `# ALLOWLIST_AUDIT_TRAIL` /
    `# AUDIT_TRAIL:` marker in their first 3 lines or in the 3 nearest
    non-blank lines above, (b) start inside a docstring (AST-based), or
    (c) are longer than 10 consecutive commented lines (preserved spec text).
    Blocks starting within the first 4 lines of the file are skipped as well.

    Args:
        file_path: Path used for language detection and in the finding text.
        content: Full source text of the file.

    Returns:
        At most 10 findings; empty for blank content and for data/markup
        languages (json, yaml, toml, markdown, restructuredtext).
    """
    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang in ("json", "yaml", "toml", "markdown", "restructuredtext"):
        return []

    # Hash comments for Python/shell/Ruby/PHP, ``//`` comments for the rest.
    if lang in ("python", "shell", "ruby", "php"):
        comment_re = re.compile(r'^\s*#\s?(.*)')
    else:
        comment_re = re.compile(r'^\s*//\s?(.*)')

    code_indicators = re.compile(
        r'(?:'
        r'\w+\s*=\s*\w'
        r'|def\s+\w+\s*\('
        r'|class\s+\w+'
        r'|function\s+\w+'
        r'|return\s+\w'
        r'|if\s+\w.*:'
        r'|if\s*\(.*\)\s*\{'
        r'|for\s+\w'
        r'|while\s+\w'
        r'|import\s+\w'
        r'|from\s+\w+\s+import'
        r'|\w+\.\w+\s*\('
        r'|raise\s+\w'
        r'|throw\s+\w'
        r'|except\s+\w'
        r'|catch\s*\('
        r'|try\s*[:{]'
        r')'
    )

    lines = content.splitlines()
    # F9d: AST docstring ranges (only meaningful for Python).
    docstring_ranges: list[tuple[int, int]] = []
    if lang == "python":
        docstring_ranges = _collect_docstring_line_ranges(content)

    def _line_in_docstring(lineno_1based: int) -> bool:
        return any(s <= lineno_1based <= e for s, e in docstring_ranges)

    def _line_has_marker(line_text: str) -> bool:
        stripped = line_text.strip()
        return bool(stripped) and any(marker in stripped for marker in _AUDIT_TRAIL_MARKERS)

    def _has_audit_trail_marker_above(block_start_idx: int, block_end_idx: int) -> bool:
        # Indices are 0-based; block_end_idx is exclusive. The marker is
        # typically the first comment of a preserved block, so the block's own
        # first 3 lines are checked before the lines above it.
        own_head = range(block_start_idx, min(block_start_idx + 3, block_end_idx))
        if any(_line_has_marker(lines[idx]) for idx in own_head):
            return True

        # Up to 3 preceding non-blank lines (blank lines are not counted).
        inspected = 0
        above = block_start_idx - 1
        while above >= 0 and inspected < 3:
            if lines[above].strip():
                inspected += 1
                if _line_has_marker(lines[above]):
                    return True
            above -= 1
        return False

    findings: list[GateFinding] = []
    i = 0

    while i < len(lines):
        if not comment_re.match(lines[i]):
            i += 1
            continue

        # Consume the whole run of consecutive comment lines starting at i.
        block_start = i
        code_lines = 0
        block_bodies: list[str] = []
        j = i
        while j < len(lines):
            cm = comment_re.match(lines[j])
            if not cm:
                break
            body = cm.group(1)
            block_bodies.append(body)
            if code_indicators.search(body):
                code_lines += 1
            j += 1
        block_len = j - block_start
        i = j

        # F9e: the permissive code_indicators count is a cheap PRE-FILTER only.
        if block_len < 3 or code_lines < 2:
            continue
        # Cheap unconditional skips run before the parse-based discriminator,
        # so long header/spec blocks never reach the repeated ast.parse calls.
        # NOTE(review): the first-4-lines skip presumably exempts file headers
        # (shebang / encoding / licence) -- confirm intent.
        if block_start < 4:
            continue
        # F9d: long-block skip (likely preserved algorithm doc)
        if block_len > _COMMENTED_CODE_LONG_BLOCK_THRESHOLD:
            continue
        # F9d: audit-trail marker allowlist
        if _has_audit_trail_marker_above(block_start, j):
            continue
        # F9d: docstring skip (block starts inside a docstring range)
        if _line_in_docstring(block_start + 1):
            continue
        # F9e: report only when the prose-vs-code discriminator confirms code.
        if not _commented_block_is_code(block_bodies, lang):
            continue

        detail = f"Block of {block_len} commented-out code lines starting at line {block_start + 1}"
        findings.append(build_finding(
            check_id="commented_code_scan",
            category=GateCategory.DRIFT,
            title=f"[commented_code] {file_path}:{block_start + 1}",
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=detail,
            recommendation="Remove commented-out code; use version control to recover old code if needed.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.REMOVE_DUPLICATE.value,
            executor_action=f"Remove commented-out code block at {file_path}:{block_start + 1}",
        ))
        if len(findings) >= 10:
            break

    return findings
658
+
659
+
660
+ # ---------------------------------------------------------------------------
661
+ # Cluster 42: Missing Await / Unawaited Coroutines
662
+ # ---------------------------------------------------------------------------
663
+
664
+
665
+ def _build_missing_await_findings_ast(
666
+ file_path: str,
667
+ content: str,
668
+ ) -> list[GateFinding]:
669
+ """AST-based missing-await detection for Python.
670
+
671
+ Algorithm:
672
+ 1. Parse module into AST.
673
+ 2. Collect all names defined with `async def` in the module (not just
674
+ reachable — a name defined both as sync and async triggers a
675
+ name-collision skip for that name).
676
+ 3. Pre-pass: build an "async-reachable" set of sync function names that
677
+ are clearly invoked under an async runtime (`asyncio.run(...)`,
678
+ `asyncio.gather(...)`, `asyncio.ensure_future(...)`,
679
+ `asyncio.create_task(...)`, `loop.run_until_complete(...)`) or
680
+ decorated with `@pytest.mark.asyncio`, `@asyncio.coroutine`,
681
+ `@async_timeout`. These sync defs behave like async contexts — closure
682
+ depth 1 only (no transitive resolution to avoid FP).
683
+ 4. Walk every ast.Call node; for each call whose callee name matches an
684
+ async def:
685
+ a. Walk up the parent chain to find the nearest enclosing function.
686
+ b. If enclosing function is ast.AsyncFunctionDef: require ast.Await
687
+ wrapper. Missing wrapper → real finding.
688
+ c. If enclosing function is ast.FunctionDef AND its name is in the
689
+ async-reachable set: treat like async context → emit finding.
690
+ d. If enclosing function is ast.FunctionDef (sync) and NOT reachable:
691
+ conservative skip (assume legitimate sync wrapper: thread executor
692
+ / deliberate fire-and-forget / pure sync call that happens to
693
+ share a name).
694
+ e. If no enclosing function (module-level call): also skip (likely
695
+ asyncio.run(main()) at script entry point).
696
+ 5. Pointless-async detection: async def that never contains await /
697
+ async for / async with in its body. Stubs (pass / ... /
698
+ raise NotImplementedError) are exempt.
699
+
700
+ Skip heuristics applied:
701
+ - Names with both `def X` and `async def X` → name collision, skip.
702
+ - Inside TYPE_CHECKING blocks.
703
+ - Inside @pytest.mark.asyncio decorated functions (treated like async
704
+ context for flag purposes, but those are already AsyncFunctionDef).
705
+ """
706
+ import ast
707
+
708
+ try:
709
+ tree = ast.parse(content)
710
+ except SyntaxError:
711
+ return []
712
+
713
+ findings: list[GateFinding] = []
714
+
715
+ # ------------------------------------------------------------------
716
+ # Step 1: collect async def names and detect name collisions
717
+ # ------------------------------------------------------------------
718
+ async_names: set[str] = set()
719
+ sync_names: set[str] = set()
720
+
721
+ for node in ast.walk(tree):
722
+ if isinstance(node, ast.AsyncFunctionDef):
723
+ async_names.add(node.name)
724
+ elif isinstance(node, ast.FunctionDef):
725
+ sync_names.add(node.name)
726
+
727
+ # Names that exist as BOTH sync and async — ambiguous, skip entirely
728
+ collision_names = async_names & sync_names
729
+
730
+ # ------------------------------------------------------------------
731
+ # Step 1b: async-reachability pre-pass
732
+ # - Sync `def` targets of asyncio.run / gather / ensure_future /
733
+ # create_task / loop.run_until_complete are treated as async context.
734
+ # - Sync defs decorated with @pytest.mark.asyncio, @asyncio.coroutine,
735
+ # @async_timeout also qualify.
736
+ # - Closure depth 1 only (no transitive resolution).
737
+ # ------------------------------------------------------------------
738
+ _ASYNC_RUNNER_ATTRS = {
739
+ "run",
740
+ "gather",
741
+ "ensure_future",
742
+ "create_task",
743
+ "run_until_complete",
744
+ "run_coroutine_threadsafe",
745
+ }
746
+ _ASYNC_RUNNER_BARE = {"gather", "ensure_future", "create_task"}
747
+ _ASYNC_DECO_NAMES = {"asyncio.coroutine", "async_timeout"}
748
+ _ASYNC_DECO_PYTEST_ATTR = "asyncio" # for @pytest.mark.asyncio
749
+
750
+ def _arg_callee_name(arg: ast.AST) -> str | None:
751
+ """Extract simple name from a call-arg that is either Name or Call(Name)."""
752
+ if isinstance(arg, ast.Call):
753
+ func = arg.func
754
+ if isinstance(func, ast.Name):
755
+ return func.id
756
+ if isinstance(func, ast.Attribute):
757
+ return func.attr
758
+ return None
759
+ if isinstance(arg, ast.Name):
760
+ return arg.id
761
+ if isinstance(arg, ast.Attribute):
762
+ return arg.attr
763
+ return None
764
+
765
+ def _is_async_runner_call(call: ast.Call) -> bool:
766
+ func = call.func
767
+ if isinstance(func, ast.Attribute) and func.attr in _ASYNC_RUNNER_ATTRS:
768
+ return True
769
+ if isinstance(func, ast.Name) and func.id in _ASYNC_RUNNER_BARE:
770
+ # bare `gather(x())` / `create_task(x())` after `from asyncio import ...`
771
+ return True
772
+ return False
773
+
774
+ def _decorator_marks_async(dec: ast.AST) -> bool:
775
+ # @asyncio.coroutine / @async_timeout / @pytest.mark.asyncio
776
+ if isinstance(dec, ast.Call):
777
+ dec = dec.func
778
+ if isinstance(dec, ast.Name):
779
+ return dec.id == "async_timeout"
780
+ if isinstance(dec, ast.Attribute):
781
+ # @asyncio.coroutine
782
+ if isinstance(dec.value, ast.Name) and dec.value.id == "asyncio" and dec.attr == "coroutine":
783
+ return True
784
+ # @async_timeout.timeout — also async scope
785
+ if isinstance(dec.value, ast.Name) and dec.value.id == "async_timeout":
786
+ return True
787
+ # @pytest.mark.asyncio (Attribute: value=Attribute(pytest, mark), attr=asyncio)
788
+ if dec.attr == _ASYNC_DECO_PYTEST_ATTR:
789
+ inner = dec.value
790
+ if isinstance(inner, ast.Attribute) and inner.attr == "mark":
791
+ if isinstance(inner.value, ast.Name) and inner.value.id == "pytest":
792
+ return True
793
+ return False
794
+
795
+ async_reachable_syncs: set[str] = set()
796
+
797
+ # Decorator-driven reachability
798
+ for node in ast.walk(tree):
799
+ if isinstance(node, ast.FunctionDef):
800
+ for dec in node.decorator_list:
801
+ if _decorator_marks_async(dec):
802
+ async_reachable_syncs.add(node.name)
803
+ break
804
+
805
+ # Runner-argument-driven reachability
806
+ for node in ast.walk(tree):
807
+ if not isinstance(node, ast.Call):
808
+ continue
809
+ if not _is_async_runner_call(node):
810
+ continue
811
+ for arg in list(node.args) + [kw.value for kw in node.keywords]:
812
+ name = _arg_callee_name(arg)
813
+ if name is None:
814
+ continue
815
+ # Only promote to "reachable" if this name is defined as a sync def
816
+ # in THIS module (closure depth 1). Async defs need no promotion.
817
+ if name in sync_names and name not in async_names:
818
+ async_reachable_syncs.add(name)
819
+
820
+ if not async_names:
821
+ return []
822
+
823
+ # ------------------------------------------------------------------
824
+ # Step 2: detect TYPE_CHECKING blocks to exclude their contents
825
+ # ------------------------------------------------------------------
826
+ # Collect line ranges that are inside `if TYPE_CHECKING:` guards.
827
+ type_checking_ranges: list[tuple[int, int]] = []
828
+ for node in ast.walk(tree):
829
+ if isinstance(node, ast.If):
830
+ test = node.test
831
+ is_tc = (
832
+ (isinstance(test, ast.Name) and test.id == "TYPE_CHECKING")
833
+ or (isinstance(test, ast.Attribute) and test.attr == "TYPE_CHECKING")
834
+ )
835
+ if is_tc and hasattr(node, "lineno") and hasattr(node, "end_lineno"):
836
+ type_checking_ranges.append((node.lineno, node.end_lineno or node.lineno))
837
+
838
+ def _in_type_checking(lineno: int) -> bool:
839
+ return any(start <= lineno <= end for start, end in type_checking_ranges)
840
+
841
+ # ------------------------------------------------------------------
842
+ # Step 3: build parent map for ancestor walking
843
+ # ------------------------------------------------------------------
844
+ parent_map: dict[int, ast.AST] = {}
845
+ for node in ast.walk(tree):
846
+ for child in ast.iter_child_nodes(node):
847
+ parent_map[id(child)] = node
848
+
849
+ def _get_enclosing_func(node: ast.AST) -> ast.AsyncFunctionDef | ast.FunctionDef | None:
850
+ """Walk parent chain, return nearest enclosing function def or None."""
851
+ current = parent_map.get(id(node))
852
+ while current is not None:
853
+ if isinstance(current, (ast.AsyncFunctionDef, ast.FunctionDef)):
854
+ return current
855
+ current = parent_map.get(id(current))
856
+ return None
857
+
858
+ def _is_directly_awaited(call_node: ast.Call) -> bool:
859
+ """Return True if the Call node is the direct expression of an Await."""
860
+ parent = parent_map.get(id(call_node))
861
+ return isinstance(parent, ast.Await)
862
+
863
+ def _is_asyncio_run_call(call_node: ast.Call) -> bool:
864
+ """Return True if this call is the argument to asyncio.run() or
865
+ loop.run_until_complete() in the same statement."""
866
+ parent = parent_map.get(id(call_node))
867
+ if not isinstance(parent, ast.Call):
868
+ return False
869
+ func = parent.func
870
+ if isinstance(func, ast.Attribute):
871
+ if func.attr in ("run", "run_until_complete", "run_coroutine_threadsafe"):
872
+ return True
873
+ if isinstance(func, ast.Name) and func.id == "run":
874
+ return True
875
+ return False
876
+
877
+ def _callee_name(call_node: ast.Call) -> str | None:
878
+ """Extract simple name from a Call node's func field."""
879
+ func = call_node.func
880
+ if isinstance(func, ast.Name):
881
+ return func.id
882
+ if isinstance(func, ast.Attribute):
883
+ return func.attr
884
+ return None
885
+
886
+ # ------------------------------------------------------------------
887
+ # Step 4: walk all Call nodes; flag un-awaited calls to async funcs
888
+ # inside async context
889
+ # ------------------------------------------------------------------
890
+ for node in ast.walk(tree):
891
+ if not isinstance(node, ast.Call):
892
+ continue
893
+ if not hasattr(node, "lineno"):
894
+ continue
895
+ if _in_type_checking(node.lineno):
896
+ continue
897
+
898
+ name = _callee_name(node)
899
+ if name is None or name not in async_names or name in collision_names:
900
+ continue
901
+
902
+ # Skip if already awaited
903
+ if _is_directly_awaited(node):
904
+ continue
905
+
906
+ # Skip if passed into asyncio.run() / run_until_complete() etc.
907
+ if _is_asyncio_run_call(node):
908
+ continue
909
+
910
+ enclosing = _get_enclosing_func(node)
911
+
912
+ if enclosing is None:
913
+ # Module-level call — conservative skip (likely asyncio.run(main()))
914
+ continue
915
+
916
+ if isinstance(enclosing, ast.FunctionDef):
917
+ # Sync enclosing function. Only treat as async context if this
918
+ # sync def is in the async-reachable set (closure depth 1:
919
+ # invoked under asyncio.run/gather/ensure_future/create_task/
920
+ # run_until_complete OR decorated with @pytest.mark.asyncio,
921
+ # @asyncio.coroutine, @async_timeout). Otherwise keep
922
+ # conservative skip.
923
+ if enclosing.name not in async_reachable_syncs:
924
+ continue
925
+ # Fall through → emit finding (sync def runs in async context).
926
+
927
+ # enclosing is AsyncFunctionDef (or async-reachable sync) and call is
928
+ # NOT awaited → real bug
929
+ lineno = node.lineno
930
+ if len(findings) >= 10:
931
+ break
932
+ detail = f"Async function '{name}()' called without `await` (line {lineno})"
933
+ findings.append(build_finding(
934
+ check_id="missing_await_scan",
935
+ category=GateCategory.RUNTIME_BEHAVIOR,
936
+ title=f"[missing_await] {file_path}:{lineno}",
937
+ severity=GateSeverity.HIGH,
938
+ impact=GateImpact.REVISE,
939
+ summary=detail,
940
+ recommendation=f"Add `await` before calling `{name}()` inside an async context.",
941
+ evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
942
+ repair_kind=RepairKind.FIX_CONTRACT.value,
943
+ executor_action=f"Add missing await at {file_path}:{lineno}",
944
+ ))
945
+
946
+ # ------------------------------------------------------------------
947
+ # Step 5: pointless-async detection (unchanged logic, regex-free)
948
+ # ------------------------------------------------------------------
949
+ for node in ast.walk(tree):
950
+ if not isinstance(node, ast.AsyncFunctionDef):
951
+ continue
952
+ # Check body for any await / async for / async with
953
+ has_await = False
954
+ for child in ast.walk(node):
955
+ if child is node:
956
+ continue
957
+ if isinstance(child, (ast.Await, ast.AsyncFor, ast.AsyncWith)):
958
+ has_await = True
959
+ break
960
+ if has_await:
961
+ continue
962
+ # Exempt stubs
963
+ body_nodes = node.body
964
+ if len(body_nodes) == 1:
965
+ stmt = body_nodes[0]
966
+ if isinstance(stmt, ast.Pass):
967
+ continue
968
+ if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Constant):
969
+ if stmt.value.value is ...:
970
+ continue
971
+ if isinstance(stmt, ast.Raise):
972
+ continue
973
+ lineno = node.lineno
974
+ func_name = node.name
975
+ detail = f"async def {func_name}() never uses await -- pointless async"
976
+ findings.append(build_finding(
977
+ check_id="missing_await_scan",
978
+ category=GateCategory.RUNTIME_BEHAVIOR,
979
+ title=f"[missing_await] {file_path}:{lineno}:{func_name}",
980
+ severity=GateSeverity.LOW,
981
+ impact=GateImpact.WARN,
982
+ summary=detail,
983
+ recommendation=f"Remove `async` from `{func_name}()` if it doesn't need to be async.",
984
+ evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
985
+ repair_kind=RepairKind.FIX_CONTRACT.value,
986
+ executor_action=f"Remove pointless async from {func_name}() at {file_path}:{lineno}",
987
+ ))
988
+
989
+ return findings
990
+
991
+
992
def assess_missing_await(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 42: Detect async calls without await and pointless async functions.

    Python files are delegated to the AST-based scanner. JavaScript and
    TypeScript files are scanned line by line for `fetch(` calls that are
    neither awaited on the same line nor followed by a `.then(` handler,
    either on the same line or on the continuation lines of the same
    statement (the usual multi-line promise-chain layout).

    Args:
        file_path: Path of the file; used for language detection and for
            labelling findings.
        content: Full text of the file.

    Returns:
        A list of findings (at most 10 for JavaScript/TypeScript); empty for
        blank content or unsupported languages.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []

    if lang == "python":
        return _build_missing_await_findings_ast(file_path, content)

    findings: list[GateFinding] = []
    lines = content.splitlines()
    fetch_call = re.compile(r'\bfetch\s*\(')
    lookahead = 10  # max continuation lines inspected after the fetch line

    def _bracket_delta(text: str) -> int:
        # Rough nesting change contributed by `text`; brackets inside string
        # literals are not distinguished (heuristic).
        return sum(text.count(c) for c in "([{") - sum(text.count(c) for c in ")]}")

    def _then_on_continuation(line_idx: int, call_tail: str) -> bool:
        """Return True if a `.then(` handler follows on the statement's
        continuation lines.

        Continuation lines are the lines consumed while the brackets opened
        from `fetch(` onward are still unbalanced, plus any directly
        following lines that start with `.` (method chaining).
        """
        depth = _bracket_delta(call_tail)
        for follow in lines[line_idx + 1:line_idx + 1 + lookahead]:
            text = follow.strip()
            if not text or text.startswith("//"):
                continue
            if depth <= 0 and not text.startswith("."):
                # Statement ended without a chained handler.
                return False
            if ".then(" in text:
                return True
            depth += _bracket_delta(text)
        return False

    for i, line in enumerate(lines, 1):
        stripped = line.strip()
        if stripped.startswith("//"):
            continue
        match = fetch_call.search(stripped)
        if match is None or "await" in stripped or ".then(" in stripped:
            continue
        if _then_on_continuation(i - 1, stripped[match.start():]):
            continue
        detail = f"fetch() called without `await` or `.then()` (line {i})"
        findings.append(build_finding(
            check_id="missing_await_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[missing_await] {file_path}:{i}",
            severity=GateSeverity.HIGH,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation="Add `await` before `fetch()` or chain `.then()` to handle the promise.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Add missing await/then at {file_path}:{i}",
        ))
        if len(findings) >= 10:
            break

    return findings
1035
+
1036
+
1037
+ # ---------------------------------------------------------------------------
1038
+ # Cluster 43: API Response Without Status Check
1039
+ # ---------------------------------------------------------------------------
1040
+
1041
+
1042
def assess_unchecked_response(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 43: Detect HTTP responses used without checking status.

    For Python, looks for `<var> = requests.<verb>(...)` and
    `<var> = httpx.<fn>(...)` / `urllib.request.urlopen(...)` assignments;
    for JavaScript/TypeScript, for `<var> = await fetch(...)`. A finding is
    emitted when none of the 10 lines following the assignment reference a
    status attribute of that variable. Files whose basename starts with
    `test_` or `conftest` are ignored.

    The variable reference is matched on a word boundary, so a response named
    `r` is not considered checked because of an unrelated `server.status_code`
    a few lines below.

    Args:
        file_path: Path of the file; used for language detection, test-file
            exclusion and for labelling findings.
        content: Full text of the file.

    Returns:
        A list of findings; scanning stops once 10 have been collected.
        Empty for blank content, unsupported languages, or test files.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []

    # rsplit on a path without separators returns the path itself, so no
    # separate "has a slash" branch is needed.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    findings: list[GateFinding] = []
    lines = content.splitlines()

    def _has_status_check(var_name: str, lineno: int, attrs: tuple[str, ...]) -> bool:
        """Return True if any of the 10 lines after `lineno` (1-based)
        references `<var_name>.<attr>` for one of `attrs` (prefix match on
        the attribute, word boundary before the variable)."""
        probe = re.compile(
            r"\b" + re.escape(var_name) + r"\.(?:" + "|".join(re.escape(a) for a in attrs) + r")"
        )
        # `lineno` is 1-based, so index `lineno` is the line AFTER the call.
        return any(probe.search(candidate) for candidate in lines[lineno:lineno + 10])

    def _report(lineno: int, detail: str, recommendation: str) -> None:
        findings.append(build_finding(
            check_id="response_status_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[unchecked_response] {file_path}:{lineno}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.ADD_BOUNDARY_CHECK.value,
            executor_action=f"Add status check at {file_path}:{lineno}",
        ))

    if lang == "python":
        requests_assign = re.compile(r'(\w+)\s*=\s*requests\.(get|post|put|delete|patch)\s*\(')
        other_client_assign = re.compile(r'(\w+)\s*=\s*(?:httpx\.\w+|urllib\.request\.urlopen)\s*\(')
        leading_assign = re.compile(r'\s*(\w+)\s*=')

        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            if stripped.startswith("#"):
                continue

            m = requests_assign.search(stripped)
            if m and not _has_status_check(
                m.group(1), i, ("raise_for_status()", "status_code", "ok")
            ):
                _report(
                    i,
                    f"requests.{m.group(2)}() without status check (line {i}) -- use .raise_for_status()",
                    "Call `.raise_for_status()` or check `.status_code` before using the response.",
                )

            if other_client_assign.search(stripped):
                # Only plain `name = ...` statements at the start of the line
                # are tracked for these clients.
                var_m = leading_assign.match(stripped)
                if var_m and not _has_status_check(
                    var_m.group(1), i, ("status", "raise_for_status")
                ):
                    _report(
                        i,
                        f"HTTP response without status check (line {i})",
                        "Check the response status before processing.",
                    )

            if len(findings) >= 10:
                break

    elif lang in ("javascript", "typescript"):
        fetch_assign = re.compile(r'(\w+)\s*=\s*await\s+fetch\s*\(')

        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            if stripped.startswith("//"):
                continue

            m = fetch_assign.search(stripped)
            # `!res.ok` and `res.status !==` are already covered by the
            # `.ok` / `.status` prefixes, so no separate negated check.
            if m and not _has_status_check(m.group(1), i, ("ok", "status")):
                _report(
                    i,
                    f"fetch() result used without .ok/.status check (line {i})",
                    "Check `response.ok` or `response.status` before processing the response.",
                )

            if len(findings) >= 10:
                break

    return findings