vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,1156 @@
|
|
|
1
|
+
"""Async correctness, debug prints, commented-out code, HTTP response checks.
|
|
2
|
+
Clusters 39-43.
|
|
3
|
+
|
|
4
|
+
Clusters:
|
|
5
|
+
39 - Broad Catch + Log Without Reraise
|
|
6
|
+
40 - Debug Prints in Production
|
|
7
|
+
41 - Commented-Out Code Blocks
|
|
8
|
+
42 - Missing Await / Unawaited Coroutines
|
|
9
|
+
43 - API Response Without Status Check
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from .core import detect_language
|
|
14
|
+
from .exception_boundary import _extract_except_body
|
|
15
|
+
from ...gate_models import (
|
|
16
|
+
EvidenceReference,
|
|
17
|
+
GateCategory,
|
|
18
|
+
GateFinding,
|
|
19
|
+
GateImpact,
|
|
20
|
+
GateSeverity,
|
|
21
|
+
RepairKind,
|
|
22
|
+
)
|
|
23
|
+
from ..common import (
|
|
24
|
+
build_finding,
|
|
25
|
+
collect_constant_container_literal_lines,
|
|
26
|
+
collect_main_block_line_ranges,
|
|
27
|
+
has_allowlist_for,
|
|
28
|
+
is_cli_surface_file,
|
|
29
|
+
line_in_ranges,
|
|
30
|
+
)
|
|
31
|
+
from .._ast_helpers import (
|
|
32
|
+
collect_cli_output_func_line_ranges,
|
|
33
|
+
collect_print_call_line_nums,
|
|
34
|
+
collect_string_constant_line_ranges,
|
|
35
|
+
)
|
|
36
|
+
import logging
|
|
37
|
+
import re
|
|
38
|
+
_log = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# Cluster 39: Broad Catch + Log Without Reraise
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def assess_broad_catch_no_reraise(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 39: flag broad exception handlers that only log.

    Line-based heuristic for three language families:

    * Python -- ``except Exception`` / ``except BaseException`` whose body
      consists solely of logging-like statements (or comments).
    * JavaScript / TypeScript -- ``catch (...)`` / ``catch {`` whose body
      contains at least one ``console.`` call and nothing but ``console.``
      calls, ``//`` comments and closing braces.
    * Java -- ``catch (Exception|Throwable|RuntimeException ...)`` whose body
      is only logging / printing calls, comments and closing braces.

    Files whose basename starts with ``test_`` or ``conftest`` are skipped,
    and at most 10 findings are returned per file.

    Args:
        file_path: Path of the scanned file; used for language detection and
            echoed into the finding title / evidence.
        content: Full text of the file.

    Returns:
        A list of HIGH-severity ``broad_catch_scan`` findings (possibly empty).
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    posix_path = file_path.replace("\\", "/")
    basename = posix_path.rsplit("/", 1)[-1] if "/" in posix_path else file_path
    if basename.startswith(("test_", "conftest")):
        return []

    findings: list[GateFinding] = []
    source_lines = content.splitlines()

    def _report(lineno: int, detail: str, recommendation: str, verb: str) -> None:
        # Every branch emits the same finding shape; only the wording differs.
        findings.append(build_finding(
            check_id="broad_catch_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[broad_catch_no_reraise] {file_path}:{lineno}",
            severity=GateSeverity.HIGH,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.REPLACE_WITH_FAIL_LOUD.value,
            executor_action=f"Fix broad catch without {verb} at {file_path}:{lineno}",
        ))

    def _brace_body(catch_idx: int) -> list[str]:
        # Collect the stripped, non-blank lines that follow a ``catch`` line,
        # looking at no more than the next 14 lines. Scanning stops at the
        # first line indented no deeper than the ``catch`` itself, except that
        # a lone ``}`` at that depth is kept and scanning continues.
        opener = source_lines[catch_idx]
        base_indent = len(opener) - len(opener.lstrip())
        collected: list[str] = []
        for following in source_lines[catch_idx + 1:catch_idx + 15]:
            text = following.strip()
            if not text:
                continue
            depth = len(following) - len(following.lstrip())
            if depth <= base_indent and text != "}":
                break
            collected.append(text)
        return collected

    if lang == "python":
        handler_re = re.compile(r'^except\s+(Exception|BaseException)(\s+as\s+\w+)?\s*:')
        log_prefixes = (
            "log", "logger", "logging",
            "print(",  # noqa: debug_print_scan # gate pattern reference, not a production print call
            "#", "warnings.warn", "traceback",
        )
        for idx, raw in enumerate(source_lines):
            handler = handler_re.match(raw.strip())
            if handler is None:
                continue
            # ``_extract_except_body`` yields the handler body as text
            # (presumably the indented suite under the ``except`` line).
            body_text = _extract_except_body(source_lines, idx)
            statements = [part.strip() for part in body_text.splitlines() if part.strip()]
            if not statements:
                continue
            # A handler that re-raises or returns a value is not a silent swallow.
            if any(s.startswith("raise") or s.startswith("return ") for s in statements):
                continue
            if not all(s.startswith(log_prefixes) for s in statements):
                continue
            lineno = idx + 1
            exc_type = handler.group(1)
            _report(
                lineno,
                f"`except {exc_type}` logs but doesn't reraise (line {lineno}) -- error silently consumed",
                "Add `raise` after logging, or use `logger.exception()` and reraise.",
                "reraise",
            )
            if len(findings) >= 10:
                break

    elif lang in ("javascript", "typescript"):
        for idx, raw in enumerate(source_lines):
            head = raw.strip()
            if not (re.match(r'^catch\s*\(', head) or head == "catch {"):
                continue
            statements = _brace_body(idx)
            if any(s.startswith("throw") for s in statements):
                continue
            only_logging = all(s.startswith(("console.", "//", "}")) for s in statements)
            logs_something = any(s.startswith("console.") for s in statements)
            if only_logging and logs_something:
                lineno = idx + 1
                _report(
                    lineno,
                    f"catch block logs but doesn't rethrow (line {lineno})",
                    "Add `throw err` after logging to propagate the error.",
                    "rethrow",
                )
                if len(findings) >= 10:
                    break

    elif lang == "java":
        for idx, raw in enumerate(source_lines):
            head = raw.strip()
            if not re.match(r'^catch\s*\(\s*(Exception|Throwable|RuntimeException)\s+', head):
                continue
            statements = _brace_body(idx)
            if any(s.startswith("throw") for s in statements):
                continue
            java_log_prefixes = ("log", "logger", "System.err", "System.out", "e.print", "//", "}")
            if statements and all(s.startswith(java_log_prefixes) for s in statements):
                lineno = idx + 1
                _report(
                    lineno,
                    f"Broad catch logs but doesn't rethrow (line {lineno})",
                    "Add `throw` after logging to propagate the exception.",
                    "rethrow",
                )
                if len(findings) >= 10:
                    break

    return findings
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# ---------------------------------------------------------------------------
|
|
181
|
+
# Cluster 40: Debug Prints in Production
|
|
182
|
+
# ---------------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
# Per-language regex sources that identify print / console-style output calls.
# Keys are language names; each value is a list of patterns that are searched
# (``re.search``) against a single source line. A language with no entry here
# has no debug-print patterns to scan for.
_DEBUG_PRINT_PATTERNS: dict[str, list[str]] = {
    "python": [r'\bprint\s*\('],
    "javascript": [r'\bconsole\.(log|debug|info|warn|dir|trace|table)\s*\('],
    "typescript": [r'\bconsole\.(log|debug|info|warn|dir|trace|table)\s*\('],
    "go": [r'\bfmt\.(Print|Println|Printf)\s*\('],
    "rust": [r'\b(println|dbg|eprintln)!\s*\('],
    "java": [r'\bSystem\.(out|err)\.(print|println)\s*\('],
    "kotlin": [r'\bprintln\s*\('],
    # Ruby: ``puts <arg>`` and the ``p <arg>`` inspection shorthand.
    "ruby": [r'\bputs\s+', r'\bp\s+\w'],
    # NOTE(review): only the parenthesised call form is matched; a bare
    # ``echo "..."`` statement would not match -- confirm this is intended.
    "php": [r'\b(var_dump|print_r|echo)\s*\('],
}
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def assess_debug_prints(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 40: flag print / console-style output left in production code.

    The file is scanned line by line against the patterns registered for its
    language in ``_DEBUG_PRINT_PATTERNS``. Whole files are skipped when they
    are empty, have no patterns for their language, look like tests
    (``test_*`` / ``conftest*`` basename), live under a ``fixtures/``
    directory, or are Python CLI surface files. At most one finding is
    emitted per line and at most 10 per file.

    Args:
        file_path: Path of the scanned file; used for language detection,
            path-based exemptions and the finding title / evidence.
        content: Full text of the file.

    Returns:
        A list of LOW-severity ``debug_print_scan`` findings (possibly empty).
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    patterns = _DEBUG_PRINT_PATTERNS.get(lang)
    if not patterns:
        return []

    posix_path = file_path.replace("\\", "/")
    basename = posix_path.rsplit("/", 1)[-1] if "/" in posix_path else file_path
    if basename.startswith(("test_", "conftest")):
        return []

    # Fixture samples (e.g. polyglot JS/JSX files under fixtures/) are never
    # production code, whatever their language.
    if "fixtures/" in posix_path:
        return []

    is_python = lang == "python"

    # F14c sub-fix 3: in a Python CLI surface file ``print()`` is legitimate
    # user-facing output, so the whole file is exempt.
    if is_python and is_cli_surface_file(file_path):
        return []

    # Python-only exemption data, all AST-derived with 1-based line numbers:
    #   main_ranges          -- lines inside an ``if __name__ == "__main__":``
    #                           guard or a conventionally named CLI entrypoint
    #                           (F14c sub-fix 3).
    #   string_literal_lines -- interior lines of multi-line string constants,
    #                           e.g. docstrings that merely describe a print
    #                           pattern (F14c extra).
    #   container_lines      -- string literals inside UPPER_CASE module-level
    #                           container assignments, so the gate does not
    #                           match its own pattern tables (F14c sub-fix 1).
    #   print_call_lines     -- lines carrying a genuine ``print(...)`` call;
    #                           the authoritative signal when the file parses.
    #   cli_output_ranges    -- bodies of user-facing output functions
    #                           (``print_*`` / ``_print_*`` / ``main`` / ``cli``
    #                           ...) where printing is intentional.
    main_ranges: list[tuple[int, int]] = []
    string_literal_lines: frozenset[int] = frozenset()
    container_lines: frozenset[int] = frozenset()
    print_call_lines: frozenset[int] = frozenset()
    cli_output_ranges: list[tuple[int, int]] = []
    python_ast_ok = True
    if is_python:
        main_ranges = collect_main_block_line_ranges(content)
        string_literal_lines = collect_string_constant_line_ranges(content)
        container_lines = collect_constant_container_literal_lines(content)
        print_call_lines = collect_print_call_line_nums(content)
        cli_output_ranges = collect_cli_output_func_line_ranges(content)
        # When the source does not parse, the AST helpers above come back
        # empty. Record that so a broken file is still scanned through the
        # statement-position regex fallback instead of yielding nothing.
        try:
            import ast as _ast
            _ast.parse(content)
        except (SyntaxError, ValueError):
            python_ast_ok = False

    def _suppressed(line_text: str, lineno: int) -> bool:
        # ``# noqa: debug_print_scan`` is resolved by ``has_allowlist_for``;
        # a bare ``# noqa`` (no ``: check_id``) silences the line as well.
        if has_allowlist_for(content, "debug_print_scan", lineno):
            return True
        trailing = line_text.partition("#")[2].strip().lower()
        return trailing == "noqa" or trailing.startswith("noqa ")

    def _python_line_exempt(lineno: int, text: str) -> bool:
        # The main-guard line itself.
        if '__name__' in text and '__main__' in text:
            return True
        # argparse-style help text / parser setup.
        if 'help=' in text or 'parser.add' in text:
            return True
        if main_ranges and line_in_ranges(lineno, main_ranges):
            return True
        if cli_output_ranges and line_in_ranges(lineno, cli_output_ranges):
            return True
        if lineno in string_literal_lines or lineno in container_lines:
            return True
        if python_ast_ok:
            # Only a real print call may be flagged; this rejects the text
            # inside string literals and attribute calls.
            return lineno not in print_call_lines
        # Unparseable file: require the stripped line to START with the call,
        # so an occurrence buried mid-line is still not flagged.
        return not re.match(r'print\s*\(', text)

    findings: list[GateFinding] = []

    for lineno, raw in enumerate(content.splitlines(), 1):
        text = raw.strip()
        # Comment lines (``#``, ``//``, block-comment ``*`` continuation).
        if text.startswith(("#", "//", "*")):
            continue
        if _suppressed(raw, lineno):
            continue
        if is_python and _python_line_exempt(lineno, text):
            continue
        if not any(re.search(pat, text) for pat in patterns):
            continue

        detail = f"Debug print in production code (line {lineno}): {text[:60]}"
        findings.append(build_finding(
            check_id="debug_print_scan",
            category=GateCategory.DRIFT,
            title=f"[debug_prints] {file_path}:{lineno}",
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=detail,
            recommendation="Remove debug print or replace with proper logging.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.REMOVE_DUPLICATE.value,
            executor_action=f"Remove debug print at {file_path}:{lineno}",
        ))
        if len(findings) >= 10:
            break

    return findings
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
# ---------------------------------------------------------------------------
|
|
343
|
+
# Cluster 41: Commented-Out Code Blocks
|
|
344
|
+
# ---------------------------------------------------------------------------
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def _collect_docstring_line_ranges(content: str) -> list[tuple[int, int]]:
|
|
348
|
+
"""Return list of (start_line, end_line) ranges covered by module/class/
|
|
349
|
+
function docstrings. AST-based (F2 reuse).
|
|
350
|
+
|
|
351
|
+
1-based inclusive line numbers.
|
|
352
|
+
"""
|
|
353
|
+
import ast
|
|
354
|
+
|
|
355
|
+
try:
|
|
356
|
+
tree = ast.parse(content)
|
|
357
|
+
except SyntaxError:
|
|
358
|
+
return []
|
|
359
|
+
|
|
360
|
+
ranges: list[tuple[int, int]] = []
|
|
361
|
+
for node in ast.walk(tree):
|
|
362
|
+
if not isinstance(
|
|
363
|
+
node,
|
|
364
|
+
(ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef),
|
|
365
|
+
):
|
|
366
|
+
continue
|
|
367
|
+
body = getattr(node, "body", None)
|
|
368
|
+
if not body:
|
|
369
|
+
continue
|
|
370
|
+
first = body[0]
|
|
371
|
+
if (
|
|
372
|
+
isinstance(first, ast.Expr)
|
|
373
|
+
and isinstance(first.value, ast.Constant)
|
|
374
|
+
and isinstance(first.value.value, str)
|
|
375
|
+
):
|
|
376
|
+
start = first.lineno
|
|
377
|
+
end = getattr(first, "end_lineno", start) or start
|
|
378
|
+
ranges.append((start, end))
|
|
379
|
+
return ranges
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
# F9d: audit-trail allowlist markers. If a commented-code block is preceded
# within 3 lines by one of these markers, skip the finding. The entries are
# literal comment prefixes, including the leading ``# ``.
_AUDIT_TRAIL_MARKERS: tuple[str, ...] = (
    "# ALLOWLIST_AUDIT_TRAIL",
    "# AUDIT_TRAIL:",
)

# F9d: commented-code blocks longer than this threshold (counted in
# consecutive commented lines) are likely preserved spec / algorithm
# documentation and are skipped to avoid false positives.
_COMMENTED_CODE_LONG_BLOCK_THRESHOLD: int = 10
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
# ---------------------------------------------------------------------------
|
|
395
|
+
# F9e: prose-vs-commented-code discrimination.
|
|
396
|
+
#
|
|
397
|
+
# The old decision ("ANY 2 lines whose body matches a permissive code_indicators
|
|
398
|
+
# regex") flagged explanatory PROSE that merely *mentions* a code keyword in an
|
|
399
|
+
# English sentence (e.g. "... a line-only regex cannot tell a swallow from the
|
|
400
|
+
# correct ``except BaseException: <cleanup>; raise`` idiom."). That produced a
|
|
401
|
+
# verified false positive at broad_except_checks.py:21.
|
|
402
|
+
#
|
|
403
|
+
# A de-commented comment block is treated as REAL commented-out code only when:
|
|
404
|
+
# (Python) a contiguous run of >= 2 of its body lines ``ast.parse``-s as valid
|
|
405
|
+
# Python statements (a prose intro line that does not parse is simply
|
|
406
|
+
# trimmed away — the oracle block "legacy impl ...:\n for v in ...:"
|
|
407
|
+
# is still caught via its inner code run), OR
|
|
408
|
+
# (any language, fallback) the block carries >= 2 *distinct strong* code
|
|
409
|
+
# signals: an assignment with an identifier LHS, a def/class/import/
|
|
410
|
+
# func/const/let/var header, a bare ``name(...)`` call statement, or a
|
|
411
|
+
# block-header line (``if ...:``/``for ...:``/``} {``).
|
|
412
|
+
#
|
|
413
|
+
# A single keyword embedded in grammatical English is NOT a strong signal, so
|
|
414
|
+
# prose does not reach the >= 2 bar.
|
|
415
|
+
# ---------------------------------------------------------------------------
|
|
416
|
+
|
|
417
|
+
# Strong, structural code signals (used for every language; the AST path is the
|
|
418
|
+
# primary signal for Python). Each regex anchors at the START of the (stripped)
|
|
419
|
+
# body so a keyword mid-sentence does not match.
|
|
420
|
+
_STRONG_ASSIGN_RE = re.compile(
|
|
421
|
+
r'^[A-Za-z_]\w*(?:\.[A-Za-z_]\w*|\[[^\]]*\])*\s*(?:[-+*/%|&^@]|//|\*\*|>>|<<)?=\s*\S'
|
|
422
|
+
)
|
|
423
|
+
_STRONG_DEFCLASS_RE = re.compile(
|
|
424
|
+
r'^(?:async\s+)?(?:def|class|import|from|func|public|private|protected|const|let|var)\s+\w'
|
|
425
|
+
)
|
|
426
|
+
_STRONG_CALL_RE = re.compile(r'^[A-Za-z_][\w.]*\s*\([^)]*\)\s*;?\s*$')
|
|
427
|
+
_STRONG_BLOCKHEAD_RE = re.compile(
|
|
428
|
+
r'^(?:if|elif|else|for|while|try|except|finally|with|switch|case|do)\b.*[:{]\s*$'
|
|
429
|
+
r'|^\}'
|
|
430
|
+
)
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def _largest_parseable_python_run(bodies: list[str], min_run: int = 2) -> int:
|
|
434
|
+
"""Return the length of the longest contiguous run of >= ``min_run`` body
|
|
435
|
+
lines that ``ast.parse``-s as valid Python (after dedent), else 0.
|
|
436
|
+
|
|
437
|
+
Trims leading/trailing prose: a real commented-out block preceded by a
|
|
438
|
+
one-line prose intro (which alone breaks parsing) is still recognised via
|
|
439
|
+
its inner code run.
|
|
440
|
+
"""
|
|
441
|
+
import ast
|
|
442
|
+
import textwrap
|
|
443
|
+
|
|
444
|
+
n = len(bodies)
|
|
445
|
+
if n < min_run:
|
|
446
|
+
return 0
|
|
447
|
+
best = 0
|
|
448
|
+
for start in range(n):
|
|
449
|
+
# Longest window first for this start; stop at the first that parses.
|
|
450
|
+
for end in range(n, start + min_run - 1, -1):
|
|
451
|
+
if end - start < min_run:
|
|
452
|
+
continue
|
|
453
|
+
text = textwrap.dedent("\n".join(bodies[start:end]))
|
|
454
|
+
if not text.strip():
|
|
455
|
+
continue
|
|
456
|
+
try:
|
|
457
|
+
tree = ast.parse(text)
|
|
458
|
+
except (SyntaxError, ValueError):
|
|
459
|
+
continue
|
|
460
|
+
if tree.body:
|
|
461
|
+
if end - start > best:
|
|
462
|
+
best = end - start
|
|
463
|
+
break
|
|
464
|
+
return best
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
def _count_strong_code_signals(bodies: list[str]) -> int:
    """Count how many DISTINCT kinds of strong code signal the block shows.

    Each non-blank, stripped line is tested against the four structural
    regexes (definition header, assignment, bare call, block header). The
    result is the number of different kinds seen anywhere in the block, not
    the number of matching lines, so one construct repeated many times still
    counts once; >= 2 different kinds is strong evidence of code.

    Args:
        bodies: De-commented comment lines.

    Returns:
        An integer between 0 and 4.
    """
    detectors = (
        ("defclass", _STRONG_DEFCLASS_RE),
        ("assign", _STRONG_ASSIGN_RE),
        ("call", _STRONG_CALL_RE),
        ("blockhead", _STRONG_BLOCKHEAD_RE),
    )
    seen: set[str] = set()
    for entry in bodies:
        text = entry.strip()
        if text:
            seen.update(kind for kind, pattern in detectors if pattern.search(text))
    return len(seen)
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def _commented_block_is_code(bodies: list[str], lang: str) -> bool:
    """Decide whether a de-commented comment block is code rather than prose.

    Args:
        bodies: De-commented comment lines of one block.
        lang: Detected language name of the file.

    Returns:
        True when, for Python, a contiguous run of at least 2 lines parses as
        Python; otherwise (any language, and as a backstop for Python blocks
        that do not parse standalone, e.g. a dangling continuation) True when
        the block shows at least 2 distinct strong structural code signals.
    """
    parses_as_python = (
        lang == "python"
        and _largest_parseable_python_run(bodies, min_run=2) >= 2
    )
    return parses_as_python or _count_strong_code_signals(bodies) >= 2
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def assess_commented_code(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 41: report blocks of commented-out code (3+ consecutive lines).

    A run of consecutive comment lines is reported when it has at least 3
    lines, at least 2 of them hit the permissive ``code_indicators`` regex,
    and ``_commented_block_is_code`` confirms it is code rather than prose.

    A confirmed block is still skipped when it:
      * starts within the first 4 lines of the file,
      * carries one of ``_AUDIT_TRAIL_MARKERS`` in its own first 3 lines or
        in the 3 nearest non-blank lines above it (F9d),
      * starts inside a docstring line range (Python only, F9d), or
      * is longer than ``_COMMENTED_CODE_LONG_BLOCK_THRESHOLD`` lines (F9d).

    Args:
        file_path: Path used for language detection and in finding text.
        content: Full text of the file.

    Returns:
        At most 10 findings; empty for blank content and for json / yaml /
        toml / markdown / restructuredtext files.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang in ("json", "yaml", "toml", "markdown", "restructuredtext"):
        return []

    # Hash-comment languages share one pattern; every other language is
    # scanned for `//` line comments.
    if lang in ("python", "shell", "ruby", "php"):
        comment_re = re.compile(r'^\s*#\s?(.*)')
    else:
        comment_re = re.compile(r'^\s*//\s?(.*)')

    code_indicators = re.compile(
        r'(?:'
        r'\w+\s*=\s*\w'
        r'|def\s+\w+\s*\('
        r'|class\s+\w+'
        r'|function\s+\w+'
        r'|return\s+\w'
        r'|if\s+\w.*:'
        r'|if\s*\(.*\)\s*\{'
        r'|for\s+\w'
        r'|while\s+\w'
        r'|import\s+\w'
        r'|from\s+\w+\s+import'
        r'|\w+\.\w+\s*\('
        r'|raise\s+\w'
        r'|throw\s+\w'
        r'|except\s+\w'
        r'|catch\s*\('
        r'|try\s*[:{]'
        r')'
    )

    lines = content.splitlines()
    total = len(lines)

    # F9d: docstring line ranges are only collected for Python sources.
    docstring_ranges: list[tuple[int, int]] = (
        _collect_docstring_line_ranges(content) if lang == "python" else []
    )

    def _line_in_docstring(lineno_1based: int) -> bool:
        for first, last in docstring_ranges:
            if first <= lineno_1based <= last:
                return True
        return False

    def _mentions_audit_marker(text: str) -> bool:
        trimmed = text.strip()
        if not trimmed:
            return False
        return any(marker in trimmed for marker in _AUDIT_TRAIL_MARKERS)

    def _has_audit_trail_marker_above(block_start_idx: int, block_end_idx: int) -> bool:
        # Indices are 0-based; block_end_idx is exclusive. The marker is
        # usually the first comment of a preserved block, so the block's own
        # first 3 lines are checked before looking upward.
        head_stop = min(block_start_idx + 3, block_end_idx)
        for k in range(block_start_idx, head_stop):
            if _mentions_audit_marker(lines[k]):
                return True

        # Then the 3 nearest non-blank lines above the block.
        seen = 0
        for k in range(block_start_idx - 1, -1, -1):
            if seen >= 3:
                break
            if not lines[k].strip():
                continue
            seen += 1
            if _mentions_audit_marker(lines[k]):
                return True
        return False

    findings: list[GateFinding] = []
    cursor = 0

    while cursor < total:
        if comment_re.match(lines[cursor]) is None:
            cursor += 1
        else:
            # Consume the whole run of consecutive comment lines.
            block_start = cursor
            block_bodies: list[str] = []
            code_lines = 0
            stop = cursor
            while stop < total:
                hit = comment_re.match(lines[stop])
                if hit is None:
                    break
                text = hit.group(1)
                block_bodies.append(text)
                if code_indicators.search(text):
                    code_lines += 1
                stop += 1
            block_len = stop - block_start
            cursor = stop

            # F9e: the permissive code_indicators count is only a cheap
            # pre-filter; the prose-vs-code discriminator has the final say so
            # that prose merely mentioning a keyword is not reported.
            confirmed = (
                block_len >= 3
                and code_lines >= 2
                and _commented_block_is_code(block_bodies, lang)
            )
            reportable = (
                confirmed
                and not block_start < 4
                and not _has_audit_trail_marker_above(block_start, stop)
                and not _line_in_docstring(block_start + 1)
                and not block_len > _COMMENTED_CODE_LONG_BLOCK_THRESHOLD
            )
            if reportable:
                first_line = block_start + 1
                detail = f"Block of {block_len} commented-out code lines starting at line {first_line}"
                findings.append(build_finding(
                    check_id="commented_code_scan",
                    category=GateCategory.DRIFT,
                    title=f"[commented_code] {file_path}:{first_line}",
                    severity=GateSeverity.LOW,
                    impact=GateImpact.WARN,
                    summary=detail,
                    recommendation="Remove commented-out code; use version control to recover old code if needed.",
                    evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                    repair_kind=RepairKind.REMOVE_DUPLICATE.value,
                    executor_action=f"Remove commented-out code block at {file_path}:{first_line}",
                ))
        if len(findings) >= 10:
            break

    return findings
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
# ---------------------------------------------------------------------------
|
|
661
|
+
# Cluster 42: Missing Await / Unawaited Coroutines
|
|
662
|
+
# ---------------------------------------------------------------------------
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def _build_missing_await_findings_ast(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """AST-based missing-await detection for Python.

    Algorithm:
    1. Parse the module into an AST. Unparseable input yields no findings.
    2. Collect every name defined with ``async def`` and every name defined
       with ``def``. A name defined both ways is a collision and is never
       flagged. If the module defines no ``async def`` at all, return early.
    3. Pre-pass: build an "async-reachable" set of sync function names that
       are passed to an async runner call (``run``, ``gather``,
       ``ensure_future``, ``create_task``, ``run_until_complete``,
       ``run_coroutine_threadsafe``) or decorated with
       ``@pytest.mark.asyncio``, ``@asyncio.coroutine`` or ``@async_timeout``.
       Closure depth 1 only (no transitive resolution, to avoid false
       positives).
    4. Walk every ``ast.Call`` whose callee name matches an async def:
       a. Skip it if it is the direct operand of ``await``.
       b. Skip it if it is a direct positional argument of an async runner
          call: the runner consumes the coroutine, e.g.
          ``await asyncio.gather(a(), b())`` or ``asyncio.create_task(a())``.
       c. Skip module-level calls (likely ``asyncio.run(main())`` at the
          script entry point).
       d. Skip calls inside a sync ``def`` that is not async-reachable
          (assumed to be a legitimate sync wrapper).
       e. Otherwise emit a HIGH finding. This step stops at 10 findings.
    5. Pointless-async detection: an ``async def`` whose subtree contains no
       ``await`` / ``async for`` / ``async with``. Single-statement stubs
       (``pass``, ``...``, a lone ``raise``) are exempt.

    Calls located inside ``if TYPE_CHECKING:`` blocks are ignored in step 4.

    Args:
        file_path: Path used in finding titles and evidence.
        content: Python source text.

    Returns:
        Findings from steps 4 and 5, in that order.
    """
    import ast

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError: source containing null bytes on older Python versions.
        return []

    findings: list[GateFinding] = []

    # ------------------------------------------------------------------
    # Step 1: collect async def names and detect name collisions
    # ------------------------------------------------------------------
    async_names: set[str] = set()
    sync_names: set[str] = set()

    for node in ast.walk(tree):
        if isinstance(node, ast.AsyncFunctionDef):
            async_names.add(node.name)
        elif isinstance(node, ast.FunctionDef):
            sync_names.add(node.name)

    # Nothing below can produce a finding without at least one async def.
    if not async_names:
        return []

    # Names that exist as BOTH sync and async — ambiguous, skip entirely
    collision_names = async_names & sync_names

    # ------------------------------------------------------------------
    # Step 1b: async-reachability pre-pass
    #   - Sync `def` targets of asyncio.run / gather / ensure_future /
    #     create_task / loop.run_until_complete are treated as async context.
    #   - Sync defs decorated with @pytest.mark.asyncio, @asyncio.coroutine,
    #     @async_timeout also qualify.
    #   - Closure depth 1 only (no transitive resolution).
    # ------------------------------------------------------------------
    _ASYNC_RUNNER_ATTRS = {
        "run",
        "gather",
        "ensure_future",
        "create_task",
        "run_until_complete",
        "run_coroutine_threadsafe",
    }
    _ASYNC_RUNNER_BARE = {"gather", "ensure_future", "create_task"}
    _ASYNC_DECO_PYTEST_ATTR = "asyncio"  # for @pytest.mark.asyncio

    def _arg_callee_name(arg: ast.AST) -> str | None:
        """Extract simple name from a call-arg that is either Name or Call(Name)."""
        if isinstance(arg, ast.Call):
            func = arg.func
            if isinstance(func, ast.Name):
                return func.id
            if isinstance(func, ast.Attribute):
                return func.attr
            return None
        if isinstance(arg, ast.Name):
            return arg.id
        if isinstance(arg, ast.Attribute):
            return arg.attr
        return None

    def _is_async_runner_call(call: ast.Call) -> bool:
        """Return True if `call` invokes one of the known async runners."""
        func = call.func
        if isinstance(func, ast.Attribute) and func.attr in _ASYNC_RUNNER_ATTRS:
            return True
        if isinstance(func, ast.Name) and func.id in _ASYNC_RUNNER_BARE:
            # bare `gather(x())` / `create_task(x())` after `from asyncio import ...`
            return True
        return False

    def _decorator_marks_async(dec: ast.AST) -> bool:
        """Return True for @asyncio.coroutine / @async_timeout / @pytest.mark.asyncio."""
        if isinstance(dec, ast.Call):
            dec = dec.func
        if isinstance(dec, ast.Name):
            return dec.id == "async_timeout"
        if isinstance(dec, ast.Attribute):
            # @asyncio.coroutine
            if isinstance(dec.value, ast.Name) and dec.value.id == "asyncio" and dec.attr == "coroutine":
                return True
            # @async_timeout.timeout — also async scope
            if isinstance(dec.value, ast.Name) and dec.value.id == "async_timeout":
                return True
            # @pytest.mark.asyncio (Attribute: value=Attribute(pytest, mark), attr=asyncio)
            if dec.attr == _ASYNC_DECO_PYTEST_ATTR:
                inner = dec.value
                if isinstance(inner, ast.Attribute) and inner.attr == "mark":
                    if isinstance(inner.value, ast.Name) and inner.value.id == "pytest":
                        return True
        return False

    async_reachable_syncs: set[str] = set()

    # Decorator-driven reachability
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            if any(_decorator_marks_async(dec) for dec in node.decorator_list):
                async_reachable_syncs.add(node.name)

    # Runner-argument-driven reachability
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        if not _is_async_runner_call(node):
            continue
        for arg in list(node.args) + [kw.value for kw in node.keywords]:
            name = _arg_callee_name(arg)
            if name is None:
                continue
            # Only promote to "reachable" if this name is defined as a sync def
            # in THIS module (closure depth 1). Async defs need no promotion.
            if name in sync_names and name not in async_names:
                async_reachable_syncs.add(name)

    # ------------------------------------------------------------------
    # Step 2: detect TYPE_CHECKING blocks to exclude their contents
    # ------------------------------------------------------------------
    # Collect line ranges that are inside `if TYPE_CHECKING:` guards.
    type_checking_ranges: list[tuple[int, int]] = []
    for node in ast.walk(tree):
        if isinstance(node, ast.If):
            test = node.test
            is_tc = (
                (isinstance(test, ast.Name) and test.id == "TYPE_CHECKING")
                or (isinstance(test, ast.Attribute) and test.attr == "TYPE_CHECKING")
            )
            if is_tc and hasattr(node, "lineno") and hasattr(node, "end_lineno"):
                type_checking_ranges.append((node.lineno, node.end_lineno or node.lineno))

    def _in_type_checking(lineno: int) -> bool:
        return any(start <= lineno <= end for start, end in type_checking_ranges)

    # ------------------------------------------------------------------
    # Step 3: build parent map for ancestor walking
    # ------------------------------------------------------------------
    # Keyed by id() because AST nodes are not hashable by value; the tree
    # stays alive for the whole function, so ids are stable.
    parent_map: dict[int, ast.AST] = {}
    for node in ast.walk(tree):
        for child in ast.iter_child_nodes(node):
            parent_map[id(child)] = node

    def _get_enclosing_func(node: ast.AST) -> ast.AsyncFunctionDef | ast.FunctionDef | None:
        """Walk parent chain, return nearest enclosing function def or None."""
        current = parent_map.get(id(node))
        while current is not None:
            if isinstance(current, (ast.AsyncFunctionDef, ast.FunctionDef)):
                return current
            current = parent_map.get(id(current))
        return None

    def _is_directly_awaited(call_node: ast.Call) -> bool:
        """Return True if the Call node is the direct expression of an Await."""
        parent = parent_map.get(id(call_node))
        return isinstance(parent, ast.Await)

    def _is_asyncio_run_call(call_node: ast.Call) -> bool:
        """Return True if this call is a direct argument of an async runner.

        Covers the same runner set used by the reachability pre-pass, so
        coroutines handed to `gather` / `ensure_future` / `create_task` are
        treated as consumed, exactly like those handed to `run` /
        `run_until_complete` / `run_coroutine_threadsafe`. A bare `run(...)`
        call is also accepted.
        """
        parent = parent_map.get(id(call_node))
        if not isinstance(parent, ast.Call):
            return False
        if _is_async_runner_call(parent):
            return True
        func = parent.func
        return isinstance(func, ast.Name) and func.id == "run"

    def _callee_name(call_node: ast.Call) -> str | None:
        """Extract simple name from a Call node's func field."""
        func = call_node.func
        if isinstance(func, ast.Name):
            return func.id
        if isinstance(func, ast.Attribute):
            return func.attr
        return None

    # ------------------------------------------------------------------
    # Step 4: walk all Call nodes; flag un-awaited calls to async funcs
    #         inside async context
    # ------------------------------------------------------------------
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        if not hasattr(node, "lineno"):
            continue
        if _in_type_checking(node.lineno):
            continue

        name = _callee_name(node)
        if name is None or name not in async_names or name in collision_names:
            continue

        # Skip if already awaited
        if _is_directly_awaited(node):
            continue

        # Skip if handed to an async runner (run / gather / create_task / ...)
        if _is_asyncio_run_call(node):
            continue

        enclosing = _get_enclosing_func(node)

        if enclosing is None:
            # Module-level call — conservative skip (likely asyncio.run(main()))
            continue

        if isinstance(enclosing, ast.FunctionDef):
            # Sync enclosing function. Only treat as async context if this
            # sync def is in the async-reachable set; otherwise keep the
            # conservative skip.
            if enclosing.name not in async_reachable_syncs:
                continue
            # Fall through → emit finding (sync def runs in async context).

        # enclosing is AsyncFunctionDef (or async-reachable sync) and call is
        # NOT awaited → real bug
        lineno = node.lineno
        if len(findings) >= 10:
            break
        detail = f"Async function '{name}()' called without `await` (line {lineno})"
        findings.append(build_finding(
            check_id="missing_await_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[missing_await] {file_path}:{lineno}",
            severity=GateSeverity.HIGH,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=f"Add `await` before calling `{name}()` inside an async context.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Add missing await at {file_path}:{lineno}",
        ))

    # ------------------------------------------------------------------
    # Step 5: pointless-async detection (unchanged logic, regex-free)
    # ------------------------------------------------------------------
    for node in ast.walk(tree):
        if not isinstance(node, ast.AsyncFunctionDef):
            continue
        # Check the whole subtree (nested definitions included) for any
        # await / async for / async with.
        has_await = any(
            isinstance(child, (ast.Await, ast.AsyncFor, ast.AsyncWith))
            for child in ast.walk(node)
            if child is not node
        )
        if has_await:
            continue
        # Exempt stubs
        body_nodes = node.body
        if len(body_nodes) == 1:
            stmt = body_nodes[0]
            if isinstance(stmt, ast.Pass):
                continue
            if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Constant):
                if stmt.value.value is ...:
                    continue
            if isinstance(stmt, ast.Raise):
                continue
        lineno = node.lineno
        func_name = node.name
        detail = f"async def {func_name}() never uses await -- pointless async"
        findings.append(build_finding(
            check_id="missing_await_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[missing_await] {file_path}:{lineno}:{func_name}",
            severity=GateSeverity.LOW,
            impact=GateImpact.WARN,
            summary=detail,
            recommendation=f"Remove `async` from `{func_name}()` if it doesn't need to be async.",
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.FIX_CONTRACT.value,
            executor_action=f"Remove pointless async from {func_name}() at {file_path}:{lineno}",
        ))

    return findings
|
|
990
|
+
|
|
991
|
+
|
|
992
|
+
def assess_missing_await(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 42: Detect async calls without await and pointless async functions.

    Python files are delegated to ``_build_missing_await_findings_ast``.
    JavaScript / TypeScript files get a line-based heuristic: a line that
    calls ``fetch(`` and contains neither ``await`` nor ``.then(`` is
    reported. Lines starting with ``//`` are ignored, and the JS/TS scan
    stops once 10 findings have been collected.

    Args:
        file_path: Path used for language detection and in finding text.
        content: Full text of the file.

    Returns:
        The findings; empty for blank content or any other language.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang == "python":
        return _build_missing_await_findings_ast(file_path, content)
    if lang not in ("javascript", "typescript"):
        return []

    fetch_call = re.compile(r'\bfetch\s*\(')
    findings: list[GateFinding] = []

    for lineno, raw in enumerate(content.splitlines(), 1):
        text = raw.strip()
        if text.startswith("//"):
            continue
        unhandled_fetch = (
            fetch_call.search(text) is not None
            and "await" not in text
            and ".then(" not in text
        )
        if unhandled_fetch:
            detail = f"fetch() called without `await` or `.then()` (line {lineno})"
            findings.append(build_finding(
                check_id="missing_await_scan",
                category=GateCategory.RUNTIME_BEHAVIOR,
                title=f"[missing_await] {file_path}:{lineno}",
                severity=GateSeverity.HIGH,
                impact=GateImpact.REVISE,
                summary=detail,
                recommendation="Add `await` before `fetch()` or chain `.then()` to handle the promise.",
                evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
                repair_kind=RepairKind.FIX_CONTRACT.value,
                executor_action=f"Add missing await/then at {file_path}:{lineno}",
            ))
        if len(findings) >= 10:
            break

    return findings
|
|
1035
|
+
|
|
1036
|
+
|
|
1037
|
+
# ---------------------------------------------------------------------------
|
|
1038
|
+
# Cluster 43: API Response Without Status Check
|
|
1039
|
+
# ---------------------------------------------------------------------------
|
|
1040
|
+
|
|
1041
|
+
|
|
1042
|
+
def assess_unchecked_response(
    file_path: str,
    content: str,
) -> list[GateFinding]:
    """Cluster 43: Detect HTTP responses used without checking status.

    Line-based heuristic. For each assignment of an HTTP call result to a
    variable, the 10 lines FOLLOWING the assignment are searched for a status
    check on that variable; if none is found a MEDIUM finding is emitted.

    Recognised calls and the checks that satisfy them:
      * Python ``requests.get/post/put/delete/patch`` — ``.raise_for_status()``,
        ``.status_code`` or ``.ok``.
      * Python ``httpx.get/post/put/patch/delete/head/options/request`` and
        ``urllib.request.urlopen`` — ``.status`` (which also covers
        ``.status_code``) or ``.raise_for_status``. Only httpx request
        functions are matched, so constructors such as ``httpx.Client()`` or
        ``httpx.Timeout()`` are not mistaken for responses.
      * JS/TS ``await fetch(`` — ``.ok`` or ``.status``.

    Comment lines (``#`` / ``//``) are skipped, as are files whose basename
    starts with ``test_`` or ``conftest``. Scanning stops once 10 findings
    have been collected.

    Args:
        file_path: Path used for language detection and in finding text.
        content: Full text of the file.

    Returns:
        The findings; empty for blank content or unsupported languages.
    """
    import re

    if not content.strip():
        return []

    lang = detect_language(file_path)
    if lang not in ("python", "javascript", "typescript"):
        return []

    # rsplit returns the whole string when there is no separator, so no
    # separate "contains a slash" test is needed.
    basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.startswith(("test_", "conftest")):
        return []

    findings: list[GateFinding] = []
    lines = content.splitlines()

    def _report(lineno: int, detail: str, recommendation: str) -> None:
        """Append one unchecked-response finding for `lineno` (1-based)."""
        findings.append(build_finding(
            check_id="response_status_scan",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title=f"[unchecked_response] {file_path}:{lineno}",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary=detail,
            recommendation=recommendation,
            evidence=(EvidenceReference(kind="probe", path=file_path, detail=detail, ok=False),),
            repair_kind=RepairKind.ADD_BOUNDARY_CHECK.value,
            executor_action=f"Add status check at {file_path}:{lineno}",
        ))

    def _checked_below(lineno: int, needles: tuple[str, ...]) -> bool:
        """True if any needle occurs in the 10 lines after 1-based `lineno`."""
        # lineno is 1-based, so lines[lineno] is the line AFTER the call.
        window = lines[lineno:lineno + 10]
        return any(needle in later for later in window for needle in needles)

    if lang == "python":
        requests_call = re.compile(r'(\w+)\s*=\s*requests\.(get|post|put|delete|patch)\s*\(')
        # Restricted to httpx request functions: matching every `httpx.<name>(`
        # flagged `httpx.Client()` / `httpx.Timeout()` as unchecked responses.
        other_http_call = re.compile(
            r'(\w+)\s*=\s*(?:httpx\.(?:get|post|put|patch|delete|head|options|request)'
            r'|urllib\.request\.urlopen)\s*\('
        )
        leading_assign = re.compile(r'\s*(\w+)\s*=')

        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            if stripped.startswith("#"):
                continue

            m = requests_call.search(stripped)
            if m:
                var_name = m.group(1)
                needles = (
                    f"{var_name}.raise_for_status()",
                    f"{var_name}.status_code",
                    f"{var_name}.ok",
                )
                if not _checked_below(i, needles):
                    _report(
                        i,
                        f"requests.{m.group(2)}() without status check (line {i}) -- use .raise_for_status()",
                        "Call `.raise_for_status()` or check `.status_code` before using the response.",
                    )

            if other_http_call.search(stripped):
                # The variable is taken from the start of the statement, so
                # attribute targets such as `self.resp = ...` are not tracked.
                var_m = leading_assign.match(stripped)
                if var_m:
                    var_name = var_m.group(1)
                    needles = (f"{var_name}.status", f"{var_name}.raise_for_status")
                    if not _checked_below(i, needles):
                        _report(
                            i,
                            f"HTTP response without status check (line {i})",
                            "Check the response status before processing.",
                        )

            if len(findings) >= 10:
                break

    elif lang in ("javascript", "typescript"):
        fetch_assign = re.compile(r'(\w+)\s*=\s*await\s+fetch\s*\(')

        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            if stripped.startswith("//"):
                continue
            m = fetch_assign.search(stripped)
            if m:
                var_name = m.group(1)
                # `!x.ok` and `x.status !==` already contain these substrings.
                needles = (f"{var_name}.ok", f"{var_name}.status")
                if not _checked_below(i, needles):
                    _report(
                        i,
                        f"fetch() result used without .ok/.status check (line {i})",
                        "Check `response.ok` or `response.status` before processing the response.",
                    )
            if len(findings) >= 10:
                break

    return findings
|