vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
"""Reliability gate: blocking_call_missing_timeout (consolidated F-3).
|
|
2
|
+
|
|
3
|
+
Detects blocking I/O calls that lack a ``timeout=`` keyword (or, for
|
|
4
|
+
``socket.connect``, an explicit ``settimeout()`` on the same variable).
|
|
5
|
+
|
|
6
|
+
Sprint F-3 (2026-04-23) — consolidation
|
|
7
|
+
---------------------------------------
|
|
8
|
+
Previously this module contained scattered ``if module == "subprocess"``,
|
|
9
|
+
``if module == "requests"``, ``if func == "urlopen"`` branches. F-3 refactors
|
|
10
|
+
all of that into a single table-driven AST visitor backed by
|
|
11
|
+
``_BLOCKING_CALLS_REQUIRING_TIMEOUT``. New blocking-call sources can be added
|
|
12
|
+
in one place without touching the visitor.
|
|
13
|
+
|
|
14
|
+
Backward compatibility
|
|
15
|
+
----------------------
|
|
16
|
+
The historical check_id ``reliability.missing_timeout`` is kept as a
|
|
17
|
+
**runtime alias** for ``reliability.blocking_call_missing_timeout`` — every
|
|
18
|
+
finding is still emitted under the legacy id for now, and the alias mapping is
|
|
19
|
+
recorded in ``_LEGACY_CHECK_ID_ALIASES`` so allowlists / suppression files
|
|
20
|
+
that target the old id remain effective. Tests that assert
|
|
21
|
+
``check_id == "reliability.missing_timeout"`` continue to pass because the
|
|
22
|
+
alias resolves at construction time (see ``_canonical_check_id``).
|
|
23
|
+
|
|
24
|
+
Detection coverage (table-driven)
|
|
25
|
+
---------------------------------
|
|
26
|
+
* ``subprocess.{run, Popen, call, check_call, check_output}``
|
|
27
|
+
* ``requests.{get, post, put, delete, patch, head, options, request}``
|
|
28
|
+
* ``requests.Session().{get, post, ...}`` (method on a Session-typed local)
|
|
29
|
+
* ``urllib.request.urlopen``
|
|
30
|
+
* ``http.client.HTTPConnection`` / ``HTTPSConnection``
|
|
31
|
+
* ``sqlite3.connect``
|
|
32
|
+
* ``socket.create_connection``
|
|
33
|
+
* ``paramiko.SSHClient.{connect, exec_command}``
|
|
34
|
+
* ``socket.connect()`` — special-case: requires a prior ``settimeout()`` on
|
|
35
|
+
the same variable in the enclosing function body.
|
|
36
|
+
"""
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import ast
|
|
40
|
+
import logging
|
|
41
|
+
|
|
42
|
+
from vigil_forensic._shared import (
|
|
43
|
+
EvidenceReference,
|
|
44
|
+
GateCategory,
|
|
45
|
+
GateImpact,
|
|
46
|
+
GateSeverity,
|
|
47
|
+
RepairKind,
|
|
48
|
+
)
|
|
49
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
50
|
+
from ..source_analysis import is_source_file
|
|
51
|
+
from .common import build_check_result, build_finding, normalize_path
|
|
52
|
+
from ._ast_helpers import parse_python_source_or_emit_finding
|
|
53
|
+
|
|
54
|
+
_log = logging.getLogger(__name__)
|
|
55
|
+
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
# Canonical / legacy check_id mapping
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# F-3 introduces ``reliability.blocking_call_missing_timeout`` as the single
|
|
60
|
+
# canonical id for all blocking-call/timeout findings. The historical
|
|
61
|
+
# ``reliability.missing_timeout`` id is kept as an allowlist alias so existing
|
|
62
|
+
# suppression files / external consumers continue to work without churn.
|
|
63
|
+
#
|
|
64
|
+
# ``_canonical_check_id`` does not consult the ALIAS table yet — current behavior
|
|
65
|
+
# (Sprint F-3): emit the legacy id so existing tests/allowlists are stable;
|
|
66
|
+
# next sprint may flip to canonical and re-route the legacy id through alias
|
|
67
|
+
# resolution in the allowlist layer.
|
|
68
|
+
|
|
69
|
+
# Finding ids for the blocking-call/timeout gate: the historical spelling and
# its F-3 replacement.
LEGACY_CHECK_ID = "reliability.missing_timeout"
CANONICAL_CHECK_ID = "reliability.blocking_call_missing_timeout"

# Alias table published for downstream consumers (allowlist resolver, docs).
# Direction: legacy_id -> canonical_id.
_LEGACY_CHECK_ID_ALIASES: dict[str, str] = dict(
    [(LEGACY_CHECK_ID, CANONICAL_CHECK_ID)],
)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _canonical_check_id() -> str:
    """Return the check_id stamped on every finding this gate emits.

    Despite the name, the value returned today is ``LEGACY_CHECK_ID``
    (``reliability.missing_timeout``); ``CANONICAL_CHECK_ID`` is only exposed
    as a module constant. The original Sprint F-3 notes describe this as a
    deliberate choice so existing tests / allowlists keep matching, with a
    planned flip to ``CANONICAL_CHECK_ID`` once consumers register the alias.
    """
    emitted_id: str = LEGACY_CHECK_ID
    return emitted_id
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
# Unified blocking-call table
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
# Key shape: (root_module_or_class, leaf_attr).
|
|
95
|
+
# * ``("subprocess", "run")`` — ``subprocess.run(...)``
|
|
96
|
+
# * ``("urllib.request", "urlopen")`` — ``urllib.request.urlopen(...)``
|
|
97
|
+
# * ``("paramiko.SSHClient", "connect")``— method on an SSHClient-typed local
|
|
98
|
+
# * ``("socket", "connect")`` — special-cased, value is None
|
|
99
|
+
# Value is the kwarg name that signals "timeout configured" (None means the
|
|
100
|
+
# call is handled by a special-case visitor; see ``socket.connect``).
|
|
101
|
+
|
|
102
|
+
# Maps ``(root_module_or_class, leaf_attr)`` to the keyword argument whose
# presence counts as "timeout configured". A value of ``None`` means the entry
# is exempt from the plain kwarg test.
_BLOCKING_CALLS_REQUIRING_TIMEOUT: dict[tuple[str, str], str | None] = {
    # subprocess
    ("subprocess", "run"): "timeout",
    # ``subprocess.Popen(...)`` accepts no ``timeout`` keyword: the timeout is
    # given to ``Popen.wait()`` / ``Popen.communicate()`` instead. Demanding
    # ``timeout=`` on the constructor would flag every Popen call and the
    # suggested repair would raise ``TypeError``. ``None`` keeps the entry
    # listed while skipping the kwarg test.
    # NOTE(review): no wait()/communicate() scanner is visible in this module.
    ("subprocess", "Popen"): None,
    ("subprocess", "call"): "timeout",
    ("subprocess", "check_call"): "timeout",
    ("subprocess", "check_output"): "timeout",
    # requests (functional API)
    ("requests", "get"): "timeout",
    ("requests", "post"): "timeout",
    ("requests", "put"): "timeout",
    ("requests", "delete"): "timeout",
    ("requests", "patch"): "timeout",
    ("requests", "head"): "timeout",
    ("requests", "options"): "timeout",
    ("requests", "request"): "timeout",
    # urllib
    ("urllib.request", "urlopen"): "timeout",
    # http.client
    ("http.client", "HTTPConnection"): "timeout",
    ("http.client", "HTTPSConnection"): "timeout",
    # database
    ("sqlite3", "connect"): "timeout",
    # network
    ("socket", "create_connection"): "timeout",
    ("socket", "connect"): None,  # special-case via _find_socket_connect_without_settimeout
    # paramiko / SSH (method on instance)
    ("paramiko.SSHClient", "connect"): "timeout",
    ("paramiko.SSHClient", "exec_command"): "timeout",
}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# ---------------------------------------------------------------------------
|
|
135
|
+
# AST helpers
|
|
136
|
+
# ---------------------------------------------------------------------------
|
|
137
|
+
|
|
138
|
+
def _has_kwarg(call_node: ast.Call, name: str) -> bool:
    """Tell whether ``call_node`` passes an explicit ``name=...`` keyword.

    Only explicitly spelled keywords match; a ``**mapping`` expansion has
    ``arg`` set to ``None`` in the AST and therefore never compares equal to
    ``name``.
    """
    for keyword in call_node.keywords:
        if keyword.arg == name:
            return True
    return False
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _resolve_call_target(node: ast.Call) -> tuple[str, str] | None:
    """Turn an attribute call into a ``(module_or_class, leaf_attr)`` key.

    The returned pair has the shape used by the keys of
    ``_BLOCKING_CALLS_REQUIRING_TIMEOUT``.

    Shapes handled:
      * ``name.attr(...)``        -> ``("name", "attr")``
        e.g. ``subprocess.run`` -> ``("subprocess", "run")``
      * ``name.inner.attr(...)``  -> ``("name.inner", "attr")``
        e.g. ``urllib.request.urlopen`` -> ``("urllib.request", "urlopen")``

    Everything else yields ``None``: bare-name calls such as ``urlopen(...)``
    after ``from urllib.request import urlopen``, chains deeper than three
    names, and receivers that are not plain names (call results, subscripts).

    No type inference is attempted. ``client.connect(...)`` on a local of
    unknown type resolves to ``("client", "connect")``, which is simply not a
    table key, so instance-method entries like ``("paramiko.SSHClient", ...)``
    are not reached through this helper.
    """
    callee = node.func
    if not isinstance(callee, ast.Attribute):
        return None

    receiver = callee.value
    leaf = callee.attr

    # <Name>.<attr>(...)
    if isinstance(receiver, ast.Name):
        return receiver.id, leaf

    # <Name>.<inner>.<attr>(...)
    if isinstance(receiver, ast.Attribute):
        root = receiver.value
        if isinstance(root, ast.Name):
            return f"{root.id}.{receiver.attr}", leaf

    return None
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# ---------------------------------------------------------------------------
|
|
175
|
+
# Per-file analysis
|
|
176
|
+
# ---------------------------------------------------------------------------
|
|
177
|
+
|
|
178
|
+
def _find_missing_timeouts(
    src: str,
    file_path: str,
    *,
    emit_finding=None,
) -> list[dict]:
    """Collect hit-dicts for blocking calls in ``src`` that lack a timeout.

    Parsing is delegated to ``parse_python_source_or_emit_finding``; when it
    returns ``None`` this function returns ``[]``. The B4 (2026-04-23) notes
    state that the helper reports a syntax error as ``meta.syntax_parse_error``
    through ``emit_finding`` when one is supplied.

    Every ``ast.Call`` is resolved with ``_resolve_call_target`` and looked up
    in ``_BLOCKING_CALLS_REQUIRING_TIMEOUT``. A call produces a hit only when
    its table entry names a keyword and that keyword is absent. Entries mapped
    to ``None`` (e.g. ``socket.connect``) are skipped here; the socket case is
    covered by ``_find_socket_connect_without_settimeout``, whose hits are
    appended after the table-driven ones.

    Each hit is ``{"kind", "call", "line", "file"}``.
    """
    module_ast = parse_python_source_or_emit_finding(
        src,
        rel_path=file_path,
        emit_finding=emit_finding,
        emitting_gate=_canonical_check_id(),
    )
    if module_ast is None:
        return []

    found: list[dict] = []
    call_nodes = (n for n in ast.walk(module_ast) if isinstance(n, ast.Call))
    for call in call_nodes:
        key = _resolve_call_target(call)
        if key is None:
            continue

        # ``None`` covers both "not in the table" and "special-cased entry";
        # neither is subject to the plain kwarg test.
        required_kwarg = _BLOCKING_CALLS_REQUIRING_TIMEOUT.get(key)
        if required_kwarg is None or _has_kwarg(call, required_kwarg):
            continue

        owner, leaf = key
        found.append({
            "kind": _kind_for(key),
            # A dotted owner such as ``urllib.request`` is kept as-is.
            "call": f"{owner}.{leaf}",
            "line": getattr(call, "lineno", 0),
            "file": file_path,
        })

    # socket.connect needs settimeout() on the same variable, not a kwarg.
    found.extend(_find_socket_connect_without_settimeout(module_ast, file_path))
    return found
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _kind_for(target: tuple[str, str]) -> str:
    """Return the short ``kind`` token for a ``(module, func)`` table key.

    Known modules map to fixed tokens, any module starting with ``paramiko``
    maps to ``"paramiko"``, and anything else falls back to the module string
    itself. The ``func`` half of the key is not used.
    """
    module, _leaf = target
    exact_tokens = {
        "subprocess": "subprocess",
        "requests": "requests",
        "urllib.request": "urllib",
        "http.client": "http_client",
        "sqlite3": "sqlite",
        "socket": "socket",
    }
    if module in exact_tokens:
        return exact_tokens[module]
    return "paramiko" if module.startswith("paramiko") else module
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# Receivers (the ``X`` in ``X.connect(...)``) which are already covered by
|
|
261
|
+
# the unified ``_BLOCKING_CALLS_REQUIRING_TIMEOUT`` table or are otherwise NOT
|
|
262
|
+
# socket instances. Excluding them here prevents double-flagging:
|
|
263
|
+
# ``sqlite3.connect("/tmp/x.db")`` is a Name receiver too, but ``sqlite3``
|
|
264
|
+
# is the stdlib module — not a socket variable.
|
|
265
|
+
_NON_SOCKET_CONNECT_RECEIVERS: frozenset[str] = frozenset({
    "sqlite3",
    "subprocess",
    "requests",
    "paramiko",
    "urllib",
    "http",
})


def _find_socket_connect_without_settimeout(tree: ast.Module, file_path: str) -> list[dict]:
    """Report ``var.connect(...)`` calls with no ``var.settimeout(...)`` nearby.

    For every function (sync or async) in ``tree`` the whole body is scanned,
    nested definitions included. A ``<Name>.connect(...)`` call is reported
    unless the same name also receives a ``settimeout(...)`` call somewhere in
    that body. Source order is not checked: a ``settimeout`` after the
    ``connect`` also suppresses the hit. Calls at module level are not
    examined.

    Receivers listed in ``_NON_SOCKET_CONNECT_RECEIVERS`` are skipped because
    they are module names rather than socket variables; without that filter
    ``sqlite3.connect(...)`` would be reported here as well as by the
    kwarg-table lookup.

    Because each body scan descends into nested functions, one ``connect``
    call can be reached from several enclosing functions. Each call node is
    reported at most once, so nesting does not produce duplicate hits.

    Args:
        tree: Parsed module to inspect.
        file_path: Path recorded in the ``"file"`` field of each hit.

    Returns:
        Hit dicts shaped ``{"kind": "socket", "call": "<var>.connect",
        "line": <lineno>, "file": file_path}`` in discovery order.
    """
    results: list[dict] = []
    # id() of call nodes already reported; the nodes stay alive via ``tree``.
    reported: set[int] = set()

    def _name_receiver_calls(stmts: list[ast.stmt], attr: str):
        """Yield every ``<Name>.<attr>(...)`` call found under ``stmts``."""
        for stmt in stmts:
            for node in ast.walk(stmt):
                if (
                    isinstance(node, ast.Call)
                    and isinstance(node.func, ast.Attribute)
                    and node.func.attr == attr
                    and isinstance(node.func.value, ast.Name)
                ):
                    yield node

    def _scan_body(stmts: list[ast.stmt]) -> None:
        """Append hits for unguarded ``connect`` calls in one function body."""
        settimeout_vars = {
            call.func.value.id for call in _name_receiver_calls(stmts, "settimeout")
        }
        for call in _name_receiver_calls(stmts, "connect"):
            var = call.func.value.id
            if var in _NON_SOCKET_CONNECT_RECEIVERS or var in settimeout_vars:
                continue
            if id(call) in reported:
                # Already reported while scanning an enclosing function.
                continue
            reported.add(id(call))
            results.append({
                "kind": "socket",
                "call": f"{var}.connect",
                "line": getattr(call, "lineno", 0),
                "file": file_path,
            })

    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            _scan_body(node.body)

    return results
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# ---------------------------------------------------------------------------
|
|
324
|
+
# Gate entry-point
|
|
325
|
+
# ---------------------------------------------------------------------------
|
|
326
|
+
|
|
327
|
+
def _missing_timeout_finding(rel_path: str, hit: dict):
    """Build the gate finding for one missing-timeout ``hit`` in ``rel_path``."""
    call_name = hit["call"]
    lineno = hit["line"]
    return build_finding(
        check_id=_canonical_check_id(),
        category=GateCategory.RUNTIME_BEHAVIOR,
        title=f"Missing timeout= on {call_name}() at {rel_path}:{lineno}",
        severity=GateSeverity.HIGH,
        impact=GateImpact.REVISE,
        summary=(
            f"{rel_path} line {lineno}: {call_name}() called without timeout= "
            "keyword. A hanging call will block the process indefinitely, "
            "causing deadlocks or infinite waits in production."
        ),
        recommendation=(
            "Always pass timeout= to blocking I/O calls. "
            "Typical values: 30-60s for HTTP, 120-600s for subprocess. "
            "For socket, call sock.settimeout(N) before connect()."
        ),
        evidence=[
            EvidenceReference(kind="file", path=rel_path, detail=f"line:{lineno}"),
        ],
        repair_kind=RepairKind.VALIDATE_BOUNDARY.value,
        executor_action=(
            f"Add timeout= to {call_name}() at line {lineno}. "
            "Typical: 30-60s for http, 120-600s for subprocess"
        ),
        proof_required=f"grep shows every {call_name} call site has timeout= kwarg",
        allowlist_allowed=True,
        confidence=0.85,
        applicability="applicable",
        analysis_mode="ast",
    )


def run_reliability_checks(ctx: PostExecGateContext):
    """Gate entry point: flag blocking calls without a timeout in changed files.

    Walks ``ctx.changed_files_observed``, keeps the paths accepted by
    ``is_source_file``, reads each one as UTF-8 relative to
    ``ctx.project_dir`` and turns every hit from ``_find_missing_timeouts``
    into a finding. Files that cannot be read or decoded are logged at debug
    level and skipped. ``findings.append`` is handed to the scanner as
    ``emit_finding``, so anything it emits lands in the same list.

    Returns the result of ``build_check_result`` for check id ``reliability``.
    """
    findings = []

    for raw_path in ctx.changed_files_observed:
        rel_path = normalize_path(raw_path)
        if not is_source_file(rel_path):
            continue

        try:
            text = (ctx.project_dir / rel_path).read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            _log.debug("reliability_checks: cannot read %s: %s", rel_path, exc)
            continue

        hits = _find_missing_timeouts(text, rel_path, emit_finding=findings.append)
        for hit in hits:
            findings.append(_missing_timeout_finding(rel_path, hit))

    return build_check_result(
        check_id="reliability",
        category=GateCategory.RUNTIME_BEHAVIOR,
        findings=findings,
    )
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity
|
|
6
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
7
|
+
from .common import build_check_result, build_finding
|
|
8
|
+
import logging
|
|
9
|
+
_log = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def run_reporting_checks(ctx: PostExecGateContext):
    """Gate entry point for reporting consistency.

    Two checks are performed:

    1. ``reporting.artifact_missing`` -- for each name in the repo profile's
       ``reporting_required_artifacts`` (none when there is no profile), the
       path recorded in ``ctx.artifact_refs`` must be non-empty and exist on
       disk. A missing ``executor_handoff`` artifact has BLOCK impact; any
       other missing artifact has REVISE impact.
    2. ``reporting.accepted_vs_blocking`` -- the lower-cased verification
       summary contains the substring ``accepted`` while blocking issues are
       still recorded.

    Returns the result of ``build_check_result`` for check id ``reporting``.
    """
    findings = []

    repo_profile = ctx.repo_profile
    if repo_profile is None:
        required_artifacts = ()
    else:
        required_artifacts = repo_profile.reporting_required_artifacts

    for artifact_name in required_artifacts:
        recorded_path = ctx.artifact_refs.get(artifact_name, "")
        if recorded_path and Path(recorded_path).exists():
            continue

        if artifact_name == "executor_handoff":
            impact = GateImpact.BLOCK
        else:
            impact = GateImpact.REVISE

        missing = build_finding(
            check_id="reporting.artifact_missing",
            category=GateCategory.REPORTING,
            title="Referenced artifact is missing",
            severity=GateSeverity.HIGH,
            impact=impact,
            summary=f"Required artifact '{artifact_name}' is missing from the post-exec evidence set.",
            recommendation="Persist the artifact before stronger verification wording is used.",
            evidence=[
                EvidenceReference(kind="artifact", path=recorded_path, detail=artifact_name),
            ],
            repair_kind="fix_contract",
            executor_action="Fix reporting",
            proof_required="Report accurate",
            allowlist_allowed=False,
        )
        findings.append(missing)

    verification = ctx.verification_summary
    wording = (verification.summary or "").lower()
    if "accepted" in wording and verification.blocking_issues:
        contradiction = build_finding(
            check_id="reporting.accepted_vs_blocking",
            category=GateCategory.REPORTING,
            title="Acceptance wording is unsupported by raw blocking evidence",
            severity=GateSeverity.MEDIUM,
            impact=GateImpact.REVISE,
            summary="Verification summary contains accepted wording while blocking issues are still present.",
            recommendation="Tone down summary wording until raw evidence supports it.",
            repair_kind="fix_contract",
            executor_action="Fix reporting",
            proof_required="Report accurate",
            allowlist_allowed=False,
        )
        findings.append(contradiction)

    return build_check_result(
        check_id="reporting",
        category=GateCategory.REPORTING,
        findings=findings,
    )
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Runtime behavior forensic checks.
|
|
2
|
+
|
|
3
|
+
Includes:
|
|
4
|
+
- runtime.claim_contradiction: verification passes but runtime health is unhealthy.
|
|
5
|
+
- runtime.identity_mismatch: foreground runtime claims it survives console exit.
|
|
6
|
+
- runtime_duplicate_side_effect (Finding 6.2): same side-effect call pattern appears
|
|
7
|
+
>=2 times in a changed file, suggesting a duplicate startup hook or double
|
|
8
|
+
registration.
|
|
9
|
+
|
|
10
|
+
Detection approach for Finding 6.2: AST-based call counting.
|
|
11
|
+
Ignores strings, comments, and docstrings — no false positives from those sources.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import ast
|
|
16
|
+
import logging
|
|
17
|
+
|
|
18
|
+
from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity
|
|
19
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
20
|
+
from vigil_forensic.source_analysis import is_source_file
|
|
21
|
+
from .common import build_check_result, build_finding, normalize_path
|
|
22
|
+
from ._ast_helpers import parse_python_source_or_emit_finding
|
|
23
|
+
|
|
24
|
+
_log = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# Finding 6.2 — duplicate side-effect registration
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
# Call shapes counted by the duplicate side-effect check.
DUPLICATE_SIDE_EFFECT_PATTERNS: tuple[tuple[str, ...], ...] = (
    # Two-part patterns: ``<name>.<attr>(...)``.
    ("atexit", "register"),
    ("signal", "signal"),
    ("scheduler", "add"),
    ("scheduler", "add_job"),
    ("schedule", "every"),
    ("EventEmitter", "on"),
) + (
    # One-part patterns: bare ``<name>(...)`` calls.
    ("subscribe",),
    ("add_listener",),
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _count_ast_calls(
    content: str,
    call_pattern: tuple[str, ...],
    *,
    emit_finding=None,
    rel_path: str = "",
) -> int:
    """Count the ``ast.Call`` nodes in ``content`` that match ``call_pattern``.

    Matching works on the parsed tree, so text inside strings, comments and
    docstrings is never counted.

    Pattern shapes:
      * ``("atexit", "register")`` -- a call ``atexit.register(...)`` whose
        receiver is the plain name ``atexit``.
      * ``("subscribe",)``         -- a bare call ``subscribe(...)``.
    Patterns of any other length match nothing.

    Parsing is delegated to ``parse_python_source_or_emit_finding`` with
    ``emitting_gate="runtime_duplicate_side_effect"``; when it returns
    ``None`` the count is 0. The B4 (2026-04-23) notes state that the helper
    reports syntax errors as ``meta.syntax_parse_error`` through
    ``emit_finding`` and stays silent when no ``emit_finding`` is given.
    """
    tree = parse_python_source_or_emit_finding(
        content,
        rel_path=rel_path,
        emit_finding=emit_finding,
        emitting_gate="runtime_duplicate_side_effect",
    )
    if tree is None:
        return 0

    arity = len(call_pattern)

    def _matches(callee: ast.expr) -> bool:
        """Return True when the callee expression fits ``call_pattern``."""
        if arity == 2:
            return (
                isinstance(callee, ast.Attribute)
                and isinstance(callee.value, ast.Name)
                and (callee.value.id, callee.attr) == call_pattern
            )
        if arity == 1:
            return isinstance(callee, ast.Name) and callee.id == call_pattern[0]
        return False

    return sum(
        1
        for node in ast.walk(tree)
        if isinstance(node, ast.Call) and _matches(node.func)
    )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def run_runtime_behavior_checks(ctx: PostExecGateContext):
    """Run the original runtime-behavior checks and return their check result.

    This function was split out of a combined implementation per F-001
    (plan v7 Phase A); duplicate-side-effect detection now lives in
    ``run_runtime_duplicate_side_effect_checks``.

    Checks performed
    ----------------
    1. runtime.claim_contradiction -- verification is marked passed while the
       runtime health (lower-cased) is one of ``stale_lock``, ``no_lock`` or
       ``unhealthy``.
    2. runtime.identity_mismatch -- the runtime model is
       ``attached_foreground_runtime`` yet ``survives_console_exit`` is
       exactly ``True``.

    Args:
        ctx: Gate context; ``runtime_state`` and ``verification_summary`` are
            read from it.

    Returns:
        The value of ``build_check_result`` for check id ``runtime_behavior``
        carrying every finding raised here (possibly none).
    """
    collected = []

    runtime_state = ctx.runtime_state
    verification_summary = ctx.verification_summary
    reported_health = runtime_state.health

    # Check 1: a "passed" verification must not coexist with bad runtime health.
    bad_health_states = {"stale_lock", "no_lock", "unhealthy"}
    if verification_summary.passed and str(reported_health).lower() in bad_health_states:
        contradiction = build_finding(
            check_id="runtime.claim_contradiction",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title="Runtime health contradicts verification success",
            severity=GateSeverity.HIGH,
            impact=GateImpact.REVISE,
            summary=f"Verification is marked passed while runtime health is '{reported_health}'.",
            recommendation="Reconcile runtime truth surfaces before acceptance wording is strengthened.",
            repair_kind="refactor",
            executor_action="Address finding details",
            proof_required="Runtime behavior acceptable",
            allowlist_allowed=False,
        )
        collected.append(contradiction)

    # Check 2: an attached foreground runtime must not claim it outlives the console.
    if (
        runtime_state.runtime_model == "attached_foreground_runtime"
        and runtime_state.survives_console_exit is True
    ):
        mismatch = build_finding(
            check_id="runtime.identity_mismatch",
            category=GateCategory.RUNTIME_BEHAVIOR,
            title="Runtime persistence claim contradicts attached foreground model",
            severity=GateSeverity.HIGH,
            impact=GateImpact.REVISE,
            summary="Attached foreground runtime cannot truthfully claim it survives console exit.",
            recommendation="Keep runtime identity and persistence wording aligned.",
            repair_kind="refactor",
            executor_action="Address finding details",
            proof_required="Runtime behavior acceptable",
            allowlist_allowed=False,
        )
        collected.append(mismatch)

    return build_check_result(
        check_id="runtime_behavior",
        category=GateCategory.RUNTIME_BEHAVIOR,
        findings=collected,
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def run_runtime_duplicate_side_effect_checks(ctx: PostExecGateContext):
    """Detect duplicate side-effect registrations in changed files (plan v6 E.2).

    For every observed changed file that ``is_source_file`` accepts, each
    pattern in ``DUPLICATE_SIDE_EFFECT_PATTERNS`` (atexit.register,
    signal.signal, scheduler.add, etc.) is counted with ``_count_ast_calls``.
    A pattern occurring two or more times in one file produces a
    ``runtime_duplicate_side_effect.double_registration`` finding.

    Fails open: a file that cannot be read (``OSError`` or
    ``UnicodeDecodeError``) is logged at DEBUG and skipped.

    Args:
        ctx: Gate context; ``changed_files_observed`` and ``project_dir`` are
            read from it.

    Returns:
        The value of ``build_check_result`` for check id
        ``runtime_duplicate_side_effect`` carrying all collected findings.
    """
    collected = []

    # --- Finding 6.2: duplicate side-effect registration -------------------
    for observed in ctx.changed_files_observed:
        rel_path = normalize_path(observed)
        if not is_source_file(rel_path):
            continue

        file_path = ctx.project_dir / rel_path
        try:
            source_text = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            _log.debug("runtime_behavior_checks: cannot read %s: %s", rel_path, exc)
            continue

        # B4 (2026-04-23): the source is re-parsed for every pattern, so the
        # findings sink is handed to _count_ast_calls on the first pattern
        # only. Later iterations pass no sink, so a parse-failure meta
        # finding is emitted at most once per file.
        for position, pattern in enumerate(DUPLICATE_SIDE_EFFECT_PATTERNS):
            occurrences = _count_ast_calls(
                source_text,
                pattern,
                emit_finding=collected.append if position == 0 else None,
                rel_path=rel_path,
            )
            dotted = ".".join(pattern)
            if occurrences < 2:
                continue

            duplicate = build_finding(
                check_id="runtime_duplicate_side_effect.double_registration",
                category=GateCategory.RUNTIME_BEHAVIOR,
                title="Potential duplicate side-effect registration",
                severity=GateSeverity.MEDIUM,
                impact=GateImpact.REVISE,
                summary=(
                    f"{rel_path} calls '{dotted}' {occurrences} time(s). "
                    f"Multiple registrations of the same side-effect hook "
                    f"(atexit, signal, scheduler, event subscription) in one "
                    f"module suggest accidental double-registration or a "
                    f"duplicate startup path."
                ),
                recommendation=(
                    "Verify that each side-effect hook is registered exactly "
                    "once per process lifetime. Extract registration into a "
                    "dedicated setup function guarded by an idempotency flag, "
                    "or assert the handler is not already registered before "
                    "calling register/subscribe."
                ),
                evidence=[
                    EvidenceReference(
                        kind="file",
                        path=rel_path,
                        detail=f"pattern='{dotted}' count={occurrences}",
                    )
                ],
                repair_kind="refactor",
                executor_action="Address finding details",
                proof_required="Runtime behavior acceptable",
                allowlist_allowed=False,
            )
            collected.append(duplicate)

    return build_check_result(
        check_id="runtime_duplicate_side_effect",
        category=GateCategory.RUNTIME_BEHAVIOR,
        findings=collected,
    )
|