vigil-codeintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
- vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
- vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
- vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
- vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
- vigil_forensic/__init__.py +224 -0
- vigil_forensic/_git_utils.py +178 -0
- vigil_forensic/_shared.py +510 -0
- vigil_forensic/_stubs.py +156 -0
- vigil_forensic/gate_checks/__init__.py +1 -0
- vigil_forensic/gate_checks/_ast_helpers.py +629 -0
- vigil_forensic/gate_checks/_deployment_detector.py +573 -0
- vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
- vigil_forensic/gate_checks/authority_checks.py +95 -0
- vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
- vigil_forensic/gate_checks/broad_except_checks.py +301 -0
- vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
- vigil_forensic/gate_checks/common.py +253 -0
- vigil_forensic/gate_checks/config_safety_checks.py +704 -0
- vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
- vigil_forensic/gate_checks/conflict_checks.py +193 -0
- vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
- vigil_forensic/gate_checks/context_health_checks.py +289 -0
- vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
- vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
- vigil_forensic/gate_checks/duplication_checks.py +387 -0
- vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
- vigil_forensic/gate_checks/empty_output_checks.py +87 -0
- vigil_forensic/gate_checks/encoding_checks.py +847 -0
- vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
- vigil_forensic/gate_checks/fallback_checks.py +41 -0
- vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
- vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
- vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
- vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
- vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
- vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
- vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
- vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
- vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
- vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
- vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
- vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
- vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
- vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
- vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
- vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
- vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
- vigil_forensic/gate_checks/hallucination_checks.py +566 -0
- vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
- vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
- vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
- vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
- vigil_forensic/gate_checks/ml_checks.py +318 -0
- vigil_forensic/gate_checks/performance_checks.py +106 -0
- vigil_forensic/gate_checks/project_specific_runner.py +691 -0
- vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
- vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
- vigil_forensic/gate_checks/reliability_checks.py +389 -0
- vigil_forensic/gate_checks/reporting_checks.py +55 -0
- vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
- vigil_forensic/gate_checks/security_injection_checks.py +332 -0
- vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
- vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
- vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
- vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
- vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
- vigil_forensic/gate_checks/test_quality_checks.py +946 -0
- vigil_forensic/gate_checks/testing_checks.py +149 -0
- vigil_forensic/gate_checks/toctou_checks.py +367 -0
- vigil_forensic/gate_checks/type_checking_checks.py +316 -0
- vigil_forensic/gate_models.py +392 -0
- vigil_forensic/gate_packs/__init__.py +1 -0
- vigil_forensic/gate_packs/universal.py +179 -0
- vigil_forensic/gate_profile.json +31 -0
- vigil_forensic/gate_registry.py +21 -0
- vigil_forensic/language_profiles.py +219 -0
- vigil_forensic/meta_findings.py +207 -0
- vigil_forensic/self_audit.py +725 -0
- vigil_forensic/source_analysis.py +175 -0
- vigil_mapper/__init__.py +103 -0
- vigil_mapper/_ast_helpers_minimal.py +229 -0
- vigil_mapper/_extract_imports_impl.py +123 -0
- vigil_mapper/_file_count_guard.py +129 -0
- vigil_mapper/_git_utils.py +178 -0
- vigil_mapper/_runtime_ast.py +438 -0
- vigil_mapper/_runtime_dispatch.py +137 -0
- vigil_mapper/_seed_helpers.py +82 -0
- vigil_mapper/authority_builder.py +1102 -0
- vigil_mapper/cli_entry.py +731 -0
- vigil_mapper/conflict_builder.py +818 -0
- vigil_mapper/data_contract_builder.py +446 -0
- vigil_mapper/findings_builder.py +716 -0
- vigil_mapper/fingerprint.py +53 -0
- vigil_mapper/hotspot_builder.py +539 -0
- vigil_mapper/map_common.py +449 -0
- vigil_mapper/map_errors.py +55 -0
- vigil_mapper/map_models.py +431 -0
- vigil_mapper/map_models_ext.py +206 -0
- vigil_mapper/map_models_findings.py +130 -0
- vigil_mapper/map_storage.py +455 -0
- vigil_mapper/parse_cache.py +795 -0
- vigil_mapper/refactor_boundary_builder.py +266 -0
- vigil_mapper/runtime_builder.py +527 -0
- vigil_mapper/runtime_tracer.py +243 -0
- vigil_mapper/runtime_tracer_entry.py +199 -0
- vigil_mapper/semantic_diff.py +71 -0
- vigil_mapper/source_adapters/__init__.py +109 -0
- vigil_mapper/source_adapters/_base.py +264 -0
- vigil_mapper/source_adapters/_ir.py +156 -0
- vigil_mapper/source_adapters/_lexer.py +309 -0
- vigil_mapper/source_adapters/_patterns.py +212 -0
- vigil_mapper/source_adapters/_treesitter.py +182 -0
- vigil_mapper/source_adapters/go.py +553 -0
- vigil_mapper/source_adapters/java.py +541 -0
- vigil_mapper/source_adapters/javascript.py +626 -0
- vigil_mapper/source_adapters/python.py +325 -0
- vigil_mapper/source_adapters/typescript.py +749 -0
- vigil_mapper/structural_builder.py +586 -0
- vigil_mcp/__init__.py +1 -0
- vigil_mcp/_jobs.py +587 -0
- vigil_mcp/_paths.py +93 -0
- vigil_mcp/forensic_server.py +419 -0
- vigil_mcp/map_server.py +452 -0
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
"""Broad-except hidden-sentinel detector (Finding G.4 plan v7).
|
|
2
|
+
|
|
3
|
+
Detects exception-handler variants that silently swallow errors without the
|
|
4
|
+
broad_except.swallow check (which targets 'except Exception: pass'):
|
|
5
|
+
|
|
6
|
+
- bare ``except:`` (catches *everything* incl. KeyboardInterrupt)
|
|
7
|
+
- ``except BaseException:`` (catches *everything*)
|
|
8
|
+
- handler body is a single ``return None/{}/()/[]`` -- silent sentinel return
|
|
9
|
+
- handler body is ``[log.warning/debug(...), pass]`` -- log-then-swallow
|
|
10
|
+
|
|
11
|
+
Emit MEDIUM/WARN for every match.
|
|
12
|
+
Fail-open: parse errors / missing files -> DEBUG log, skip, never raise.
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import ast
|
|
17
|
+
import logging
|
|
18
|
+
|
|
19
|
+
from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity
|
|
20
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
21
|
+
from vigil_forensic.source_analysis import is_source_file
|
|
22
|
+
from .common import build_check_result, build_finding, normalize_path
|
|
23
|
+
|
|
24
|
+
_log = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
# Sentinel constant values that indicate "silent return".
# NOTE(review): no helper in this module reads this set; confirm it is still
# needed before relying on it.
_SENTINEL_VALUES = frozenset((None, "", 0))

# Logging method names that qualify for "log-then-swallow" detection
# (consumed by ``_is_log_then_swallow``).
_LOG_SWALLOW_METHODS = frozenset(("warning", "warn", "debug", "info"))

# F16c -- observability markers, consumed by ``_is_observability_call``.
# The intent is that an except-body which calls one of these before returning
# a sentinel counts as an explicit, logged fallback rather than a silent
# swallow.  Matching is done on the AST: the attribute name of the Call target.
#
# Attribute form, ANY receiver: ``receiver.<attr>(...)`` where ``<attr>`` is in
# _OBS_LOG_METHODS.  This deliberately matches the common project conventions
# (`logger.warning`, `_log.error`, `log.exception`) as well as user-defined
# wrappers that adopt the same verb names.
_OBS_LOG_METHODS = frozenset((
    "debug",
    "info",
    "warning",
    "warn",
    "error",
    "exception",
    "critical",
    "fatal",
    "log",
))

# Attribute form, ANY attribute, where the RECEIVER is one of these plain
# names.  Covers `metrics.increment(...)`, `alerts.send(...)`, etc.
_OBS_RECEIVERS = frozenset((
    "metrics",
    "alert",
    "alerts",
    "telemetry",
    "statsd",
    "sentry",
    "observability",
    "obs",
))

# Plain-call names (no attribute) that indicate stderr/CLI log equivalents.
_OBS_PLAIN_CALLS = frozenset(("print",))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# Internal helpers
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
def _is_bare_or_base(handler: ast.ExceptHandler) -> bool:
    """Return True when the handler catches *everything*.

    That is the case for:
      - a bare ``except:`` (``handler.type`` is ``None``),
      - ``except BaseException:``,
      - a qualified spelling such as ``except builtins.BaseException:``
        (matched on the terminal attribute name),
      - a tuple that contains any of the above, e.g.
        ``except (ValueError, BaseException):`` -- one catch-all member makes
        the whole clause a catch-all.
    """

    def _mentions_base_exception(node: ast.AST) -> bool:
        # Walk the (possibly nested) tuple of caught types looking for the
        # BaseException name in either plain or dotted form.
        if isinstance(node, ast.Name):
            return node.id == "BaseException"
        if isinstance(node, ast.Attribute):
            return node.attr == "BaseException"
        if isinstance(node, ast.Tuple):
            return any(_mentions_base_exception(elt) for elt in node.elts)
        return False

    caught = handler.type
    if caught is None:
        return True
    return _mentions_base_exception(caught)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _reraises(handler: ast.ExceptHandler) -> bool:
    """Tell whether the handler body contains a top-level ``raise``.

    Both the cancel-cleanup idiom (``except BaseException: <cleanup>; raise``)
    and translate-and-propagate (``raise SomeError(...) from exc``) let the
    error escape, so such handlers are not silent swallows and must not be
    flagged.  (The original author notes this was verified against
    filelock/_api.py:513-517 and asyncio.py:268-270.)

    Only direct children of the handler body are inspected: a ``raise`` that
    sits inside a nested ``try``/``if`` is not a guaranteed re-raise and does
    not count.
    """
    for statement in handler.body:
        if isinstance(statement, ast.Raise):
            return True
    return False
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _exception_names(node: ast.expr | None) -> tuple[str, ...]:
    """Collect the exception class names referenced by an ``except`` type node.

    Supported shapes:
      - ``Name``       -> ``("Exception",)``
      - ``Attribute``  -> only the terminal attribute is kept, so
        ``json.JSONDecodeError`` yields ``("JSONDecodeError",)``
      - ``Tuple``      -> the names of every element, flattened (nested tuples
        are flattened recursively)

    ``None`` and any other node shape produce ``()`` so that callers can treat
    the clause as "broad / unknown".
    """
    if isinstance(node, ast.Name):
        return (node.id,)
    if isinstance(node, ast.Attribute):
        return (node.attr,)
    if isinstance(node, ast.Tuple):
        return tuple(
            name
            for element in node.elts
            for name in _exception_names(element)
        )
    # Covers ``None`` (bare except) and unrecognised expressions alike.
    return ()
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _is_narrow_catch(handler: ast.ExceptHandler) -> bool:
    """Decide whether the handler catches only specific, non-broad exceptions.

    F16c rationale: ``except ValueError: return None`` is an intentional,
    type-scoped fallback -- naming the narrow type is the author's statement
    that this failure mode is expected and handled -- so such handlers should
    not be reported as silent swallows.

    Treated as broad (result ``False``):
      - bare ``except:``
      - ``except BaseException:``
      - ``except Exception:`` and any tuple that includes ``Exception``
      - clauses whose type expression yields no names at all (unknown shape)

    Everything else (``OSError``, ``SyntaxError``, ``json.JSONDecodeError``,
    ``subprocess.SubprocessError``, project-specific ``FooError`` ...) is
    considered narrow.
    """
    if _is_bare_or_base(handler):
        return False
    caught = _exception_names(handler.type)
    if not caught:
        # Unknown shape: report "not narrow" so the caller still gets to
        # decide based on the handler body.
        return False
    return {"Exception", "BaseException"}.isdisjoint(caught)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _is_observability_call(node: ast.AST) -> bool:
    """Tell whether ``node`` is a Call that routes to an observability sink.

    Three call shapes are accepted:
      1. ``<anything>.<verb>(...)`` where ``<verb>`` is in ``_OBS_LOG_METHODS``.
         The receiver may be any expression, which covers ``logger.warning``,
         ``_log.error``, ``self.log.debug``, ``LOG.exception`` and project
         wrappers that reuse the same verb names.
      2. ``<receiver>.<anything>(...)`` where the receiver is a plain name
         listed in ``_OBS_RECEIVERS``.
      3. A plain-name call listed in ``_OBS_PLAIN_CALLS`` (``print(...)``).

    The verb match is intentionally permissive so that project-specific log
    wrappers are recognised without a per-project allowlist.
    """
    if not isinstance(node, ast.Call):
        return False
    target = node.func
    if isinstance(target, ast.Name):
        # Shape 3 -- plain ``print(...)``; other bare names never qualify.
        return target.id in _OBS_PLAIN_CALLS
    if isinstance(target, ast.Attribute):
        # Shape 1 -- log verb on any receiver.
        if target.attr in _OBS_LOG_METHODS:
            return True
        # Shape 2 -- any attribute on a known observability receiver.
        receiver = target.value
        return isinstance(receiver, ast.Name) and receiver.id in _OBS_RECEIVERS
    return False
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _returns_silent_sentinel(stmt: ast.stmt) -> bool:
    """Return True iff ``stmt`` is a bare ``return`` or ``return None/{}/[]/()``.

    Only literal empties are recognised; calls such as ``dict()`` and other
    constants such as ``0`` or ``""`` are not treated as sentinels here.
    """
    if not isinstance(stmt, ast.Return):
        return False
    returned = stmt.value
    if returned is None:
        # Bare ``return`` carries no value node at all.
        return True
    if isinstance(returned, ast.Constant):
        return returned.value is None
    if isinstance(returned, ast.Dict):
        return not returned.keys
    if isinstance(returned, (ast.List, ast.Tuple)):
        return not returned.elts
    return False
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _is_silent_sentinel_return(handler: ast.ExceptHandler) -> bool:
    """Detect a broad handler whose only real statement is a sentinel return.

    F16c-tightened rule.  A handler is FLAGGED only when both hold:
      - after discarding ``pass`` statements, the body is exactly one
        ``return`` / ``return None/{}/()/[]``, and
      - the catch is broad (``Exception`` / ``BaseException`` / bare), i.e.
        ``_is_narrow_catch`` is False.

    NOT flagged:
      - narrow ``except SpecificError:`` clauses -- the type documents intent
        (``except OSError``, ``except json.JSONDecodeError`` ...);
      - bodies that log before returning (``logger.warning(...); return None``)
        -- they have more than one non-``pass`` statement;
      - a body consisting of a lone ``raise``;
      - any other body shape.

    The body-shape rule is kept strict on purpose: accepting multi-statement
    bodies would produce project-specific false positives where the first call
    is an error-surfacing wrapper (e.g. ``_error(handler, ...); return``) that
    cannot be recognised from the AST without a per-project allowlist.
    """
    statements = handler.body
    # F16c skip #1 -- narrow catches are acceptable design.
    if not statements or _is_narrow_catch(handler):
        return False

    # Every ``pass`` is tolerated noise, wherever it appears in the body.
    meaningful = [node for node in statements if not isinstance(node, ast.Pass)]
    if len(meaningful) != 1:
        return False

    (only,) = meaningful
    # F16c skip #3 -- a lone ``raise`` propagates the error.  Defensive:
    # the sentinel test below would reject it as well.
    if isinstance(only, ast.Raise):
        return False
    return _returns_silent_sentinel(only)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _is_log_then_swallow(handler: ast.ExceptHandler) -> bool:
    """Detect the two-statement body ``<obj>.<log method>(...)`` then ``pass``.

    All of the following must hold:
      - the handler body has exactly two statements;
      - the first is an expression statement wrapping a call made through an
        attribute whose name is in ``_LOG_SWALLOW_METHODS``
        (warning / warn / debug / info);
      - the second is ``pass``.
    """
    statements = handler.body
    if len(statements) != 2:
        return False
    log_stmt, trailing = statements
    if not (isinstance(trailing, ast.Pass) and isinstance(log_stmt, ast.Expr)):
        return False
    invoked = log_stmt.value
    if not isinstance(invoked, ast.Call):
        return False
    callee = invoked.func
    # Plain-name calls such as ``print(...)`` do not qualify; only attribute
    # calls are checked against the method-name set.
    return isinstance(callee, ast.Attribute) and callee.attr in _LOG_SWALLOW_METHODS
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# ---------------------------------------------------------------------------
|
|
260
|
+
# Per-handler analysis
|
|
261
|
+
# ---------------------------------------------------------------------------
|
|
262
|
+
|
|
263
|
+
def _classify_handler(
    handler: ast.ExceptHandler,
) -> tuple[bool, str, str]:
    """Classify one ExceptHandler as ``(flagged, sub_check_id, reason)``.

    A handler with a top-level ``raise`` propagates the error (cancel-cleanup
    idiom) and is never flagged.  Otherwise the first matching rule wins, in
    this priority order:
      1. bare ``except:`` / ``except BaseException:`` (most severe)
      2. silent sentinel return
      3. log-then-swallow

    When nothing matches the result is ``(False, "", "")``.
    """
    not_flagged = (False, "", "")
    if _reraises(handler):
        return not_flagged

    if _is_bare_or_base(handler):
        label = "except BaseException" if handler.type is not None else "bare except"
        return True, "broad_except.hidden_sentinel.bare_or_base", label

    # Remaining rules look at the handler body; order encodes priority.
    body_rules = (
        (
            _is_silent_sentinel_return,
            "broad_except.hidden_sentinel.silent_return",
            "silent sentinel return",
        ),
        (
            _is_log_then_swallow,
            "broad_except.hidden_sentinel.log_swallow",
            "log-then-swallow",
        ),
    )
    for matches, sub_check_id, reason in body_rules:
        if matches(handler):
            return True, sub_check_id, reason
    return not_flagged
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
# ---------------------------------------------------------------------------
|
|
289
|
+
# Public gate entry-point
|
|
290
|
+
# ---------------------------------------------------------------------------
|
|
291
|
+
|
|
292
|
+
def run_broad_except_hidden_sentinel_checks(ctx: PostExecGateContext):
    """Detect hidden-sentinel exception-swallowing patterns.

    For each path in ``ctx.changed_files_observed`` that ``is_source_file``
    accepts:
      1. Read the file (relative to ``ctx.project_dir``) and parse its AST.
      2. Walk every ``try`` statement, including ``try``/``except*`` on
         interpreters that have ``ast.TryStar``.
      3. Classify each ExceptHandler via ``_classify_handler`` (bare /
         BaseException, silent-return, log-then-swallow).
      4. Emit one MEDIUM / REVISE finding per flagged handler.

    Fail-open: unreadable, undecodable or unparsable files are logged at DEBUG
    and skipped; this function does not raise for them.

    Returns the check result built by ``build_check_result`` for check id
    ``broad_except.hidden_sentinel``.
    """
    # ``except*`` blocks parse to ast.TryStar (Python 3.11+).  Fall back to
    # ast.Try where the class does not exist so the tuple stays valid.
    try_node_types = (ast.Try, getattr(ast, "TryStar", ast.Try))
    findings = []

    for raw_path in ctx.changed_files_observed:
        normalized = normalize_path(raw_path)
        if not is_source_file(normalized):
            continue

        abs_path = ctx.project_dir / normalized
        try:
            # utf-8-sig drops a leading BOM; with plain utf-8 the BOM reaches
            # the parser as U+FEFF and the file would be skipped as unparsable.
            src = abs_path.read_text(encoding="utf-8-sig")
            tree = ast.parse(src)
        except (OSError, SyntaxError, ValueError, RecursionError) as exc:
            # ValueError covers UnicodeDecodeError and the "null bytes" error
            # raised by older interpreters; RecursionError covers pathologically
            # nested sources.  All of them must stay fail-open.
            _log.debug("broad_except_hidden_sentinel: failed to parse %s: %s", normalized, exc)
            continue

        for node in ast.walk(tree):
            if not isinstance(node, try_node_types):
                continue
            for handler in node.handlers:
                flagged, sub_id, reason = _classify_handler(handler)
                if not flagged:
                    continue

                line_no = handler.lineno
                findings.append(
                    build_finding(
                        check_id=sub_id,
                        category=GateCategory.FALLBACK,
                        title=f"Hidden-sentinel exception handler ({reason}) in {normalized}:{line_no}",
                        severity=GateSeverity.MEDIUM,
                        impact=GateImpact.REVISE,
                        summary=(
                            f"{normalized} line {line_no}: {reason} -- exception handler "
                            "silently discards the error without surfacing it to callers "
                            "or an observability layer."
                        ),
                        recommendation=(
                            "Narrow the exception type to the specific error expected, "
                            "log it at WARNING or ERROR level, and re-raise or propagate "
                            "via an obs dict. Avoid returning sentinel values from except "
                            "blocks unless the caller is explicitly documented to handle them."
                        ),
                        evidence=[
                            EvidenceReference(
                                kind="file",
                                path=normalized,
                                detail=f"line:{line_no}",
                            )
                        ],
                        repair_kind='refactor',
                        executor_action='Address finding details',
                        proof_required='Issue fixed',
                        allowlist_allowed=False,
                    )
                )

    return build_check_result(
        check_id="broad_except.hidden_sentinel",
        category=GateCategory.FALLBACK,
        findings=findings,
    )
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import hashlib
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Iterable
|
|
8
|
+
|
|
9
|
+
# Inline suppression comment: ``# noqa: <ids>`` or ``# autoforensics-skip: <ids>``.
# Group 1 captures the comma-separated id list (word characters, dots, hyphens).
_ALLOWLIST_COMMENT_RE = re.compile(r"#\s*(?:noqa|autoforensics-skip)\s*:\s*([\w.\-]+(?:\s*,\s*[\w.\-]+)*)")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# F16d: Generated / sanctioned-asset-bundle detector.
|
|
16
|
+
# Opt-in marker pattern: size + structural gates can skip files whose first
|
|
17
|
+
# few lines carry one of the markers below. Markers must be explicit — regular
|
|
18
|
+
# code cannot be accidentally skipped.
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
_GENERATED_MARKERS = (
    "# autogenerated",
    "# auto-generated",
    "# Generated by",
    "# DO NOT EDIT",
    "# This file is auto-generated",
    "@generated",
    "# @generated",
    "# SANCTIONED_ASSET_BUNDLE",  # Vigil convention for sanctioned bundles
)


def is_generated_file(source: str, max_lines: int = 10) -> bool:
    """Report whether the head of *source* carries a generator/sanctioned marker.

    F16d false-positive reduction: size and structural gates (size.*,
    god_object_zones.*) use this to skip auto-generated files and sanctioned
    asset bundles.  The mechanism is opt-in: a file is only skipped when one of
    the ``_GENERATED_MARKERS`` strings appears near its top.

    Matching rules:
      - each inspected line is stripped and lower-cased, then tested for a
        lower-cased marker as a substring;
      - only ``source.splitlines()[:max_lines]`` is inspected (default 10);
      - an empty / falsy source yields False (fail-open).
    """
    if not source:
        return False
    needles = tuple(marker.lower() for marker in _GENERATED_MARKERS)
    for raw_line in source.splitlines()[:max_lines]:
        folded = raw_line.strip().lower()
        if any(needle in folded for needle in needles):
            return True
    return False
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def has_allowlist_for(content: str, check_id: str, line_number: int | None = None) -> bool:
    """Tell whether an inline allowlist comment suppresses *check_id*.

    Recognised comment forms:
        # noqa: check_id
        # noqa: check_id, other_check
        # autoforensics-skip: check_id

    A listed id matches when it equals *check_id* or is a dotted prefix of it
    (``broad_except`` suppresses ``broad_except.swallow``).

    Scope of the search:
      - *line_number* given and within the file: that line and the line
        directly above it;
      - otherwise (``None`` or out of range): the first 20 lines, acting as a
        file-level allowlist.
    """
    all_lines = content.splitlines()
    targeted = line_number is not None and 1 <= line_number <= len(all_lines)
    if targeted:
        scope = [all_lines[line_number - 1]]
        if line_number > 1:
            scope.append(all_lines[line_number - 2])
    else:
        scope = all_lines[:20]

    for text in scope:
        for found in _ALLOWLIST_COMMENT_RE.finditer(text):
            for piece in found.group(1).split(","):
                entry = piece.strip()
                if check_id == entry or check_id.startswith(entry + "."):
                    return True
    return False
|
|
83
|
+
|
|
84
|
+
from vigil_forensic._shared import EvidenceReference, GateCategory, GateCheckResult, GateFileSnapshot, GateFinding, GateImpact, GateSeverity
|
|
85
|
+
from vigil_forensic.gate_models import PostExecGateContext
|
|
86
|
+
import logging
|
|
87
|
+
_log = logging.getLogger(__name__)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def normalize_path(path: str) -> str:
    """Return *path* as a forward-slash, repo-relative string.

    Backslashes become ``/``; leading ``./`` segments and leading ``/``
    characters are removed.  A falsy input (``None``, ``""``) and a lone
    ``"."`` yield ``""``.

    Only whole ``./`` prefixes are removed.  ``str.lstrip("./")`` strips any
    run of ``.`` and ``/`` *characters*, which turned ``.github/ci.yml`` into
    ``github/ci.yml`` and ``../x`` into ``x``; dot-prefixed names and ``..``
    segments are now preserved.
    """
    text = str(path or "").replace("\\", "/")
    while True:
        if text.startswith("./"):
            text = text[2:]
        elif text.startswith("/"):
            text = text[1:]
        else:
            break
    # A bare "." means "current directory", i.e. no relative component left.
    return "" if text == "." else text
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def build_finding(
    *,
    check_id: str,
    category: GateCategory,
    title: str,
    severity: GateSeverity,
    impact: GateImpact,
    summary: str,
    recommendation: str,
    evidence: Iterable[EvidenceReference] = (),
    repair_kind: str = "",
    executor_action: str = "",
    proof_required: str = "",
    allowlist_allowed: bool = True,
    preferred_fix_shape: str = "",
    # Sprint A (2026-04-23): confidence/applicability layer. The defaults keep
    # legacy semantics: a gate that does not opt in emits an "applicable"
    # finding with full detector confidence.
    confidence: float = 1.0,
    applicability: str = "applicable",
    analysis_mode: str = "heuristic",
    applicability_reason: str = "",
) -> GateFinding:
    """Assemble a ``GateFinding`` and stamp it with a stable fingerprint.

    All arguments are keyword-only and are forwarded unchanged to
    ``GateFinding``; *evidence* is materialised into a tuple first.

    The fingerprint is the first 16 hex characters of the SHA-256 of
    ``check_id|category.value|title|impact.value|<evidence keys...>`` where
    each evidence key is ``item.path`` or, when that is falsy, ``item.detail``.
    The Sprint A fields (confidence, applicability, analysis_mode,
    applicability_reason) are deliberately left out of the hash: confidence
    drifts with signal learning, and including it would invalidate existing
    allowlists.
    """
    evidence_items = tuple(evidence)

    fingerprint_fields = [check_id, category.value, title, impact.value]
    fingerprint_fields.extend(ref.path or ref.detail for ref in evidence_items)
    digest = hashlib.sha256("|".join(fingerprint_fields).encode("utf-8"))

    return GateFinding(
        check_id=check_id,
        category=category,
        title=title,
        severity=severity,
        impact=impact,
        summary=summary,
        recommendation=recommendation,
        evidence=evidence_items,
        fingerprint=digest.hexdigest()[:16],
        repair_kind=repair_kind,
        executor_action=executor_action,
        proof_required=proof_required,
        allowlist_allowed=allowlist_allowed,
        preferred_fix_shape=preferred_fix_shape,
        confidence=confidence,
        applicability=applicability,
        analysis_mode=analysis_mode,
        applicability_reason=applicability_reason,
    )
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def build_check_result(
    *,
    check_id: str,
    category: GateCategory,
    findings: Iterable[GateFinding] = (),
    notes: Iterable[str] = (),
) -> GateCheckResult:
    """Wrap *findings* and *notes* into a ``GateCheckResult``.

    *findings* is stored as a tuple.  Each note is converted with ``str`` and
    kept only if it is not blank after stripping whitespace.
    """
    kept_notes = []
    for note in notes:
        rendered = str(note)
        if rendered.strip():
            kept_notes.append(rendered)
    return GateCheckResult(
        check_id=check_id,
        category=category,
        findings=tuple(findings),
        notes=tuple(kept_notes),
    )
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def read_snapshot(project_dir: Path, path: str) -> GateFileSnapshot:
    """Build a ``GateFileSnapshot`` for *path* resolved under *project_dir*.

    The path is normalised with ``normalize_path`` first.  If the target does
    not exist or is not a regular file, an ``exists=False`` snapshot with zero
    size, zero lines and empty text is returned.  Otherwise the file is decoded
    as ``utf-8-sig`` with undecodable bytes replaced, and the snapshot carries
    the on-disk size (from ``stat``), the line count and the text.
    """
    relative = normalize_path(path)
    target = project_dir / relative
    if not (target.exists() and target.is_file()):
        return GateFileSnapshot(path=relative, exists=False, size=0, line_count=0, text="")

    contents = target.read_text(encoding="utf-8-sig", errors="replace")
    byte_size = target.stat().st_size
    return GateFileSnapshot(
        path=relative,
        exists=True,
        size=byte_size,
        line_count=len(contents.splitlines()),
        text=contents,
    )
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def iter_touched_snapshots(ctx: PostExecGateContext) -> list[GateFileSnapshot]:
    """Return one snapshot per entry of ``ctx.touched_files``, in order.

    A snapshot already present in ``ctx.file_snapshots`` under the normalised
    path is reused; otherwise the file is read via ``read_snapshot``.
    """

    def _snapshot_for(raw_path: str) -> GateFileSnapshot:
        key = normalize_path(raw_path)
        if key in ctx.file_snapshots:
            return ctx.file_snapshots[key]
        return read_snapshot(ctx.project_dir, key)

    return [_snapshot_for(raw_path) for raw_path in ctx.touched_files]
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def hash_normalized_code(text: str) -> str:
    """Return a SHA-256 hex digest of *text* with names and spacing erased.

    The text is stripped, every identifier-like word (including keywords) is
    replaced by ``ID``, and each whitespace run is collapsed to one space, so
    snippets that differ only in naming or layout hash identically.
    """
    anonymised = re.sub(r"\b[A-Za-z_][A-Za-z0-9_]*\b", "ID", text.strip())
    collapsed = re.sub(r"\s+", " ", anonymised)
    return hashlib.sha256(collapsed.encode("utf-8")).hexdigest()
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def hash_text_block(text: str) -> str:
    """Return a SHA-256 hex digest of *text* after whitespace normalisation only.

    Used for text-block dedup.  In contrast to ``hash_normalized_code``, which
    rewrites every identifier to ``ID`` to measure structural similarity, the
    actual names are kept here, so the hash catches literal copy-paste rather
    than structural coincidence (e.g. two unrelated dataclasses with the same
    field count).
    """
    whitespace_run = re.compile(r"\s+")
    collapsed = whitespace_run.sub(" ", text.strip())
    return hashlib.sha256(collapsed.encode("utf-8")).hexdigest()
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def extract_python_functions(text: str) -> list[tuple[str, int, int, str]]:
    """List every function defined in Python source *text*.

    Returns ``(name, start_line, end_line, snippet)`` tuples for each ``def``
    and ``async def``, nested ones included, in ``ast.walk`` order
    (breadth-first, not source order).  Line numbers are 1-based and
    inclusive; ``start_line`` is the ``def`` line, so decorators are not part
    of the snippet.  The snippet is the covered source lines joined with
    ``"\\n"``.

    Source that cannot be parsed yields ``[]``.  ``ValueError`` is handled
    together with ``SyntaxError`` because older interpreters raise it for
    source containing null bytes.
    """
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError):
        return []

    # Split exactly where the parser does (\r\n, \r, \n).  str.splitlines()
    # also breaks on form feed and similar characters, which shifts every
    # later line index in files that contain e.g. a ^L page break.
    source_lines = re.split(r"\r\n|\r|\n", text)

    found: list[tuple[str, int, int, str]] = []
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        first = int(getattr(node, "lineno", 1))
        # end_lineno may be missing or None on hand-built nodes.
        last = int(getattr(node, "end_lineno", None) or first)
        snippet = "\n".join(source_lines[first - 1:last])
        found.append((str(node.name), first, last, snippet))
    return found
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def max_nesting_depth(text: str) -> int:
    """Return the deepest nesting of block statements in Python source *text*.

    Counted constructs: ``for`` / ``async for`` / ``while`` / ``if`` /
    ``with`` / ``async with`` / ``try`` (including ``try``/``except*``) /
    ``match``.  Function and class definitions do not add depth themselves.
    An ``elif`` is an ``if`` nested in the ``else`` branch and therefore
    counts as one level deeper.

    Source that cannot be parsed yields ``0``.
    """
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError):
        # ValueError: older interpreters raise it for null bytes in source.
        return 0

    # ast.Match (3.10+) and ast.TryStar (3.11+) are looked up defensively so
    # the function keeps working on interpreters that lack them.
    nesting_types = tuple(
        node_type
        for node_type in (
            ast.For,
            ast.AsyncFor,
            ast.While,
            ast.If,
            ast.With,
            ast.AsyncWith,
            ast.Try,
            getattr(ast, "TryStar", None),
            getattr(ast, "Match", None),
        )
        if node_type is not None
    )

    # Explicit stack instead of recursion: long expression chains produce ASTs
    # deeper than the interpreter's recursion limit.
    deepest = 0
    pending = [(tree, 0)]
    while pending:
        node, depth = pending.pop()
        if isinstance(node, nesting_types):
            depth += 1
            if depth > deepest:
                deepest = depth
        pending.extend((child, depth) for child in ast.iter_child_nodes(node))
    return deepest
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
# ---------------------------------------------------------------------------
|
|
242
|
+
# F14c: Detector self-match suppression helpers
|
|
243
|
+
# ---------------------------------------------------------------------------
|
|
244
|
+
# Canonical implementation lives in ._ast_helpers (introduced in F14a).
|
|
245
|
+
# Re-export for backwards compatibility so gates can import these names from
|
|
246
|
+
# either ``..common`` or ``.._ast_helpers``.
|
|
247
|
+
from ._ast_helpers import ( # noqa: E402, F401 -- public re-export
|
|
248
|
+
collect_constant_container_literal_lines,
|
|
249
|
+
is_section_header_comment,
|
|
250
|
+
is_cli_surface_file,
|
|
251
|
+
collect_main_block_line_ranges,
|
|
252
|
+
line_in_ranges,
|
|
253
|
+
)
|