vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,365 @@
1
+ """Broad-except hidden-sentinel detector (Finding G.4 plan v7).
2
+
3
+ Detects exception-handler variants that silently swallow errors but are not
4
+ caught by the broad_except.swallow check (which targets 'except Exception: pass'):
5
+
6
+ - bare ``except:`` (catches *everything* incl. KeyboardInterrupt)
7
+ - ``except BaseException:`` (catches *everything*)
8
+ - handler body is a single ``return None/{}/()/[]`` -- silent sentinel return
9
+ - handler body is ``[log.warning/debug(...), pass]`` -- log-then-swallow
10
+
11
+ Emit MEDIUM/WARN for every match.
12
+ Fail-open: parse errors / missing files -> DEBUG log, skip, never raise.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import ast
17
+ import logging
18
+
19
+ from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity
20
+ from vigil_forensic.gate_models import PostExecGateContext
21
+ from vigil_forensic.source_analysis import is_source_file
22
+ from .common import build_check_result, build_finding, normalize_path
23
+
24
+ _log = logging.getLogger(__name__)
25
+
26
# Constant values regarded as "silent return" sentinels.
# NOTE(review): no helper visible in this module reads this set (the return
# detector inspects AST node shapes instead) -- confirm whether it is still
# used elsewhere before relying on it.
_SENTINEL_VALUES = frozenset((None, "", 0))

# Method names whose call, when followed by ``pass``, is treated as the
# "log-then-swallow" pattern.
_LOG_SWALLOW_METHODS = frozenset(("warning", "warn", "debug", "info"))

# F16c -- observability markers, matched on the AST of a Call target.
#
# Attribute form, any receiver: ``<receiver>.<attr>(...)`` counts when the
# attribute name is one of the verbs below. This intentionally covers the
# usual conventions (`logger.warning`, `_log.error`, `log.exception`) as well
# as user-defined wrappers that reuse the same verb names.
_OBS_LOG_METHODS = frozenset((
    "debug",
    "info",
    "warning",
    "warn",
    "error",
    "exception",
    "critical",
    "fatal",
    "log",
))

# Attribute form, known receiver: any attribute called on one of these names
# counts, e.g. `metrics.increment(...)` or `alerts.send(...)`.
_OBS_RECEIVERS = frozenset((
    "metrics",
    "alert",
    "alerts",
    "telemetry",
    "statsd",
    "sentry",
    "observability",
    "obs",
))

# Plain-name calls (no attribute access) treated as stderr/CLI log equivalents.
_OBS_PLAIN_CALLS = frozenset(("print",))
55
+
56
+
57
+ # ---------------------------------------------------------------------------
58
+ # Internal helpers
59
+ # ---------------------------------------------------------------------------
60
+
61
def _is_bare_or_base(handler: ast.ExceptHandler) -> bool:
    """Return True when the handler catches everything.

    Recognised forms:
    - bare ``except:`` (``handler.type`` is None)
    - ``except BaseException:``
    - a dotted reference whose last component is ``BaseException``
      (for example ``builtins.BaseException``)
    - a tuple, possibly nested, that lists any of the above -- a tuple that
      includes ``BaseException`` catches exactly what ``BaseException``
      alone does.

    Args:
        handler: The ``except`` clause to inspect.

    Returns:
        True for a catch-all handler, False otherwise.
    """
    caught = handler.type
    if caught is None:
        return True

    # Walk the type expression iteratively so nested tuples are flattened.
    pending = [caught]
    while pending:
        expr = pending.pop()
        if isinstance(expr, ast.Tuple):
            pending.extend(expr.elts)
        elif isinstance(expr, ast.Name):
            if expr.id == "BaseException":
                return True
        elif isinstance(expr, ast.Attribute):
            if expr.attr == "BaseException":
                return True
    return False
68
+
69
+
70
def _reraises(handler: ast.ExceptHandler) -> bool:
    """Tell whether the handler body contains a top-level ``raise``.

    Both a bare ``raise`` after cleanup and a translating
    ``raise SomeError(...) from exc`` let the error propagate, so such
    handlers are not silent swallows.

    Only direct children of the handler body are examined. A ``raise``
    nested inside an ``if`` or an inner ``try`` does not count, because it
    is not guaranteed to execute.
    """
    for statement in handler.body:
        if isinstance(statement, ast.Raise):
            return True
    return False
82
+
83
+
84
def _exception_names(node: ast.expr | None) -> tuple[str, ...]:
    """Return a flat tuple of exception class names referenced by ``node``.

    Supported shapes of an ``except`` clause argument:
    - ``None`` (bare except)  -> ``()``
    - ``Name``                -> ``("Exception",)``
    - ``Attribute``           -> terminal attribute only, e.g.
      ``json.JSONDecodeError`` yields ``("JSONDecodeError",)``
    - ``Tuple`` of the above  -> names concatenated in source order, with
      nested tuples flattened

    Any other shape (call, subscript, starred expression, ...) cannot be
    resolved statically and yields ``()`` so callers treat the catch as
    "broad / unknown". The same applies to a tuple that contains such an
    element: the whole tuple collapses to ``()`` rather than reporting only
    the resolvable names, which would make a possibly broad catch look
    narrow.

    Args:
        node: The ``type`` expression of an ``ast.ExceptHandler``, or None.

    Returns:
        The referenced class names, or ``()`` when nothing can be resolved.
    """
    if node is None:
        return ()
    if isinstance(node, ast.Name):
        return (node.id,)
    if isinstance(node, ast.Attribute):
        return (node.attr,)
    if isinstance(node, ast.Tuple):
        collected: list[str] = []
        for element in node.elts:
            resolved = _exception_names(element)
            if not resolved:
                # One unknown member makes the whole catch unknown.
                return ()
            collected.extend(resolved)
        return tuple(collected)
    return ()
105
+
106
+
107
def _is_narrow_catch(handler: ast.ExceptHandler) -> bool:
    """Tell whether the handler is scoped to specific, non-broad exceptions.

    F16c rationale: in ``except ValueError: return None`` the named type is
    the author's statement that this failure is expected and handled, so
    such handlers are not reported as silent swallows.

    Not narrow:
    - anything ``_is_bare_or_base`` accepts (bare ``except:`` /
      ``except BaseException:``)
    - a type expression for which ``_exception_names`` yields no names
      (unknown shape); body inspection then decides
    - any catch whose resolved names include ``Exception`` or
      ``BaseException``

    Every other catch -- ``OSError``, ``SyntaxError``,
    ``json.JSONDecodeError``, project-specific ``FooError`` and so on -- is
    narrow.
    """
    if _is_bare_or_base(handler):
        return False
    caught = _exception_names(handler.type)
    if not caught:
        return False
    return {"Exception", "BaseException"}.isdisjoint(caught)
133
+
134
+
135
def _is_observability_call(node: ast.AST) -> bool:
    """Tell whether ``node`` is a call that reports to an observability sink.

    A node qualifies when it is an ``ast.Call`` whose target has one of
    these shapes:
    1. plain name listed in ``_OBS_PLAIN_CALLS`` (``print(...)``);
    2. attribute access whose attribute name is in ``_OBS_LOG_METHODS``,
       on any receiver expression (``logger.warning``, ``self.log.debug``,
       ``LOG.exception``, project wrappers, ...);
    3. attribute access on a plain name listed in ``_OBS_RECEIVERS``, with
       any attribute (``metrics.increment``, ``alerts.send``, ...).

    Shape 2 is deliberately permissive: the method name alone decides, so
    no per-project logger allowlist is needed.
    """
    if not isinstance(node, ast.Call):
        return False

    target = node.func
    if isinstance(target, ast.Name):
        return target.id in _OBS_PLAIN_CALLS
    if isinstance(target, ast.Attribute):
        if target.attr in _OBS_LOG_METHODS:
            return True
        receiver = target.value
        return isinstance(receiver, ast.Name) and receiver.id in _OBS_RECEIVERS
    return False
166
+
167
+
168
def _returns_silent_sentinel(stmt: ast.stmt) -> bool:
    """Tell whether ``stmt`` returns nothing or an empty placeholder.

    True for a bare ``return``, ``return None``, and returns of an empty
    dict, list or tuple literal. False for every other statement or
    returned expression.
    """
    if not isinstance(stmt, ast.Return):
        return False

    returned = stmt.value
    if returned is None:
        # Bare ``return``.
        return True
    if isinstance(returned, ast.Constant):
        return returned.value is None
    if isinstance(returned, ast.Dict):
        return not returned.keys
    if isinstance(returned, (ast.List, ast.Tuple)):
        return not returned.elts
    return False
184
+
185
+
186
def _is_silent_sentinel_return(handler: ast.ExceptHandler) -> bool:
    """Tell whether the handler does nothing but return a sentinel (F16c).

    Flagged only when both hold:
    - the catch is not narrow according to ``_is_narrow_catch``, and
    - once every ``pass`` statement is ignored, the body consists of exactly
      one statement that ``_returns_silent_sentinel`` accepts.

    Not flagged:
    - narrow ``except SpecificError:`` handlers -- the type documents intent
    - bodies that log before returning -- they have more than one statement
    - a body whose single statement is a ``raise``
    - any other body shape

    The single-statement constraint is intentionally strict. Accepting
    multi-statement bodies would require recognising project-specific
    error-surface wrappers (e.g. ``_error(handler, ...); return``), which
    cannot be done from the AST without a per-project allowlist.
    """
    statements = handler.body
    if not statements or _is_narrow_catch(handler):
        return False

    # ``pass`` carries no behaviour, so it is disregarded wherever it appears.
    meaningful = [node for node in statements if not isinstance(node, ast.Pass)]
    if len(meaningful) != 1:
        return False

    (only,) = meaningful
    if isinstance(only, ast.Raise):
        # A lone ``raise`` propagates the error; defensive, never a swallow.
        return False
    return _returns_silent_sentinel(only)
232
+
233
+
234
def _is_log_then_swallow(handler: ast.ExceptHandler) -> bool:
    """Tell whether the handler body is exactly ``<log call>; pass``.

    Conditions, all required:
    - the body has exactly two statements;
    - the first is an expression statement wrapping a call made through an
      attribute whose name is in ``_LOG_SWALLOW_METHODS``
      (warning/warn/debug/info);
    - the second is ``pass``.
    """
    statements = handler.body
    if len(statements) != 2:
        return False

    leading, trailing = statements
    if not (isinstance(trailing, ast.Pass) and isinstance(leading, ast.Expr)):
        return False

    invoked = leading.value
    return (
        isinstance(invoked, ast.Call)
        and isinstance(invoked.func, ast.Attribute)
        and invoked.func.attr in _LOG_SWALLOW_METHODS
    )
257
+
258
+
259
+ # ---------------------------------------------------------------------------
260
+ # Per-handler analysis
261
+ # ---------------------------------------------------------------------------
262
+
263
def _classify_handler(
    handler: ast.ExceptHandler,
) -> tuple[bool, str, str]:
    """Classify one ``except`` clause as ``(flagged, sub_check_id, reason)``.

    A handler with a top-level ``raise`` propagates the error and is never
    flagged. Otherwise the first matching rule wins, in this order:
    1. bare ``except:`` / ``except BaseException:``
    2. silent sentinel return
    3. log-then-swallow

    Unflagged handlers yield ``(False, "", "")``.
    """
    not_flagged = (False, "", "")
    if _reraises(handler):
        return not_flagged

    if _is_bare_or_base(handler):
        label = "except BaseException" if handler.type is not None else "bare except"
        return True, "broad_except.hidden_sentinel.bare_or_base", label

    # Body-shape rules, evaluated in priority order.
    body_rules = (
        (
            _is_silent_sentinel_return,
            "broad_except.hidden_sentinel.silent_return",
            "silent sentinel return",
        ),
        (
            _is_log_then_swallow,
            "broad_except.hidden_sentinel.log_swallow",
            "log-then-swallow",
        ),
    )
    for matches, sub_check_id, reason in body_rules:
        if matches(handler):
            return True, sub_check_id, reason
    return not_flagged
286
+
287
+
288
+ # ---------------------------------------------------------------------------
289
+ # Public gate entry-point
290
+ # ---------------------------------------------------------------------------
291
+
292
def run_broad_except_hidden_sentinel_checks(ctx: PostExecGateContext):
    """Detect hidden-sentinel exception-swallowing patterns.

    For each path in ``ctx.changed_files_observed`` that ``is_source_file``
    accepts:
    1. Read the file below ``ctx.project_dir`` and parse it with ``ast``.
    2. Visit every ``try`` statement, including ``try``/``except*`` blocks
       on interpreters that provide ``ast.TryStar``.
    3. Classify each handler with ``_classify_handler`` (bare/BaseException,
       silent sentinel return, log-then-swallow).
    4. Emit one finding (severity MEDIUM, impact REVISE) per flagged handler.

    Fail-open: files that cannot be read or parsed are logged at DEBUG and
    skipped; the check itself does not raise for them.

    Args:
        ctx: Gate context supplying ``changed_files_observed`` and
            ``project_dir``.

    Returns:
        The result of ``build_check_result`` for check id
        ``broad_except.hidden_sentinel`` carrying all collected findings.
    """
    # ``except*`` parses to ast.TryStar (Python 3.11+), which is a separate
    # node class from ast.Try; look it up defensively for older interpreters.
    try_star = getattr(ast, "TryStar", None)
    try_node_types = (ast.Try,) if try_star is None else (ast.Try, try_star)

    findings = []

    for raw_path in ctx.changed_files_observed:
        normalized = normalize_path(raw_path)
        if not is_source_file(normalized):
            continue

        abs_path = ctx.project_dir / normalized
        try:
            # utf-8-sig drops a leading BOM, which ast.parse would otherwise
            # reject when given a str.
            src = abs_path.read_text(encoding="utf-8-sig")
            tree = ast.parse(src)
        except (OSError, SyntaxError, ValueError, RecursionError) as exc:
            # ValueError covers UnicodeDecodeError (its subclass) and the
            # "null bytes" error raised by ast.parse on older interpreters;
            # RecursionError covers pathologically nested sources.
            _log.debug("broad_except_hidden_sentinel: failed to parse %s: %s", normalized, exc)
            continue

        for node in ast.walk(tree):
            if not isinstance(node, try_node_types):
                continue
            for handler in node.handlers:
                flagged, sub_id, reason = _classify_handler(handler)
                if not flagged:
                    continue

                line_no = handler.lineno
                findings.append(
                    build_finding(
                        check_id=sub_id,
                        category=GateCategory.FALLBACK,
                        title=f"Hidden-sentinel exception handler ({reason}) in {normalized}:{line_no}",
                        severity=GateSeverity.MEDIUM,
                        impact=GateImpact.REVISE,
                        summary=(
                            f"{normalized} line {line_no}: {reason} -- exception handler "
                            "silently discards the error without surfacing it to callers "
                            "or an observability layer."
                        ),
                        recommendation=(
                            "Narrow the exception type to the specific error expected, "
                            "log it at WARNING or ERROR level, and re-raise or propagate "
                            "via an obs dict. Avoid returning sentinel values from except "
                            "blocks unless the caller is explicitly documented to handle them."
                        ),
                        evidence=[
                            EvidenceReference(
                                kind="file",
                                path=normalized,
                                detail=f"line:{line_no}",
                            )
                        ],
                        repair_kind='refactor',
                        executor_action='Address finding details',
                        proof_required='Issue fixed',
                        allowlist_allowed=False,
                    )
                )

    return build_check_result(
        check_id="broad_except.hidden_sentinel",
        category=GateCategory.FALLBACK,
        findings=findings,
    )
@@ -0,0 +1,253 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import hashlib
5
+ import re
6
+ from pathlib import Path
7
+ from typing import Iterable
8
+
9
# Inline suppression comment: ``# noqa: <ids>`` or
# ``# autoforensics-skip: <ids>``. Group 1 captures the comma-separated id
# list; an id may contain word characters, dots and hyphens.
_ALLOWLIST_COMMENT_RE = re.compile(
    r"#\s*(?:noqa|autoforensics-skip)\s*:\s*"  # marker keyword, then a colon
    r"([\w.\-]+(?:\s*,\s*[\w.\-]+)*)"  # one or more ids separated by commas
)
12
+
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # F16d: Generated / sanctioned-asset-bundle detector.
16
+ # Opt-in marker pattern: size + structural gates can skip files whose first
17
+ # few lines carry one of the markers below. Markers must be explicit — regular
18
+ # code cannot be accidentally skipped.
19
+ # ---------------------------------------------------------------------------
20
+
21
# Markers that opt a file out of the size/structural gates when one of them
# appears near the top of the file. Compared case-insensitively.
_GENERATED_MARKERS = (
    "# autogenerated",
    "# auto-generated",
    "# Generated by",
    "# DO NOT EDIT",
    "# This file is auto-generated",
    "@generated",
    "# @generated",
    "# SANCTIONED_ASSET_BUNDLE",  # Vigil convention for sanctioned bundles
)


def is_generated_file(source: str, max_lines: int = 10) -> bool:
    """Report whether *source* carries a generated/sanctioned marker up top.

    F16d false-positive reduction: the size and structural gates (size.*,
    god_object_zones.*) call this to skip auto-generated files and
    sanctioned asset bundles. Skipping is opt-in: a file is only skipped
    when one of the ``_GENERATED_MARKERS`` strings appears in its head.

    Matching rules:
    - each inspected line is stripped and lower-cased, then tested for a
      lower-cased marker as a substring;
    - only the first *max_lines* lines are inspected (default 10);
    - empty or missing source yields False.
    """
    if not source:
        return False

    needles = tuple(marker.lower() for marker in _GENERATED_MARKERS)
    for raw_line in source.splitlines()[:max_lines]:
        candidate = raw_line.strip().lower()
        if any(needle in candidate for needle in needles):
            return True
    return False
56
+
57
+
58
def has_allowlist_for(content: str, check_id: str, line_number: int | None = None) -> bool:
    """Decide whether an inline allowlist comment suppresses *check_id*.

    Recognised comment forms:
        # noqa: check_id
        # noqa: check_id, other_check
        # autoforensics-skip: check_id

    A listed id suppresses the check when it equals *check_id* or is a
    dotted prefix of it (``foo`` suppresses ``foo.bar``).

    Lines searched:
    - *line_number* given and within the file: that line and the one
      before it;
    - *line_number* None, or outside ``1..len(lines)``: the first 20 lines
      of the file (file-level allowlist).
    """
    rows = content.splitlines()
    if line_number is None or not (1 <= line_number <= len(rows)):
        window = rows[:20]
    else:
        # The target line plus, when there is one, the line preceding it.
        window = rows[max(line_number - 2, 0):line_number]

    for row in window:
        for hit in _ALLOWLIST_COMMENT_RE.finditer(row):
            listed = [token.strip() for token in hit.group(1).split(",")]
            if any(
                check_id == token or check_id.startswith(token + ".")
                for token in listed
            ):
                return True
    return False
83
+
84
+ from vigil_forensic._shared import EvidenceReference, GateCategory, GateCheckResult, GateFileSnapshot, GateFinding, GateImpact, GateSeverity
85
+ from vigil_forensic.gate_models import PostExecGateContext
86
+ import logging
87
+ _log = logging.getLogger(__name__)
88
+
89
+
90
def normalize_path(path: str) -> str:
    """Return *path* as a forward-slash path relative to the project root.

    Backslashes become ``/`` and any leading ``./`` or ``/`` prefixes are
    removed, repeatedly. Only whole prefixes are removed: a leading dot that
    belongs to a name (``.github/ci.py``, ``.env``) is kept, whereas
    ``str.lstrip("./")`` would strip it because it removes any run of the
    characters ``.`` and ``/``. ``..`` segments are left untouched.

    Args:
        path: Raw path; a falsy value is treated as the empty string.

    Returns:
        The normalized path; ``""`` for empty input or a lone ``"."``.
    """
    text = str(path or "").replace("\\", "/")
    while True:
        if text.startswith("./"):
            text = text[2:]
        elif text.startswith("/"):
            text = text[1:]
        else:
            break
    # A lone "." denotes the project root itself.
    return "" if text == "." else text
92
+
93
+
94
def build_finding(
    *,
    check_id: str,
    category: GateCategory,
    title: str,
    severity: GateSeverity,
    impact: GateImpact,
    summary: str,
    recommendation: str,
    evidence: Iterable[EvidenceReference] = (),
    repair_kind: str = "",
    executor_action: str = "",
    proof_required: str = "",
    allowlist_allowed: bool = True,
    preferred_fix_shape: str = "",
    # Sprint A (2026-04-23): confidence/applicability layer. The defaults keep
    # legacy semantics: a gate that does not opt in emits an "applicable"
    # finding with full detector confidence. These fields are left out of the
    # fingerprint so allowlists stay stable while gates migrate.
    confidence: float = 1.0,
    applicability: str = "applicable",
    analysis_mode: str = "heuristic",
    applicability_reason: str = "",
) -> GateFinding:
    """Assemble a ``GateFinding`` and derive its fingerprint.

    The fingerprint is the first 16 hex characters of the SHA-256 of
    ``check_id|category|title|impact`` followed by one entry per evidence
    item (its ``path``, or its ``detail`` when the path is falsy), all
    joined with ``|``. Severity, summary, recommendation and the Sprint A
    fields do not take part in it.

    ``evidence`` is materialised into a tuple once, so any iterable --
    including a one-shot generator -- is accepted.
    """
    evidence_items = tuple(evidence)

    # Fingerprint policy: identity fields first, then evidence locations.
    fingerprint_parts = [check_id, category.value, title, impact.value]
    fingerprint_parts.extend(ref.path or ref.detail for ref in evidence_items)
    digest = hashlib.sha256("|".join(fingerprint_parts).encode("utf-8")).hexdigest()

    return GateFinding(
        check_id=check_id,
        category=category,
        title=title,
        severity=severity,
        impact=impact,
        summary=summary,
        recommendation=recommendation,
        evidence=evidence_items,
        fingerprint=digest[:16],
        repair_kind=repair_kind,
        executor_action=executor_action,
        proof_required=proof_required,
        allowlist_allowed=allowlist_allowed,
        preferred_fix_shape=preferred_fix_shape,
        confidence=confidence,
        applicability=applicability,
        analysis_mode=analysis_mode,
        applicability_reason=applicability_reason,
    )
149
+
150
+
151
def build_check_result(
    *,
    check_id: str,
    category: GateCategory,
    findings: Iterable[GateFinding] = (),
    notes: Iterable[str] = (),
) -> GateCheckResult:
    """Wrap findings and notes into a ``GateCheckResult``.

    Findings are materialised into a tuple. Each note is converted with
    ``str``; notes that are empty or whitespace-only after conversion are
    dropped, the rest are kept unmodified and in order.
    """
    collected_findings = tuple(findings)

    kept_notes = []
    for note in notes:
        rendered = str(note)
        if rendered.strip():
            kept_notes.append(rendered)

    return GateCheckResult(
        check_id=check_id,
        category=category,
        findings=collected_findings,
        notes=tuple(kept_notes),
    )
164
+
165
+
166
def read_snapshot(project_dir: Path, path: str) -> GateFileSnapshot:
    """Read one file below *project_dir* into a ``GateFileSnapshot``.

    *path* is normalised with ``normalize_path`` first. When the target is
    missing or is not a regular file, a placeholder snapshot with
    ``exists=False`` and zeroed fields is returned. Otherwise the file is
    decoded as ``utf-8-sig`` with undecodable bytes replaced, ``size`` is
    taken from ``stat()`` (on-disk bytes) and ``line_count`` from the
    decoded text.
    """
    relative = normalize_path(path)
    target = project_dir / relative

    if not (target.exists() and target.is_file()):
        return GateFileSnapshot(path=relative, exists=False, size=0, line_count=0, text="")

    content = target.read_text(encoding="utf-8-sig", errors="replace")
    byte_size = target.stat().st_size
    return GateFileSnapshot(
        path=relative,
        exists=True,
        size=byte_size,
        line_count=len(content.splitlines()),
        text=content,
    )
179
+
180
+
181
def iter_touched_snapshots(ctx: PostExecGateContext) -> list[GateFileSnapshot]:
    """Return one snapshot per entry of ``ctx.touched_files``, in order.

    Each path is normalised; a snapshot already present in
    ``ctx.file_snapshots`` under that key is reused, otherwise the file is
    read from ``ctx.project_dir`` via ``read_snapshot``.
    """
    cached = ctx.file_snapshots
    normalized_paths = (normalize_path(raw) for raw in ctx.touched_files)
    return [
        cached[rel] if rel in cached else read_snapshot(ctx.project_dir, rel)
        for rel in normalized_paths
    ]
190
+
191
+
192
def hash_normalized_code(text: str) -> str:
    """Return a SHA-256 hex digest of *text* reduced to its structure.

    The text is stripped, every identifier-like word (keywords included) is
    replaced by ``ID``, and each whitespace run is collapsed to one space
    before hashing, so snippets that differ only in names or spacing hash
    alike.
    """
    anonymized = re.sub(r"\b[A-Za-z_][A-Za-z0-9_]*\b", "ID", text.strip())
    collapsed = re.sub(r"\s+", " ", anonymized)
    return hashlib.sha256(collapsed.encode("utf-8")).hexdigest()
195
+
196
+
197
def hash_text_block(text: str) -> str:
    """Return a SHA-256 hex digest of *text* with only whitespace normalised.

    Used for text-block dedup. In contrast to ``hash_normalized_code``,
    which rewrites identifiers to ``ID`` to compare structure, names are
    kept here, so the digest matches literal copy-paste rather than
    structural coincidence (such as two unrelated dataclasses with the same
    number of fields).
    """
    collapsed = re.sub(r"\s+", " ", text.strip())
    payload = collapsed.encode("utf-8")
    return hashlib.sha256(payload).hexdigest()
206
+
207
+
208
def extract_python_functions(text: str) -> list[tuple[str, int, int, str]]:
    """List every function defined in *text*, nested ones included.

    Args:
        text: Python source code.

    Returns:
        One ``(name, start_line, end_line, snippet)`` tuple per ``def`` /
        ``async def`` in ``ast.walk`` order. Line numbers are 1-based and
        inclusive; ``snippet`` is the source between them, joined with
        ``"\\n"``. Source that cannot be parsed yields an empty list.
    """
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError, RecursionError):
        # ValueError: source containing NUL bytes on interpreters that do not
        # report it as SyntaxError. RecursionError: pathologically nested
        # input. Both mean "unparseable" to callers.
        return []

    lines = text.splitlines()
    results: list[tuple[str, int, int, str]] = []
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        start = int(getattr(node, "lineno", 1))
        # end_lineno may be absent or None; fall back to a one-line span.
        end = int(getattr(node, "end_lineno", None) or start)
        snippet = "\n".join(lines[start - 1:end])
        results.append((str(node.name), start, end, snippet))
    return results
223
+
224
+
225
def max_nesting_depth(text: str) -> int:
    """Return the deepest nesting of control-flow blocks in *text*.

    Each ``for``, ``async for``, ``while``, ``if``, ``with``, ``async with``
    and ``try`` adds one level, as do ``match`` and ``try``/``except*``
    where the running interpreter's ``ast`` module defines them. Function
    and class definitions add no level of their own. An ``elif`` is parsed
    as an ``if`` nested in the ``else`` branch and therefore counts as one
    more level.

    Args:
        text: Python source code.

    Returns:
        The maximum depth; 0 when there is no control flow or the source
        cannot be parsed.
    """
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError, RecursionError):
        # Unparseable input (including NUL bytes or excessive nesting).
        return 0

    nesting_types = [
        ast.For,
        ast.AsyncFor,
        ast.While,
        ast.If,
        ast.With,
        ast.AsyncWith,
        ast.Try,
    ]
    # Node classes that exist only on newer interpreters.
    for optional_name in ("Match", "TryStar"):
        optional_type = getattr(ast, optional_name, None)
        if optional_type is not None:
            nesting_types.append(optional_type)
    nesting = tuple(nesting_types)

    # Explicit stack instead of recursion so deep trees cannot exhaust the
    # interpreter's recursion limit.
    deepest = 0
    pending = [(tree, 0)]
    while pending:
        node, depth = pending.pop()
        if isinstance(node, nesting):
            depth += 1
            if depth > deepest:
                deepest = depth
        pending.extend((child, depth) for child in ast.iter_child_nodes(node))
    return deepest
239
+
240
+
241
+ # ---------------------------------------------------------------------------
242
+ # F14c: Detector self-match suppression helpers
243
+ # ---------------------------------------------------------------------------
244
+ # Canonical implementation lives in ._ast_helpers (introduced in F14a).
245
+ # Re-export for backwards compatibility so gates can import these names from
246
+ # either ``..common`` or ``.._ast_helpers``.
247
+ from ._ast_helpers import ( # noqa: E402, F401 -- public re-export
248
+ collect_constant_container_literal_lines,
249
+ is_section_header_comment,
250
+ is_cli_surface_file,
251
+ collect_main_block_line_ranges,
252
+ line_in_ranges,
253
+ )