vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,847 @@
1
+ """Detect non-ASCII characters that crash Windows console (cp1252).
2
+
3
+ Windows default console encoding cannot render emoji, box-drawing, arrows,
4
+ smart quotes. Python print/raise/log crashes with UnicodeEncodeError.
5
+ This is a universal check -- applies to any project running on Windows.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import ast
10
+ import re
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ from vigil_forensic._shared import WINDOWS_CLI_RUNTIME_EXTENSIONS as _WINDOWS_CLI_RUNTIME_EXTENSIONS
15
+ from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity, RepairKind
16
+ from vigil_forensic.gate_models import PostExecGateContext
17
+ from ..source_analysis import is_source_file
18
+ from .common import build_check_result, build_finding, iter_touched_snapshots
19
+ from ._deployment_detector import (
20
+ detect_file_deployment,
21
+ detect_project_deployment,
22
+ get_explicit_deployment,
23
+ )
24
+ import logging
25
+ _log = logging.getLogger(__name__)
26
+
27
# subprocess calls that may need encoding=
# Matches the opening of a ``subprocess.run`` / ``Popen`` / ``check_output`` /
# ``check_call`` call; group 1 captures the function name. The ``\b`` anchor
# keeps identifiers that merely end in "subprocess" from matching.
_SUBPROCESS_CALL_RE = re.compile(
    r'\bsubprocess\.(run|Popen|check_output|check_call)\s*\(',
)
31
+
32
# Coarse "potentially not encodable in cp1252" filter: any code point at or
# above U+0100. The upper bound is U+10FFFF rather than U+FFFF so that
# supplementary-plane characters (e.g. the U+1F300-U+1F9FF emoji block listed
# in _HIGH_RISK_RANGES) are matched too; a BMP-only class silently misses them.
_DANGEROUS_RE = re.compile(r'[\u0100-\U0010ffff]')
34
+
35
# Pure comment lines — safe, never reach stdout.
# Python/shell/PowerShell use "#", JS/TS/Go/Java/C* use "//", SQL uses "--".
# Keyed by file extension (lowercase). Extensions not listed fall back to
# scanning every line (the textual sink detector then filters out lines that
# don't contain a recognized output function, so noise stays low).
#
# Only single-line comment prefixes are listed; block-comment syntax is not
# represented in this table. Prefixes are compared case-sensitively by
# ``_is_comment_line`` (``str.startswith``), which is why the batch entries
# spell out both "rem " and "REM ".
_COMMENT_PREFIXES_BY_EXT: dict[str, tuple[str, ...]] = {
    ".py": ("#",),
    ".ps1": ("#",),
    ".sh": ("#",),
    ".bash": ("#",),
    ".bat": ("rem ", "REM ", "::"),
    ".cmd": ("rem ", "REM ", "::"),
    ".js": ("//",),
    ".mjs": ("//",),
    ".cjs": ("//",),
    ".ts": ("//",),
    ".tsx": ("//",),
    ".go": ("//",),
    ".java": ("//",),
    ".sql": ("--",),
    ".ini": (";",),
}

# Legacy Python-comment regex (kept for back-compat with Python-AST path).
# Matches a line whose first non-whitespace character is "#".
_COMMENT_RE = re.compile(r'^\s*#')
60
+
61
# High-risk Unicode ranges with human-readable names
# Each entry is an inclusive ``(low, high, label)`` triple of code points.
# ``_classify_char`` returns the label of the FIRST matching entry, so order
# matters: the single-code-point "checkmark-emoji" (U+2705) and "cross-emoji"
# (U+274C) entries must stay ahead of the broader "dingbats" range
# (U+2700-U+27BF) that contains them.
# NOTE(review): the dash and smart-quote code points listed here also appear
# in _SAFE_UNICODE_CODEPOINTS — confirm which table takes precedence at the
# call site.
_HIGH_RISK_RANGES = (
    (0x2500, 0x257F, "box-drawing"),
    (0x2190, 0x21FF, "arrows"),
    (0x2014, 0x2014, "em-dash"),
    (0x2013, 0x2013, "en-dash"),
    (0x201C, 0x201D, "smart-quotes"),
    (0x2018, 0x2019, "smart-apostrophes"),
    (0x2705, 0x2705, "checkmark-emoji"),
    (0x274C, 0x274C, "cross-emoji"),
    (0x1F300, 0x1F9FF, "emoji"),
    (0x2600, 0x26FF, "misc-symbols"),
    (0x2700, 0x27BF, "dingbats"),
)
75
+
76
# Keywords that indicate text reaches stdout/stderr directly — HIGH risk on cp1252
# NOTE(review): the ``print`` alternative has no leading word boundary, so it
# also matches identifiers that merely end in "print" (e.g. ``pprint(``).
_STDOUT_SINK_RE = re.compile(
    r'print\s*\(|sys\.stdout\.write\s*\(|sys\.stderr\.write\s*\('
)

# Keywords that indicate logging sinks — bytes go through Python logging, not console codec
# The leading ``(?:^|[^a-zA-Z_])`` group requires the receiver name to start
# at the beginning of the text or after a non-identifier character, so names
# such as ``catalog.info(`` do not match the ``log`` alternative.
_LOGGING_SINK_RE = re.compile(
    r'(?:^|[^a-zA-Z_])(?:logging|_log|_logger|log|logger)\s*\.\s*(?:debug|info|warning|error|critical|exception)\s*\('
)
85
+
86
# Cross-language console-output substrings — used by the textual fallback
# sink detector when AST parsing is unavailable (non-Python files, or Python
# files with a syntax error). Matching is substring-based on the already-
# uncommented line, so ordering/anchoring is not required.
# ``_classify_textual_sink`` tests each entry with ``needle in line`` and
# consults this table before _TEXTUAL_LOGGER_SINKS, so a line containing both
# kinds of needle is reported as a stdout sink.
# HIGH: anything that writes to stdout/stderr and therefore hits the cp1252
# console codec on Windows.
_TEXTUAL_STDOUT_SINKS: tuple[str, ...] = (
    "print(",                # Python, also common in JS/TS via CommonJS
    "console.log(",          # JS/TS
    "console.error(",        # JS/TS — writes to stderr, still hits cp1252 console
    "console.warn(",         # JS/TS — writes to stderr on Node; HIGH on Windows Node
    "process.stdout.write(", # Node.js
    "process.stderr.write(", # Node.js
    "stderr.write(",         # Generic (covers sys.stderr.write and Go os.Stderr.Write alias-style)
    "sys.stdout.write(",     # Python
    "sys.stderr.write(",     # Python
    "printf ",               # POSIX shell printf builtin (with space)
    "printf(",               # C / Go fmt.Printf when imported unqualified
    "fmt.Print",             # Go — matches fmt.Print, fmt.Println, fmt.Printf (substring)
    "fmt.Fprint",            # Go — matches fmt.Fprint, fmt.Fprintln, fmt.Fprintf
    "log.Print",             # Go standard library log package (Print, Println, Printf)
    "System.out.print",      # Java (matches print and println)
    "System.err.print",      # Java
    "Write-Host",            # PowerShell — writes to host, bypasses codec-safe stream
    "Write-Output",          # PowerShell — piped, but rendered when terminal sink
    "Write-Error",           # PowerShell — writes to error stream, hits cp1252 console
    "echo ",                 # POSIX shell / batch
    "echo\t",                # POSIX shell with tab between echo and args
    "echo(",                 # Some shells
)
# MEDIUM: logger-style sinks — encoding usually handled internally, but stale
# tooling may still barf; rated MEDIUM (WARN) just like the Python AST path.
# Most entries omit the trailing "(" and therefore match as prefixes
# (e.g. "log.warn" also matches "log.warning").
_TEXTUAL_LOGGER_SINKS: tuple[str, ...] = (
    "log.info", "log.debug", "log.warn", "log.warning", "log.error", "log.critical", "log.exception",
    "logger.info", "logger.debug", "logger.warn", "logger.warning", "logger.error", "logger.critical", "logger.exception",
    "console.info(", "console.debug(",
    "Write-Verbose",
    "Write-Warning",
    "Write-Information",
)
126
+
127
# F18b / Sprint C3 (2026-04-23): the canonical whitelist of extensions whose
# runtime output passes through a locale-dependent console is
# ``WINDOWS_CLI_RUNTIME_EXTENSIONS`` in ``vigil_forensic._shared``. It is
# imported at the top of this module under the private alias
# ``_WINDOWS_CLI_RUNTIME_EXTENSIONS`` so the extension filter below
# continues to resolve the private name.
132
+
133
+
134
def _should_scan_for_encoding(
    rel_path: str,
    content: str | None = None,
    project_dir: Path | None = None,
) -> bool:
    """Decide whether *rel_path* is in scope for the encoding gate.

    The decision is made in this order; the first rule that applies wins:

    1. Extension whitelist: the lower-cased path must end with one of the
       entries in ``_WINDOWS_CLI_RUNTIME_EXTENSIONS``; otherwise ``False``.
    2. No ``project_dir`` (legacy callers / tests): scan on extension match
       alone.
    3. Explicit deployment from ``get_explicit_deployment(project_dir)``:
       ``"linux-only"`` skips the file, any other non-``None`` value scans.
    4. Per-file signal from ``detect_file_deployment(content)`` (only when
       *content* is non-empty): ``"unix"`` skips, ``"windows"`` scans, any
       other value defers to the next rule.
    5. Project signal from ``detect_project_deployment(project_dir)``:
       ``"linux-only"`` skips; everything else scans. Scanning is the
       conservative default: a false positive can be allowlisted, whereas a
       false negative hides a real bug.

    Args:
        rel_path: Path of the file relative to the project root.
        content: File text, used only for the per-file signal.
        project_dir: Project root handed to the deployment detectors.

    Returns:
        ``True`` when the file should be scanned, ``False`` to skip it.
    """
    lowered = rel_path.lower()
    if not any(lowered.endswith(suffix) for suffix in _WINDOWS_CLI_RUNTIME_EXTENSIONS):
        return False

    # Without a project root none of the deployment detectors can run.
    if project_dir is None:
        return True

    # An explicit override beats both file-level and project-level signals.
    override = get_explicit_deployment(project_dir)
    if override is not None:
        return override != "linux-only"

    # A clearly Unix file is skipped and a clearly Windows file is scanned,
    # regardless of what the project as a whole targets.
    if content:
        per_file = detect_file_deployment(content)
        if per_file in ("unix", "windows"):
            return per_file == "windows"

    # windows-only / cross-platform / unknown all fall through to "scan".
    return detect_project_deployment(project_dir) != "linux-only"
206
+
207
+
208
def _classify_textual_sink(line: str) -> str | None:
    """Classify a raw source line by the output sink it appears to call.

    This is the substring-based classifier for lines that were not analysed
    through the AST. The line is checked against ``_TEXTUAL_STDOUT_SINKS``
    first and ``_TEXTUAL_LOGGER_SINKS`` second, so a line containing both
    kinds of needle is reported as a stdout sink.

    Returns:
        ``"stdout"``, ``"logger"``, or ``None`` when no needle occurs in
        *line*.
    """
    if any(token in line for token in _TEXTUAL_STDOUT_SINKS):
        return "stdout"
    if any(token in line for token in _TEXTUAL_LOGGER_SINKS):
        return "logger"
    return None
224
+
225
+
226
def _is_comment_line(line: str, ext: str) -> bool:
    """Tell whether *line* is a whole-line comment for files with suffix *ext*.

    The suffix is lower-cased and looked up in ``_COMMENT_PREFIXES_BY_EXT``.
    Unknown suffixes, and blank or whitespace-only lines, are never treated
    as comments. Prefix comparison is case-sensitive and ignores leading
    whitespace on the line.
    """
    markers = _COMMENT_PREFIXES_BY_EXT.get(ext.lower())
    if not markers:
        return False
    body = line.lstrip()
    return bool(body) and any(body.startswith(marker) for marker in markers)
240
+
241
# Safe non-ASCII codepoints that transcode cleanly via every modern codec
# including Windows cp1252 (they all exist in the cp1252 character table or
# have a canonical cp1252 equivalent). When a line's entire non-ASCII content
# falls inside this set, the line is not a crash risk and no finding is emitted.
# NOTE(review): the entries below U+0100 (U+00A0, U+00AB, U+00B0, U+00B5,
# U+00BB) lie outside the range matched by _DANGEROUS_RE, and the dash /
# smart-quote entries are also listed in _HIGH_RISK_RANGES — confirm the
# intended interaction at the call site.
_SAFE_UNICODE_CODEPOINTS: frozenset[int] = frozenset({
    0x2013,  # en-dash
    0x2014,  # em-dash
    0x2018,  # left single smart quote
    0x2019,  # right single smart quote
    0x201C,  # left double smart quote
    0x201D,  # right double smart quote
    0x2026,  # horizontal ellipsis
    0x00A0,  # non-breaking space
    0x00B0,  # degree sign
    0x00B5,  # micro sign
    0x00AB,  # left guillemet
    0x00BB,  # right guillemet
})
259
+
260
# Loggers that apply errors='replace' or utf-8 under the hood — MEDIUM risk
# Method names that ``_classify_call_sink`` accepts as logger calls when the
# receiver is recognised as a logger.
_LOGGER_METHOD_NAMES: frozenset[str] = frozenset({
    "debug", "info", "warning", "warn", "error", "critical", "exception", "log",
})
# Receiver identifiers treated as loggers: either a bare name (``logger.info``)
# or the final attribute of a chain (``self._log.info``).
_LOGGER_RECEIVER_NAMES: frozenset[str] = frozenset({
    "_log", "_logger", "log", "logger", "logging",
})

# F9a-tighten (2026-04-23): chained-call logger pattern — a Call whose func is
# an Attribute whose receiver is ITSELF a Call with ``.getLogger`` as the
# inner attribute. Matches ``logging.getLogger(...).exception(...)`` and
# similar one-shot logger-factory chains.
_LOGGER_FACTORY_INNER_METHODS: frozenset[str] = frozenset({
    "getLogger", "get_logger", "get_child_logger", "getChildLogger",
})

# F9a-tighten: string-transform wrapper methods we traverse through when the
# nearest enclosing Call is one of these. We then look at the GRANDPARENT
# Call to classify the sink. Covers ``print(s.format(lit))``,
# ``sys.stdout.write(' '.join(lits))``, etc.
# Matching is by method name only; the receiver's type is not checked.
_WRAPPER_METHOD_NAMES: frozenset[str] = frozenset({
    "format", "join", "strip", "lstrip", "rstrip", "replace",
    "upper", "lower", "title", "capitalize",
    "encode", "decode",
    "removeprefix", "removesuffix",
    "zfill", "ljust", "rjust", "center",
})
287
+
288
+
289
def _classify_char(ch: str) -> str:
    """Return the human-readable risk label for the single character *ch*.

    The label comes from the first ``_HIGH_RISK_RANGES`` entry whose inclusive
    bounds contain the character's code point; ``"non-cp1252"`` is returned
    when no entry matches.
    """
    codepoint = ord(ch)
    return next(
        (label for lo, hi, label in _HIGH_RISK_RANGES if lo <= codepoint <= hi),
        "non-cp1252",
    )
295
+
296
+
297
def _is_test_path(rel_path: str, ctx: object = None) -> bool:
    """Report whether *rel_path* is a test file the encoding gate should skip.

    Rules, in order:

    * An empty path is never a test path.
    * A file whose lower-cased basename contains ``"encoding"`` is never
      treated as a test path, wherever it lives, so it stays scannable.
    * If ``ctx.project_context.test_topology`` is available, its
      ``is_test_path`` verdict for the forward-slash-normalised path is
      returned.
    * Otherwise the legacy rule applies: the path is a test path when one of
      its segments is exactly ``"tests"``.

    Args:
        rel_path: Project-relative path; backslashes are accepted.
        ctx: Optional gate context; only ``project_context.test_topology`` is
            read from it, and missing attributes are tolerated.
    """
    if not rel_path:
        return False

    posix_path = rel_path.replace("\\", "/")
    segments = posix_path.split("/")

    # Files advertising "encoding" in their name are scanned on purpose.
    if "encoding" in segments[-1].lower():
        return False

    project_context = getattr(ctx, "project_context", None)
    topology = getattr(project_context, "test_topology", None)
    if topology is None:
        # Legacy fallback: original path-fragment rule.
        return "tests" in segments
    return topology.is_test_path(posix_path)
329
+
330
+
331
def _classify_call_sink(call_node: ast.Call) -> str | None:
    """Classify the callee of *call_node* as a console sink.

    Returns:
        ``"stdout"`` for ``print(...)``, ``sys.stdout.write(...)``,
        ``sys.stderr.write(...)`` and ``os.write(fd, ...)`` whose first
        positional argument is the constant 1 or 2.

        ``"logger"`` for a method named in ``_LOGGER_METHOD_NAMES`` whose
        receiver is one of:

        * a bare name in ``_LOGGER_RECEIVER_NAMES`` (``logger.info``);
        * an attribute chain ending in such a name (``self._log.info``);
        * a call to a factory named in ``_LOGGER_FACTORY_INNER_METHODS``
          (``logging.getLogger(__name__).exception``).

        ``None`` for everything else (``json.dumps(...)``, ``foo.bar(...)``).
    """
    target = call_node.func

    # Bare-name calls: only ``print`` counts as a console sink.
    if isinstance(target, ast.Name):
        return "stdout" if target.id == "print" else None
    if not isinstance(target, ast.Attribute):
        return None

    method = target.attr
    receiver = target.value

    if method == "write":
        if isinstance(receiver, ast.Attribute):
            # sys.stdout.write(...) / sys.stderr.write(...)
            is_sys_stream = (
                receiver.attr in ("stdout", "stderr")
                and isinstance(receiver.value, ast.Name)
                and receiver.value.id == "sys"
            )
            if is_sys_stream:
                return "stdout"
        elif isinstance(receiver, ast.Name) and receiver.id == "os" and call_node.args:
            # os.write(1, ...) / os.write(2, ...)
            fd_arg = call_node.args[0]
            if isinstance(fd_arg, ast.Constant) and fd_arg.value in (1, 2):
                return "stdout"

    if method not in _LOGGER_METHOD_NAMES:
        return None

    # Receiver shapes are mutually exclusive, so each branch is final.
    if isinstance(receiver, ast.Name):
        return "logger" if receiver.id in _LOGGER_RECEIVER_NAMES else None
    if isinstance(receiver, ast.Attribute):
        return "logger" if receiver.attr in _LOGGER_RECEIVER_NAMES else None
    if isinstance(receiver, ast.Call):
        # Chained factory call, e.g. logging.getLogger(__name__).error(...).
        factory = receiver.func
        if isinstance(factory, ast.Attribute):
            return "logger" if factory.attr in _LOGGER_FACTORY_INNER_METHODS else None
        if isinstance(factory, ast.Name):
            return "logger" if factory.id in _LOGGER_FACTORY_INNER_METHODS else None
    return None
380
+
381
+
382
def _collect_string_literal_sinks(source: str) -> dict[int, str]:
    """Map 1-based line numbers to the sink their string literals flow into.

    Every ``str`` constant and f-string (``JoinedStr``) in *source* is
    resolved to a sink; all lines spanned by the literal are recorded when
    the sink is ``'stdout'`` or ``'logger'``. A line absent from the result
    has no string literal whose enclosing construct is a recognised console
    sink (for example the literal sits in ``json.dumps(...)`` or in a plain
    assignment).

    Resolution rules (first hit wins):

    * Raise detection: a literal with an ``ast.Raise`` ancestor inside the
      same function/class/lambda/module scope is ``'stdout'``.
    * Nearest enclosing Call that is not a string-transform wrapper is
      classified with ``_classify_call_sink``.
    * Wrapper calls (``.format`` / ``.join`` / ``.replace`` / ..., see
      ``_WRAPPER_METHOD_NAMES``) are skipped so that
      ``print(fmt.format('lit'))`` resolves to ``print``.

    When several literals share a line, ``'stdout'`` wins over ``'logger'``.

    Args:
        source: Python source text.

    Returns:
        ``{line_number: 'stdout' | 'logger'}``; an empty dict when *source*
        cannot be parsed.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        # ValueError: Python < 3.12 reports NUL bytes in the source this way
        # instead of raising SyntaxError.
        return {}

    # Parent map keyed by node identity so ancestors can be walked.
    parent: dict[int, ast.AST] = {}
    for node in ast.walk(tree):
        for child in ast.iter_child_nodes(node):
            parent[id(child)] = node

    scope_boundaries = (
        ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Lambda, ast.Module,
    )

    def _is_wrapper_call(call: ast.Call) -> bool:
        """True if *call* is a transparent string-transform method call."""
        f = call.func
        return isinstance(f, ast.Attribute) and f.attr in _WRAPPER_METHOD_NAMES

    def _has_raise_ancestor(node: ast.AST) -> bool:
        """True if an ``ast.Raise`` encloses *node* within the same scope.

        The walk stops at the first function/class/lambda/module boundary so
        a literal inside a nested def is not attributed to an outer raise.
        """
        cur: ast.AST | None = parent.get(id(node))
        while cur is not None:
            if isinstance(cur, ast.Raise):
                return True
            if isinstance(cur, scope_boundaries):
                return False
            cur = parent.get(id(cur))
        return False

    def _resolve_sink(node: ast.AST) -> str | None:
        """Walk ancestors of *node*, skipping wrapper Calls, and classify."""
        # Checked before call classification so ``raise ValueError("msg")``
        # classifies even though ``ValueError`` itself is not a known sink.
        if _has_raise_ancestor(node):
            return "stdout"

        cur: ast.AST | None = parent.get(id(node))
        while cur is not None:
            if isinstance(cur, ast.Call):
                if not _is_wrapper_call(cur):
                    # Non-wrapper Call is definitive (sink or None).
                    return _classify_call_sink(cur)
                # Safety net: a wrapper-named method may itself classify.
                direct = _classify_call_sink(cur)
                if direct is not None:
                    return direct
            cur = parent.get(id(cur))
        return None

    result: dict[int, str] = {}

    def _record(node: ast.AST, sink: str) -> None:
        """Record *sink* for every line spanned by *node*."""
        start = getattr(node, "lineno", None)
        if start is None:
            return
        end = getattr(node, "end_lineno", None) or start
        for ln in range(start, end + 1):
            # An existing 'stdout' entry is never downgraded.
            if result.get(ln) != "stdout":
                result[ln] = sink

    for node in ast.walk(tree):
        is_str_constant = isinstance(node, ast.Constant) and isinstance(node.value, str)
        if not (is_str_constant or isinstance(node, ast.JoinedStr)):
            continue
        sink = _resolve_sink(node)
        if sink is not None:
            _record(node, sink)

    return result
506
+
507
+
508
def _collect_docstring_lines(source: str) -> set[int]:
    """Return the 1-based line numbers covered by any docstring in *source*.

    A docstring is the first statement of a module, class, function or async
    function body when that statement is a bare string constant
    (``Expr(Constant(str))``). Every line from the literal's first to its
    last line is included.

    Args:
        source: Python source text.

    Returns:
        The set of docstring line numbers; an empty set when *source* cannot
        be parsed.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        # ValueError: Python < 3.12 reports NUL bytes in the source this way
        # instead of raising SyntaxError.
        return set()

    docstring_owners = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    docstring_lines: set[int] = set()

    # ast.walk yields the Module node itself, so one pass covers the module
    # docstring as well as every class/function docstring.
    for node in ast.walk(tree):
        if not isinstance(node, docstring_owners) or not node.body:
            continue
        first_stmt = node.body[0]
        if not isinstance(first_stmt, ast.Expr):
            continue
        literal = first_stmt.value
        if not (isinstance(literal, ast.Constant) and isinstance(literal.value, str)):
            continue
        start = getattr(literal, "lineno", None)
        if start is None:
            continue
        end = getattr(literal, "end_lineno", None) or start
        docstring_lines.update(range(start, end + 1))

    return docstring_lines
548
+
549
+
550
def run_encoding_checks(ctx: PostExecGateContext):
    """Scan changed files for Windows-unsafe Unicode characters.

    For every snapshot in ``ctx.file_snapshots`` that exists, has text, passes
    ``_should_scan_for_encoding`` and is not a test path, each line is checked
    for characters matched by ``_DANGEROUS_RE`` whose codepoint is not in
    ``_SAFE_UNICODE_CODEPOINTS``. A finding is emitted only when the line is
    attributed to an output sink:

    * ``"stdout"`` sink -> HIGH severity / REVISE impact.
    * ``"logger"`` sink -> MEDIUM severity / WARN impact.

    Sink attribution uses the AST-based map for Python files that parse, and
    the textual classifier for everything else (non-Python files and Python
    files that fail to parse).

    Args:
        ctx: Gate context; ``file_snapshots`` is read, and ``project_dir`` is
            read when the attribute is present.

    Returns:
        The value of ``build_check_result`` for check id ``encoding_safety``.
        On non-Windows platforms the check is skipped and only a note is
        returned.
    """
    # Only relevant on Windows
    if sys.platform != "win32":
        return build_check_result(
            check_id="encoding_safety",
            category=GateCategory.RUNTIME_BEHAVIOR,
            notes=["Skipped: not running on Windows"],
        )

    findings = []
    # Resolve project_dir once so every file in this call sees the same value.
    project_dir = getattr(ctx, "project_dir", None)

    for rel_path, snap in ctx.file_snapshots.items():
        if not snap.exists or not snap.text:
            continue
        # Whitelist filter + deployment cascade: only scan files whose runtime
        # output passes through a locale-dependent console AND whose
        # deployment target is not provably Linux-only.
        if not _should_scan_for_encoding(rel_path, snap.text, project_dir):
            continue
        # Skip test paths entirely; ctx is threaded through so the helper can
        # consult whatever topology information the context carries.
        if _is_test_path(rel_path, ctx):
            continue

        text = snap.text

        # Split on the same line terminators the Python tokenizer recognises
        # (\r\n, \r, \n). str.splitlines() additionally breaks on form feed,
        # U+0085, U+2028, U+2029, etc., which would shift every later line
        # number away from the AST line numbers used below and would also
        # hide those very characters from the scan.
        all_lines = re.split(r"\r\n|\r|\n", text)
        is_python = rel_path.endswith(".py")

        # Extension used for comment-prefix dispatch. Take it from the final
        # path component only, so a dot in a directory name
        # (e.g. "pkg.v2/Makefile") is not mistaken for a file extension.
        basename = rel_path.replace("\\", "/").rsplit("/", 1)[-1]
        dot = basename.rfind(".")
        ext = basename[dot:].lower() if dot >= 0 else ""

        # Probe the parser once, up front. A failed parse means the AST-based
        # helpers have nothing to offer, so we skip them and use the textual
        # sink detector instead of silently ignoring a broken source file.
        # ValueError covers sources containing NUL bytes on older Pythons.
        ast_available = is_python
        if is_python:
            try:
                ast.parse(text)
            except (SyntaxError, ValueError):
                ast_available = False

        if ast_available:
            # Docstrings are skipped entirely (no finding); the sink map gives
            # line -> 'stdout' | 'logger' for string literals inside calls.
            docstring_lines = _collect_docstring_lines(text)
            sink_map = _collect_string_literal_sinks(text)
        else:
            docstring_lines = set()
            sink_map = {}

        for line_num, line in enumerate(all_lines, 1):
            # Language-aware comment skip.
            if _is_comment_line(line, ext):
                continue

            # Skip docstring lines entirely -- no finding emitted.
            if line_num in docstring_lines:
                continue

            # Respect explicit per-line allowlist.
            if "noqa: encoding" in line or "noqa:encoding" in line:
                continue

            matches = _DANGEROUS_RE.findall(line)
            if not matches:
                continue

            # Keep only characters outside the safe set; if every match is
            # safe there is nothing to report for this line.
            unsafe_chars = [ch for ch in matches if ord(ch) not in _SAFE_UNICODE_CODEPOINTS]
            if not unsafe_chars:
                continue

            # Sink resolution:
            #   - Python + AST-parsable: use the AST map.
            #   - Otherwise: textual classification of the line.
            # A line with no recognised sink is suppressed in both modes.
            if ast_available:
                sink = sink_map.get(line_num)
            else:
                sink = _classify_textual_sink(line)
            if sink is None:
                continue

            # The verdict depends only on the sink, so compute it once per
            # line rather than once per character.
            if sink == "stdout":
                # Direct stdout/stderr sink.
                severity = GateSeverity.HIGH
                impact = GateImpact.REVISE
                sink_label = "stdout/stderr sink (print/sys.write)"
                executor_action = (
                    "replace unicode with ASCII in print/stderr — crashes on cp1252 console"
                )
            elif sink == "logger":
                # Logging sink: reported as a warning rather than a blocker.
                severity = GateSeverity.MEDIUM
                impact = GateImpact.WARN
                sink_label = "logging sink"
                executor_action = (
                    "consider ASCII for consistency; logger doesn't crash on utf8 but stale tooling may"
                )
            else:
                # Defensive: only reached if a sink classifier ever returns a
                # label other than 'stdout' or 'logger'.
                severity = GateSeverity.MEDIUM
                impact = GateImpact.WARN
                sink_label = "code (unknown sink)"
                executor_action = (
                    "consider ASCII for consistency; unknown whether this reaches cp1252 console"
                )

            # dict.fromkeys de-duplicates while preserving first-seen order,
            # so findings are emitted in a stable order from run to run
            # (iterating a set of str is subject to hash randomisation).
            for ch in dict.fromkeys(unsafe_chars):
                category = _classify_char(ch)

                findings.append(build_finding(
                    check_id="encoding.windows_unsafe_char",
                    category=GateCategory.RUNTIME_BEHAVIOR,
                    title=f"Windows-unsafe U+{ord(ch):04X} ({category}) in {rel_path}:{line_num}",
                    severity=severity,
                    impact=impact,
                    summary=(
                        f"Character U+{ord(ch):04X} ({category}) in {sink_label}. "
                        f"Windows cp1252 console will crash with UnicodeEncodeError if this reaches stdout/stderr."
                    ),
                    recommendation=(
                        f"Replace with ASCII equivalent in {rel_path} line {line_num}. "
                        f"Common fixes: em-dash->--, arrows->->, checkmark->[OK], cross->[X]"
                    ),
                    evidence=(EvidenceReference(
                        kind="probe",
                        path=rel_path,
                        detail=f"Character U+{ord(ch):04X} ({category}) at line {line_num}",
                        ok=False,
                    ),),
                    repair_kind=RepairKind.FIX_ENCODING.value,
                    executor_action=executor_action,
                    proof_required="Proper encoding",
                    allowlist_allowed=False,
                ))

    return build_check_result(
        check_id="encoding_safety",
        category=GateCategory.RUNTIME_BEHAVIOR,
        findings=findings,
        notes=[f"Scanned {len(ctx.file_snapshots)} files for Windows-unsafe characters"],
    )
739
+
740
+
741
+ def _extract_call_block(lines: list[str], start_line: int, max_lines: int = 40) -> str:
742
+ """Extract argument block of a call starting at start_line (1-based).
743
+
744
+ Tracks paren depth so we stop at the matching close paren instead of
745
+ grabbing N lines and accidentally spanning into the next call.
746
+ Used as fallback when AST parsing is unavailable (syntax errors, etc.).
747
+ """
748
+ depth = 0
749
+ block: list[str] = []
750
+ for i in range(start_line - 1, min(start_line - 1 + max_lines, len(lines))):
751
+ line = lines[i]
752
+ block.append(line)
753
+ depth += line.count("(") - line.count(")")
754
+ if depth <= 0 and i > start_line - 1:
755
+ break
756
+ return " ".join(block)
757
+
758
+
759
+ def _extract_call_kwargs(file_content: str, call_lineno: int) -> set[str] | None:
760
+ """Return the set of keyword argument names for the call at call_lineno.
761
+
762
+ Uses AST so the result is exact regardless of how many lines the call
763
+ spans. Returns None when the file cannot be parsed (syntax error) so
764
+ the caller can fall back to the regex-based approach.
765
+ """
766
+ try:
767
+ tree = ast.parse(file_content)
768
+ except SyntaxError:
769
+ return None
770
+
771
+ for node in ast.walk(tree):
772
+ if isinstance(node, ast.Call) and node.lineno == call_lineno:
773
+ return {kw.arg for kw in node.keywords if kw.arg is not None}
774
+ return set()
775
+
776
+
777
def run_subprocess_encoding_checks(ctx: PostExecGateContext):
    """Detect subprocess calls with text=True but missing encoding parameter.

    On Windows, text=True without encoding= falls back to the system locale
    codec (cp1252), which can fail or garble non-ASCII git output (branch
    names, file paths, commit messages).

    Fix: add encoding='utf-8', errors='replace' to every subprocess call
    that uses text=True.

    Every match of ``_SUBPROCESS_CALL_RE`` in each touched source file is
    inspected. Keyword names come from the AST when the file parses; the
    literal ``text=True`` is then confirmed textually. Files that do not
    parse are handled purely with regexes over the extracted call block.

    Args:
        ctx: Gate context whose touched snapshots are scanned.

    Returns:
        The value of ``build_check_result`` for check id
        ``subprocess_encoding``, carrying one HIGH/REVISE finding per
        offending call.
    """
    text_true_re = r'\btext\s*=\s*True\b'
    findings = []

    for snapshot in iter_touched_snapshots(ctx):
        if not snapshot.exists or not snapshot.text:
            continue
        if not is_source_file(snapshot.path):
            continue

        source = snapshot.text
        # Line numbers below are derived by counting "\n", so the line list
        # must be split on "\n" as well. str.splitlines() also breaks on form
        # feed, U+0085, U+2028, etc., which would make lines[line_num - 1]
        # point at the wrong line in files containing those characters.
        lines = source.split("\n")

        # finditer yields matches in increasing offset order, so the line
        # number can be advanced incrementally instead of recounting from
        # the start of the file for every match.
        line_num = 1
        counted_to = 0
        for match in _SUBPROCESS_CALL_RE.finditer(source):
            call_name = match.group(1)
            line_num += source.count("\n", counted_to, match.start())
            counted_to = match.start()

            kwargs = _extract_call_kwargs(source, line_num)
            if kwargs is not None:
                # AST path: exact keyword names regardless of call length.
                has_encoding = "encoding" in kwargs
                # Only names are available here, so confirm the value is the
                # literal True (not text=False) with a textual check, and
                # only when a text kwarg is present at all.
                has_text_true = "text" in kwargs and bool(
                    re.search(text_true_re, _extract_call_block(lines, line_num))
                )
            else:
                # Fallback for files that do not parse: regex over the block.
                block = _extract_call_block(lines, line_num)
                has_text_true = bool(re.search(text_true_re, block))
                has_encoding = bool(re.search(r'\bencoding\s*=', block))

            if not has_text_true or has_encoding:
                continue

            findings.append(build_finding(
                check_id="encoding.subprocess_missing_encoding",
                category=GateCategory.RUNTIME_BEHAVIOR,
                title=f"subprocess.{call_name}(text=True) missing encoding= in {snapshot.path}:{line_num}",
                severity=GateSeverity.HIGH,
                impact=GateImpact.REVISE,
                summary=(
                    f"subprocess.{call_name}() at line {line_num} uses text=True without encoding=. "
                    f"On Windows defaults to cp1252 -- crashes with UnicodeEncodeError on non-ASCII "
                    f"git output (branch names, file paths, commit messages)."
                ),
                recommendation=(
                    f"Add encoding='utf-8', errors='replace' to subprocess.{call_name}() "
                    f"in {snapshot.path} line {line_num}."
                ),
                evidence=[
                    EvidenceReference(kind="file", path=snapshot.path, detail=f"line:{line_num}"),
                ],
                repair_kind=RepairKind.FIX_ENCODING.value,
                executor_action="Fix encoding issues",
                proof_required="Proper encoding",
                allowlist_allowed=False,
            ))

    return build_check_result(
        check_id="subprocess_encoding",
        category=GateCategory.RUNTIME_BEHAVIOR,
        findings=findings,
    )