vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
  2. vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
  3. vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
  4. vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
  5. vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
  7. vigil_forensic/__init__.py +224 -0
  8. vigil_forensic/_git_utils.py +178 -0
  9. vigil_forensic/_shared.py +510 -0
  10. vigil_forensic/_stubs.py +156 -0
  11. vigil_forensic/gate_checks/__init__.py +1 -0
  12. vigil_forensic/gate_checks/_ast_helpers.py +629 -0
  13. vigil_forensic/gate_checks/_deployment_detector.py +573 -0
  14. vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
  15. vigil_forensic/gate_checks/authority_checks.py +95 -0
  16. vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
  17. vigil_forensic/gate_checks/broad_except_checks.py +301 -0
  18. vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
  19. vigil_forensic/gate_checks/common.py +253 -0
  20. vigil_forensic/gate_checks/config_safety_checks.py +704 -0
  21. vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
  22. vigil_forensic/gate_checks/conflict_checks.py +193 -0
  23. vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
  24. vigil_forensic/gate_checks/context_health_checks.py +289 -0
  25. vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
  26. vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
  27. vigil_forensic/gate_checks/duplication_checks.py +387 -0
  28. vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
  29. vigil_forensic/gate_checks/empty_output_checks.py +87 -0
  30. vigil_forensic/gate_checks/encoding_checks.py +847 -0
  31. vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
  32. vigil_forensic/gate_checks/fallback_checks.py +41 -0
  33. vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
  34. vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
  35. vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
  36. vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
  37. vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
  38. vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
  39. vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
  40. vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
  41. vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
  42. vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
  43. vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
  44. vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
  45. vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
  46. vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
  47. vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
  48. vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
  49. vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
  50. vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
  51. vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
  52. vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
  53. vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
  54. vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
  55. vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
  56. vigil_forensic/gate_checks/hallucination_checks.py +566 -0
  57. vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
  58. vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
  59. vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
  60. vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
  61. vigil_forensic/gate_checks/ml_checks.py +318 -0
  62. vigil_forensic/gate_checks/performance_checks.py +106 -0
  63. vigil_forensic/gate_checks/project_specific_runner.py +691 -0
  64. vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
  65. vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
  66. vigil_forensic/gate_checks/reliability_checks.py +389 -0
  67. vigil_forensic/gate_checks/reporting_checks.py +55 -0
  68. vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
  69. vigil_forensic/gate_checks/security_injection_checks.py +332 -0
  70. vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
  71. vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
  72. vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
  73. vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
  74. vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
  75. vigil_forensic/gate_checks/test_quality_checks.py +946 -0
  76. vigil_forensic/gate_checks/testing_checks.py +149 -0
  77. vigil_forensic/gate_checks/toctou_checks.py +367 -0
  78. vigil_forensic/gate_checks/type_checking_checks.py +316 -0
  79. vigil_forensic/gate_models.py +392 -0
  80. vigil_forensic/gate_packs/__init__.py +1 -0
  81. vigil_forensic/gate_packs/universal.py +179 -0
  82. vigil_forensic/gate_profile.json +31 -0
  83. vigil_forensic/gate_registry.py +21 -0
  84. vigil_forensic/language_profiles.py +219 -0
  85. vigil_forensic/meta_findings.py +207 -0
  86. vigil_forensic/self_audit.py +725 -0
  87. vigil_forensic/source_analysis.py +175 -0
  88. vigil_mapper/__init__.py +103 -0
  89. vigil_mapper/_ast_helpers_minimal.py +229 -0
  90. vigil_mapper/_extract_imports_impl.py +123 -0
  91. vigil_mapper/_file_count_guard.py +129 -0
  92. vigil_mapper/_git_utils.py +178 -0
  93. vigil_mapper/_runtime_ast.py +438 -0
  94. vigil_mapper/_runtime_dispatch.py +137 -0
  95. vigil_mapper/_seed_helpers.py +82 -0
  96. vigil_mapper/authority_builder.py +1102 -0
  97. vigil_mapper/cli_entry.py +731 -0
  98. vigil_mapper/conflict_builder.py +818 -0
  99. vigil_mapper/data_contract_builder.py +446 -0
  100. vigil_mapper/findings_builder.py +716 -0
  101. vigil_mapper/fingerprint.py +53 -0
  102. vigil_mapper/hotspot_builder.py +539 -0
  103. vigil_mapper/map_common.py +449 -0
  104. vigil_mapper/map_errors.py +55 -0
  105. vigil_mapper/map_models.py +431 -0
  106. vigil_mapper/map_models_ext.py +206 -0
  107. vigil_mapper/map_models_findings.py +130 -0
  108. vigil_mapper/map_storage.py +455 -0
  109. vigil_mapper/parse_cache.py +795 -0
  110. vigil_mapper/refactor_boundary_builder.py +266 -0
  111. vigil_mapper/runtime_builder.py +527 -0
  112. vigil_mapper/runtime_tracer.py +243 -0
  113. vigil_mapper/runtime_tracer_entry.py +199 -0
  114. vigil_mapper/semantic_diff.py +71 -0
  115. vigil_mapper/source_adapters/__init__.py +109 -0
  116. vigil_mapper/source_adapters/_base.py +264 -0
  117. vigil_mapper/source_adapters/_ir.py +156 -0
  118. vigil_mapper/source_adapters/_lexer.py +309 -0
  119. vigil_mapper/source_adapters/_patterns.py +212 -0
  120. vigil_mapper/source_adapters/_treesitter.py +182 -0
  121. vigil_mapper/source_adapters/go.py +553 -0
  122. vigil_mapper/source_adapters/java.py +541 -0
  123. vigil_mapper/source_adapters/javascript.py +626 -0
  124. vigil_mapper/source_adapters/python.py +325 -0
  125. vigil_mapper/source_adapters/typescript.py +749 -0
  126. vigil_mapper/structural_builder.py +586 -0
  127. vigil_mcp/__init__.py +1 -0
  128. vigil_mcp/_jobs.py +587 -0
  129. vigil_mcp/_paths.py +93 -0
  130. vigil_mcp/forensic_server.py +419 -0
  131. vigil_mcp/map_server.py +452 -0
@@ -0,0 +1,1143 @@
1
+ """Atomic-write safety detector (Finding G.2 plan v7; FP reduction F.9b, F.10, F.16a).
2
+
3
+ Detects write_text / write_bytes / open(..., "w"|"wb"|"a"|"ab") calls in a
4
+ function body that lack a surrounding tmpfile+rename atomic pattern.
5
+
6
+ A write is considered safe when the same function body contains:
7
+ - a Path.replace or os.replace / os.rename call, AND
8
+ - at least one reference to a .tmp suffix path (name ending in ".tmp" or
9
+ variable name containing "tmp").
10
+
11
+ Sprint B3 (2026-04-23) — ArtifactRoleMap migration (non-destructive):
12
+
13
+ When ``ctx.project_context.artifact_roles`` is available, each write site
14
+ receives a role classification from ``SYSTEM.shared_helpers.artifact_role``
15
+ and the gate derives tri-state applicability via ``_applicability_for_role``:
16
+
17
+ shared_state / config / manifest → applicable (atomicity matters)
18
+ per_run_output / temp / log → not_applicable (structurally irrelevant)
19
+ cache / unknown → unknown (reviewer judges)
20
+
21
+ Confidence is adjusted by risk signals that do NOT gate applicability:
22
+ * cross-file read-back → +0.10 (real race window)
23
+ * same-file-only read-back → -0.10 (single-process round-trip)
24
+ * enclosing function is init → -0.15 (one-time execution)
25
+
26
+ When ``artifact_roles`` is None (pre-Sprint-B3 callers), the legacy
27
+ classification path is preserved — same behaviour as F.9b/F.10/F.16a.
28
+
29
+ Legacy heuristics (read-back manifest, state-file variable names, per-call
30
+ unique markers, init-path downgrade) are preserved INSIDE ArtifactRoleMap
31
+ (where appropriate) AND as the legacy fallback path. Risk signals
32
+ (read-back, init-path) stay visible in the gate as confidence adjusters —
33
+ never collapsed into applicability.
34
+
35
+ Fail-open: parse errors / missing files -> DEBUG log, skip, never raise.
36
+ """
37
+ from __future__ import annotations
38
+
39
+ import ast
40
+ import logging
41
+ from pathlib import Path
42
+ from typing import Iterable
43
+
44
+ from vigil_forensic._shared import EvidenceReference, GateCategory, GateImpact, GateSeverity, RepairKind
45
+ from vigil_forensic.gate_models import PostExecGateContext
46
+ from ..source_analysis import is_source_file
47
+ from .common import build_check_result, build_finding, normalize_path
48
+ from ._ast_helpers import parse_python_source_or_emit_finding
49
+
50
+ _log = logging.getLogger(__name__)
51
+
52
# Method names that represent a direct write without explicit tmpfile routing.
# Matched against the bare callee name only; the receiver type is not checked.
WRITE_METHODS = frozenset({
    "write_text",
    "write_bytes",
})

# open() mode strings that indicate a write (not read-only).
# Canonical spellings only: the set itself does not list text-flag ("wt") or
# exclusive-create ("x") variants.
WRITE_MODES = frozenset({"w", "wb", "a", "ab", "w+", "wb+", "a+", "ab+"})

# open() mode strings that indicate a read (used by read-manifest builder).
# NOTE(review): "r+" / "rb+" also permit writing; they are classified as
# reads here and do not appear in WRITE_MODES.
READ_MODES = frozenset({"r", "rb", "rt", "r+", "rb+"})

# Method / function names that indicate an atomic rename/replace finalisation
REPLACE_FUNCS = frozenset({
    "replace",
    "rename",
})

# Dotted "module.function" names (os and shutil) that expose rename/replace
# as free functions.
OS_REPLACE_NAMES = frozenset({
    "os.replace",
    "os.rename",
    "os.renames",
    "shutil.move",
})

# Path-method names that indicate a file read (used by read-manifest builder)
READ_METHODS = frozenset({
    "read_text",
    "read_bytes",
})

# Function-name patterns that identify single-writer init/bootstrap paths
# (Part 3: downgrade MEDIUM → LOW because there's only one writer).
# Prefixes are tested with str.startswith; exact names by set membership.
_INIT_PATH_PREFIXES = ("_init_", "configure_", "setup_", "ensure_")
_INIT_PATH_EXACT = frozenset({"_bootstrap", "bootstrap", "_configure", "_setup"})

# F9b-tighten (2026-04-23): persistence-naming prefixes — when a non-literal
# write occurs inside a function whose name starts with one of these, we treat
# it as a probable persistence site (manual review needed rather than silent
# drop). See Fix A in task tracker 2026-04-23.
_PERSISTENCE_PREFIXES = (
    "save_", "persist_", "write_", "dump_", "flush_", "store_",
    "commit_", "sync_",
)
97
+
98
# F.16a (2026-04-23): Names / call-targets that prove the write-target path is
# per-call-unique. When any of these appears in the path-derivation AST, the
# finding is suppressed (no race possible — every invocation writes to a
# distinct filename).
#
# Two detection sources:
#   * _UNIQUE_CALL_NAMES: bare callee names (the final attribute of a dotted
#     call), e.g. ``time`` for ``time.time()``, ``uuid4`` for ``uuid.uuid4()``.
#   * _UNIQUE_VAR_NAMES : variable names that hold a per-call-unique token,
#     typically substituted into an f-string/format.

_UNIQUE_CALL_NAMES = frozenset({
    # time-based
    "time",  # time.time() → Attribute call; attr == "time"
    "time_ns",
    "monotonic",
    "monotonic_ns",
    "perf_counter",
    "perf_counter_ns",
    "now",  # datetime.now / datetime.utcnow consumer
    "utcnow",
    "today",
    # uuid-based
    "uuid1",
    "uuid3",
    "uuid4",
    "uuid5",
    "token_hex",  # secrets.token_hex
    "token_urlsafe",
    "token_bytes",
    # random/sequence
    "randbits",
    "getrandbits",
    "next",  # next(counter) — iterator sequence
})

# Matched against both plain names and attribute suffixes (e.g. ``self.ts``).
_UNIQUE_VAR_NAMES = frozenset({
    "session_num",
    "session_number",
    "attempt_id",
    "attempt_num",
    "attempt",
    "iteration",
    "iter_num",
    "timestamp",
    "ts",
    "ts_ms",
    "ts_ns",
    "now",
    "now_ts",
    "nonce",
    "request_id",
    "run_id",
    "correlation_id",
    "trace_id",
    "pid",
})
154
+
155
# F.16a (2026-04-23): Variable-name hints that suggest a state file (canonical
# write target with a fixed path). When a non-literal write target binds to a
# variable matching these, we keep the finding at MEDIUM even without a
# persistence-named enclosing function.
#
# Entries are compared against the lower-cased variable name as suffixes, so
# the underscore-prefixed variants are already covered by their bare forms.

_STATE_FILE_NAME_FRAGMENTS = (
    "state_path",
    "_state_path",
    "state_file",
    "_state_file",
    "config_path",
    "_config_path",
    "config_file",
    "_config_file",
    "cache_path",
    "_cache_path",
    "cache_file",
    "_cache_file",
    "lock_path",
    "_lock_path",
    "lock_file",
    "_lock_file",
    "ledger_path",
    "ledger_file",
    "session_store",
    "db_path",
    "manifest_path",
    "manifest_file",
    "status_path",
    "status_file",
    "active_path",
    "active_file",
    "answer_file",
    "heartbeat_path",
    "heartbeat_file",
    "sidecar",
)
192
+
193
+
194
+ # ---------------------------------------------------------------------------
195
+ # Path classifiers (Part 1)
196
+ # ---------------------------------------------------------------------------
197
+
198
def _is_test_or_libs_path(path: str, ctx: object = None) -> bool:
    """Decide whether *path* is a test-fixture or vendored-libs file to skip.

    Resolution order:

    1. Anything under ``SYSTEM/libs/`` (at the start or as an inner path
       segment) is always skipped.
    2. If ``ctx.project_context.test_topology`` exists, its
       ``is_test_path(rel_path)`` verdict is final — the legacy fragment
       checks are deliberately NOT consulted afterwards.
    3. Otherwise (no ctx / no topology) the legacy fragment check applies:
       paths starting with ``SYSTEM/dev/tests/`` or ``tests/``, or containing
       ``/tests/`` as a component.

    Backslashes are normalised to forward slashes first; a falsy *path* is
    treated as the empty string.
    """
    posix = (path or "").replace("\\", "/")

    # Vendored dependencies are excluded before topology gets a say.
    if posix.startswith("SYSTEM/libs/") or "/SYSTEM/libs/" in posix:
        return True

    project_context = getattr(ctx, "project_context", None)
    topology = getattr(project_context, "test_topology", None)
    if topology is not None:
        # Trust the topology either way; falling through to the fragment
        # checks would bring back the false positives it exists to remove.
        return True if topology.is_test_path(posix) else False

    # Legacy fallback for callers without a ctx / topology.
    if posix.startswith(("SYSTEM/dev/tests/", "tests/")):
        return True
    return "/tests/" in posix
232
+
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # AST helpers
236
+ # ---------------------------------------------------------------------------
237
+
238
def _get_call_name(node: ast.Call) -> str | None:
    """Return the bare callee name of a Call node.

    ``a.b.c(...)`` yields ``"c"`` (final attribute), ``f(...)`` yields
    ``"f"``; any other callee shape (e.g. ``f()()``, subscripts) yields None.
    """
    callee = node.func
    if isinstance(callee, ast.Attribute):
        return callee.attr
    return callee.id if isinstance(callee, ast.Name) else None
245
+
246
+
247
def _get_dotted_name(node: ast.Call) -> str | None:
    """Return ``"owner.attr"`` for calls shaped like ``os.replace(...)``.

    Only a single-level ``Name.attr`` callee qualifies; deeper chains
    (``a.b.c()``), bare names and other callee shapes yield None.
    """
    callee = node.func
    if not isinstance(callee, ast.Attribute):
        return None
    owner = callee.value
    if not isinstance(owner, ast.Name):
        return None
    return f"{owner.id}.{callee.attr}"
253
+
254
+
255
def _get_literal_path_arg(node: ast.Call) -> str | None:
    """Return the string literal naming the file a call operates on, or None.

    Where the path lives depends on the call form:

    - ``open(path, mode)``: the first positional argument.
    - ``x.write_text(...)`` / ``x.write_bytes(...)`` / ``x.read_text()`` /
      ``x.read_bytes()``: the *receiver* ``x``, never ``args[0]`` (that is
      content / encoding).
    - ``Path("lit").write_text(...)``: the receiver is a ``Path(...)`` call,
      so its first argument is unwrapped.

    Only ``ast.Constant`` string values count; anything computed yields None.
    Any callee named ``open`` takes the first branch, attribute-style or not.
    """

    def _as_str(candidate: ast.AST) -> str | None:
        # The value of a string-constant node, else None.
        if isinstance(candidate, ast.Constant) and isinstance(candidate.value, str):
            return candidate.value
        return None

    if _get_call_name(node) == "open":
        return _as_str(node.args[0]) if node.args else None

    callee = node.func
    if not isinstance(callee, ast.Attribute):
        return None

    receiver = callee.value

    # "path/to/file".read_text() — rare, handled defensively.
    direct = _as_str(receiver)
    if direct is not None:
        return direct

    # Path("lit").write_text(...) / Path("lit").read_text()
    if (
        isinstance(receiver, ast.Call)
        and receiver.args
        and _get_call_name(receiver) == "Path"
    ):
        return _as_str(receiver.args[0])

    return None
290
+
291
+
292
def _is_write_call(node: ast.Call) -> tuple[bool, int]:
    """Report whether *node* calls ``write_text`` / ``write_bytes``.

    Returns ``(True, node.lineno)`` when the bare callee name is in
    ``WRITE_METHODS``, otherwise ``(False, 0)``.
    """
    is_direct_write = _get_call_name(node) in WRITE_METHODS
    return (True, node.lineno) if is_direct_write else (False, 0)
298
+
299
+
300
def _is_open_write(node: ast.Call) -> tuple[bool, int]:
    """Return ``(True, lineno)`` if ``open()`` is called with a write mode.

    The mode is read from the second positional argument or the ``mode=``
    keyword and must be a string constant; computed modes are not classified.

    Mode characters are order-insensitive and the text flag ``"t"`` is
    optional. A mode is therefore a write when it equals an entry of
    ``WRITE_MODES`` once ``"t"`` is dropped and character order is ignored,
    so ``"wt"``, ``"w+b"`` and ``"bw"`` are recognised as well as the
    canonical ``"w"``, ``"wb+"`` and ``"wb"``.

    Returns ``(False, 0)`` for non-``open`` calls and for read-only or
    unknown modes.
    """
    if _get_call_name(node) != "open":
        return False, 0

    def _is_write_mode(mode_node: ast.AST) -> bool:
        # Only literal string modes can be classified statically.
        if not (isinstance(mode_node, ast.Constant) and isinstance(mode_node.value, str)):
            return False
        mode = mode_node.value
        if mode in WRITE_MODES:
            return True
        # Same characters as a known write mode, ignoring order and "t".
        letters = frozenset(mode) - {"t"}
        return any(letters == frozenset(known) for known in WRITE_MODES)

    # positional: open(path, "w")   /   keyword: open(path, mode="w")
    mode_nodes = list(node.args[1:2])
    mode_nodes.extend(kw.value for kw in node.keywords if kw.arg == "mode")

    if any(_is_write_mode(mode_node) for mode_node in mode_nodes):
        return True, node.lineno
    return False, 0
322
+
323
+
324
def _has_atomic_pattern(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
    """Return True if the function shows a tmpfile+rename atomic-write pattern.

    Both signals must appear somewhere in the function's AST:

    * a replace/rename call — a callee whose bare name is in
      ``REPLACE_FUNCS``, or whose dotted name is in ``OS_REPLACE_NAMES``;
    * a tmp reference — a string literal containing ``".tmp"``, or a
      Name / Attribute whose identifier contains ``"tmp"`` (case-insensitive).

    NOTE(review): the bare-name check does not inspect the receiver, so a
    plain ``str.replace(...)`` call also satisfies the first signal.
    """
    saw_replace = False
    saw_tmp = False

    for node in ast.walk(func_node):
        if isinstance(node, ast.Call):
            # Method-style x.replace(...) / x.rename(...), or free-function
            # style os.replace(...) / os.rename(...).
            if (
                _get_call_name(node) in REPLACE_FUNCS
                or _get_dotted_name(node) in OS_REPLACE_NAMES
            ):
                saw_replace = True
        elif isinstance(node, ast.Constant):
            if isinstance(node.value, str) and ".tmp" in node.value:
                saw_tmp = True
        elif isinstance(node, ast.Name):
            if "tmp" in node.id.lower():
                saw_tmp = True
        elif isinstance(node, ast.Attribute):
            if "tmp" in node.attr.lower():
                saw_tmp = True

        # Both signals found: the rest of the tree cannot change the answer.
        if saw_replace and saw_tmp:
            return True

    return False
353
+
354
+
355
+ # ---------------------------------------------------------------------------
356
+ # Read-source manifest (Part 2)
357
+ # ---------------------------------------------------------------------------
358
+
359
def _collect_read_literals(tree: ast.AST) -> set[str]:
    """Collect the string literals used as read-source paths anywhere in *tree*.

    Recognised shapes:

    - ``open("lit")``                        (no mode → read)
    - ``open("lit", "r" / "rb" / ...)``      (mode in ``READ_MODES``)
    - ``open("lit", mode="r")``
    - ``Path("lit").read_text()`` / ``.read_bytes()``
    - wrappers such as ``json.load(open("lit"))`` — covered because the
      inner call is visited on its own.

    An ``open`` call with a non-literal path is ignored. A mode that is not
    a string constant is treated the same as a missing mode (i.e. read).
    """
    found: set[str] = set()

    for call in (n for n in ast.walk(tree) if isinstance(n, ast.Call)):
        callee_name = _get_call_name(call)

        if callee_name == "open" and call.args:
            target = call.args[0]
            if isinstance(target, ast.Constant) and isinstance(target.value, str):
                # Positional mode first; a mode= keyword overrides it.
                mode_nodes = list(call.args[1:2])
                mode_nodes.extend(kw.value for kw in call.keywords if kw.arg == "mode")
                mode: str | None = None
                for mode_node in mode_nodes:
                    if isinstance(mode_node, ast.Constant) and isinstance(mode_node.value, str):
                        mode = mode_node.value
                if mode is None or mode in READ_MODES:
                    found.add(target.value)
            # open(...) with positional args never reaches the method check.
            continue

        # *.read_text() / *.read_bytes()
        if callee_name in READ_METHODS:
            literal = _get_literal_path_arg(call)
            if literal is not None:
                found.add(literal)

    return found
405
+
406
+
407
def _build_read_manifest(
    project_dir: Path,
    changed_files: Iterable[str],
    file_snapshots: dict,
) -> dict[str, set[str]]:
    """Build a dict: literal_path_string -> set of file paths that read it.

    Args:
        project_dir: Root against which normalised paths are resolved when a
            file has to be read from disk.
        changed_files: Raw paths to scan; each is passed through
            ``normalize_path`` and skipped unless ``is_source_file`` accepts it.
        file_snapshots: Mapping from normalised path to a snapshot object
            exposing a ``text`` attribute. May be empty or None. A snapshot
            with missing or empty text falls back to reading from disk.

    Returns:
        Mapping from each read-target literal found (see
        ``_collect_read_literals``) to the normalised paths that read it.

    Fail-open: per-file IO or parse errors are logged at DEBUG and the file
    is skipped; this function does not raise for unreadable or unparsable
    input.
    """
    manifest: dict[str, set[str]] = {}

    for raw_path in changed_files:
        normalized = normalize_path(raw_path)
        if not is_source_file(normalized):
            continue

        src: str | None = None
        snap = file_snapshots.get(normalized) if file_snapshots else None
        if snap is not None and getattr(snap, "text", None):
            src = snap.text
        else:
            try:
                src = (project_dir / normalized).read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as exc:
                _log.debug("atomic_write: read-manifest IO for %s: %s", normalized, exc)
                continue

        try:
            tree = ast.parse(src)
        except (SyntaxError, ValueError, RecursionError) as exc:
            # ValueError: source contains null bytes (older interpreters).
            # RecursionError: pathologically nested source. Both must be
            # skipped like a SyntaxError to honour the fail-open contract.
            _log.debug("atomic_write: read-manifest parse %s: %s", normalized, exc)
            continue

        for lit in _collect_read_literals(tree):
            manifest.setdefault(lit, set()).add(normalized)

    return manifest
448
+
449
+
450
+ # ---------------------------------------------------------------------------
451
+ # Write collection
452
+ # ---------------------------------------------------------------------------
453
+
454
def _is_init_path_function(func_name: str) -> bool:
    """True if *func_name* matches the single-writer init/bootstrap heuristic.

    A name qualifies when it is listed in ``_INIT_PATH_EXACT`` or starts with
    one of ``_INIT_PATH_PREFIXES``. Empty / None names never qualify.
    """
    if func_name:
        if func_name in _INIT_PATH_EXACT:
            return True
        # str.startswith accepts the whole tuple of prefixes at once.
        return func_name.startswith(_INIT_PATH_PREFIXES)
    return False
461
+
462
+
463
def _is_persistence_named_function(func_name: str) -> bool:
    """F9b-tighten: True if *func_name* looks like a persistence sink.

    A name qualifies when it starts with one of ``_PERSISTENCE_PREFIXES``
    (``save_``, ``persist_``, ``write_`` …). Empty / None names never qualify.

    Non-literal writes inside such functions are NOT silently dropped — a
    MEDIUM finding with a "manual review needed" note is emitted instead.
    This closes blind-spot A: variable write targets (e.g.
    `artifact_path.write_text(...)`) that previously bypassed the read-back
    manifest check entirely.
    """
    if func_name:
        # str.startswith accepts the whole tuple of prefixes at once.
        return func_name.startswith(_PERSISTENCE_PREFIXES)
    return False
474
+
475
+
476
def _collect_unsafe_writes(
    func_node: ast.FunctionDef | ast.AsyncFunctionDef,
    file_path: str,
) -> list[dict]:
    """Return one raw finding dict per unguarded write inside *func_node*.

    If the function already shows the atomic tmpfile+rename pattern (see
    ``_has_atomic_pattern``), nothing is flagged and an empty list is returned.

    Each dict carries:
      - file, write_func, line
      - func_name: enclosing function name (Part 3 init-path heuristic)
      - write_literal: the string literal written to, if any (Part 2)
      - target_expr: AST node of the write-target expression (F.16a path-hint)
      - target_var_name: bare variable/attribute name bound to the target
        (F.16a state-file-name heuristic)
      - func_node: the enclosing function node (F.16a assignment walk)

    For ``open`` writes, ``write_func`` is rendered as ``open(..., "<mode>")``.
    """
    if _has_atomic_pattern(func_node):
        return []

    enclosing_name = getattr(func_node, "name", "") or ""

    def _record(call: ast.Call, label, lineno: int) -> dict:
        # Assemble the finding payload shared by both write flavours.
        target = _extract_write_target_expr(call)
        return {
            "file": file_path,
            "write_func": label,
            "line": lineno,
            "func_name": enclosing_name,
            "write_literal": _get_literal_path_arg(call),
            "target_expr": target,
            "target_var_name": _extract_target_var_name(target),
            "func_node": func_node,
        }

    findings: list[dict] = []

    for call in ast.walk(func_node):
        if not isinstance(call, ast.Call):
            continue

        is_method_write, lineno = _is_write_call(call)
        if is_method_write:
            findings.append(_record(call, _get_call_name(call), lineno))
            continue

        is_open, lineno = _is_open_write(call)
        if not is_open:
            continue

        # Recover the mode for the finding message; the keyword form wins
        # over the positional one, and "w" is the label when neither is a
        # constant.
        mode = "w"
        if len(call.args) >= 2 and isinstance(call.args[1], ast.Constant):
            mode = call.args[1].value
        for kw in call.keywords:
            if kw.arg == "mode" and isinstance(kw.value, ast.Constant):
                mode = kw.value.value
        findings.append(_record(call, f'open(..., "{mode}")', lineno))

    return findings
540
+
541
+
542
+ # ---------------------------------------------------------------------------
543
+ # F.16a: per-call-unique path detection (path-hint heuristic)
544
+ # ---------------------------------------------------------------------------
545
+
546
def _expr_contains_unique_marker(expr: ast.AST | None) -> bool:
    """Return True if *expr* (or any node beneath it) carries a per-call-unique marker.

    Markers:
      * a Call whose bare callee name is in ``_UNIQUE_CALL_NAMES``
        (``time.time()``, ``uuid.uuid4()``, ``datetime.now()`` …);
      * a Name whose id is in ``_UNIQUE_VAR_NAMES`` (``session_num``,
        ``attempt_id``, ``timestamp`` …) — this covers f-strings such as
        ``f"audit_{timestamp}.json"`` and ``f"session_{session_num:03d}.json"``;
      * an Attribute whose final component is in ``_UNIQUE_VAR_NAMES``
        (e.g. ``self.session_num``).

    Accepts any expression node; None yields False.
    """
    if expr is None:
        return False

    for node in ast.walk(expr):
        if isinstance(node, ast.Call):
            token, pool = _get_call_name(node), _UNIQUE_CALL_NAMES
        elif isinstance(node, ast.Name):
            token, pool = node.id, _UNIQUE_VAR_NAMES
        elif isinstance(node, ast.Attribute):
            # Treat the attribute suffix as a name hint.
            token, pool = node.attr, _UNIQUE_VAR_NAMES
        else:
            continue
        if token and token in pool:
            return True

    return False
575
+
576
+
577
def _var_name_suggests_state_file(var_name: str | None) -> bool:
    """True if *var_name* suggests a canonical state-file path.

    The lower-cased name is compared against ``_STATE_FILE_NAME_FRAGMENTS``:
    it matches when it ends with any fragment, which also covers the
    whole-name and ``_``-joined suffix cases.
    """
    if not var_name:
        return False
    # str.endswith accepts the whole tuple of fragments at once.
    return var_name.lower().endswith(_STATE_FILE_NAME_FRAGMENTS)
589
+
590
+
591
def _extract_write_target_expr(call_node: ast.Call) -> ast.AST | None:
    """Return the AST expression that evaluates to the write-target path.

    * ``open(path, "w")`` → the first positional argument (None if absent).
    * ``path.write_text(...)`` / ``path.write_bytes(...)`` → the *receiver*
      (``call_node.func.value``), which may be a Name ``path``, a BinOp
      ``gates_dir / "status.json"``, a Call ``Path("...")`` and so on.

    Any other call shape yields None.
    """
    if _get_call_name(call_node) == "open":
        return call_node.args[0] if call_node.args else None

    callee = call_node.func
    if isinstance(callee, ast.Attribute) and callee.attr in WRITE_METHODS:
        return callee.value
    return None
610
+
611
+
612
def _extract_target_var_name(target_expr: ast.AST | None) -> str | None:
    """Return the binding variable name for a write-target expression, or None.

    * Name                       → its id
    * Attribute (self.foo_path)  → its final attr
    * anything else (BinOp, Call, None …) → None, since no single binding
      name exists.
    """
    for node_type, field in ((ast.Name, "id"), (ast.Attribute, "attr")):
        if isinstance(target_expr, node_type):
            return getattr(target_expr, field)
    return None
625
+
626
+
627
def _find_assignment_rhs(
    func_node: ast.FunctionDef | ast.AsyncFunctionDef,
    var_name: str,
    before_lineno: int,
) -> ast.AST | None:
    """Return the RHS of the most recent assignment to *var_name* before *before_lineno*.

    "Most recent" means the matching assignment with the greatest source
    position (line, then column) among those on a line strictly before
    *before_lineno*. ``ast.walk`` yields nodes in no specified order, so the
    winner is chosen by position, not by visit order; otherwise an
    assignment nested in an earlier ``if`` / ``for`` block could shadow a
    later top-level one.

    Recognised assignment forms:
      * ``x = expr``       (ast.Assign with a Name target)
      * ``x: T = expr``    (ast.AnnAssign; bare annotations without a value
        are ignored)
      * ``self.x = expr``  (Attribute target, matched by attr name)

    Returns None if no matching assignment exists. Used by the
    per-call-unique heuristic to analyse how the write-target variable was
    derived.
    """

    def _binds(target: ast.AST) -> bool:
        # True when *target* names var_name, as a Name or attribute suffix.
        if isinstance(target, ast.Name):
            return target.id == var_name
        if isinstance(target, ast.Attribute):
            return target.attr == var_name
        return False

    best_pos: tuple[int, int] | None = None
    best_rhs: ast.AST | None = None

    for node in ast.walk(func_node):
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, ast.AnnAssign):
            if node.value is None:
                continue
            targets = [node.target]
        else:
            continue

        if node.lineno >= before_lineno:
            continue
        if not any(_binds(tgt) for tgt in targets):
            continue

        pos = (node.lineno, node.col_offset)
        if best_pos is None or pos >= best_pos:
            best_pos, best_rhs = pos, node.value

    return best_rhs
664
+
665
+
666
+ def _collect_tempdir_bindings(
667
+ func_node: ast.FunctionDef | ast.AsyncFunctionDef,
668
+ ) -> set[str]:
669
+ """Return the set of variable names bound to tempfile.* results in the function.
670
+
671
+ Captures:
672
+ * ``x = tempfile.mkdtemp(...)`` / ``tempfile.mkstemp(...)``
673
+ * ``with tempfile.TemporaryDirectory(...) as x:``
674
+ * ``x = Path(tempfile.mkdtemp(...))`` — Path() wrapper unwrapped
675
+
676
+ These bindings hold filesystem paths whose leaf component is guaranteed
677
+ unique by the OS, so any write derived from them is per-call-unique and
678
+ concurrency-safe (no concurrent process can guess the name).
679
+ """
680
+ tempdir_names: set[str] = set()
681
+
682
+ def _call_is_tempfile(call: ast.Call) -> bool:
683
+ # tempfile.mkdtemp / tempfile.mkstemp / tempfile.TemporaryDirectory
684
+ if isinstance(call.func, ast.Attribute) and isinstance(call.func.value, ast.Name):
685
+ if call.func.value.id == "tempfile" and call.func.attr in {
686
+ "mkdtemp", "mkstemp", "TemporaryDirectory", "NamedTemporaryFile",
687
+ }:
688
+ return True
689
+ # Bare imports: mkdtemp(...), TemporaryDirectory(...)
690
+ if isinstance(call.func, ast.Name) and call.func.id in {
691
+ "mkdtemp", "mkstemp", "TemporaryDirectory", "NamedTemporaryFile",
692
+ }:
693
+ return True
694
+ # Path(tempfile.mkdtemp(...)) — unwrap
695
+ if isinstance(call.func, ast.Name) and call.func.id == "Path" and call.args:
696
+ inner = call.args[0]
697
+ if isinstance(inner, ast.Call) and _call_is_tempfile(inner):
698
+ return True
699
+ return False
700
+
701
+ for node in ast.walk(func_node):
702
+ if isinstance(node, ast.Assign):
703
+ if isinstance(node.value, ast.Call) and _call_is_tempfile(node.value):
704
+ for tgt in node.targets:
705
+ if isinstance(tgt, ast.Name):
706
+ tempdir_names.add(tgt.id)
707
+ elif isinstance(node, ast.AnnAssign):
708
+ if (
709
+ node.value is not None
710
+ and isinstance(node.value, ast.Call)
711
+ and _call_is_tempfile(node.value)
712
+ and isinstance(node.target, ast.Name)
713
+ ):
714
+ tempdir_names.add(node.target.id)
715
+ elif isinstance(node, (ast.With, ast.AsyncWith)):
716
+ for item in node.items:
717
+ if (
718
+ isinstance(item.context_expr, ast.Call)
719
+ and _call_is_tempfile(item.context_expr)
720
+ and isinstance(item.optional_vars, ast.Name)
721
+ ):
722
+ tempdir_names.add(item.optional_vars.id)
723
+
724
+ # Transitive closure: pick up re-bindings whose RHS references a tempdir
725
+ # name already in the set, e.g. ``temp_root = Path(temp_dir)`` or
726
+ # ``tmp_proj = Path(td)``. We iterate to a fixed point (bounded: the
727
+ # graph is finite and strictly growing).
728
+ for _ in range(8): # defensive cap; realistic chains are depth 1-2.
729
+ grew = False
730
+ for node in ast.walk(func_node):
731
+ if isinstance(node, ast.Assign):
732
+ if _expr_references_any_name(node.value, tempdir_names):
733
+ for tgt in node.targets:
734
+ if isinstance(tgt, ast.Name) and tgt.id not in tempdir_names:
735
+ tempdir_names.add(tgt.id)
736
+ grew = True
737
+ elif isinstance(node, ast.AnnAssign) and node.value is not None:
738
+ if (
739
+ _expr_references_any_name(node.value, tempdir_names)
740
+ and isinstance(node.target, ast.Name)
741
+ and node.target.id not in tempdir_names
742
+ ):
743
+ tempdir_names.add(node.target.id)
744
+ grew = True
745
+ if not grew:
746
+ break
747
+ return tempdir_names
748
+
749
+
750
+ def _expr_references_any_name(expr: ast.AST | None, names: set[str]) -> bool:
751
+ """True if *expr* (or any subtree) contains a Name reference in *names*."""
752
+ if expr is None or not names:
753
+ return False
754
+ for node in ast.walk(expr):
755
+ if isinstance(node, ast.Name) and node.id in names:
756
+ return True
757
+ return False
758
+
759
+
760
+ def _path_is_per_call_unique(
761
+ target_expr: ast.AST | None,
762
+ func_node: ast.FunctionDef | ast.AsyncFunctionDef | None,
763
+ ) -> bool:
764
+ """Return True if the write-target path is provably per-call-unique.
765
+
766
+ Strategy:
767
+ 1. Inspect *target_expr* itself (e.g. inline
768
+ ``(gates_dir / f"audit_{time.time()}.json")``).
769
+ 2. If the target is a bare variable binding, walk back to its most
770
+ recent assignment RHS and inspect that.
771
+ 3. Tempdir chain: if the write-target expression references any
772
+ variable bound to a ``tempfile.*`` result (``mkdtemp``,
773
+ ``TemporaryDirectory`` …), the path is OS-guaranteed unique and
774
+ single-writer — suppress.
775
+
776
+ A positive hit means every invocation of the enclosing function writes to
777
+ a distinct filename, which makes a crash-torn partial write unobservable
778
+ by any reader (the reader needs the exact same filename and the writer
779
+ never reuses one). The finding is suppressed.
780
+ """
781
+ if target_expr is None:
782
+ return False
783
+
784
+ # Step 1 — check the expression itself.
785
+ if _expr_contains_unique_marker(target_expr):
786
+ return True
787
+
788
+ # Step 2 — if the expression is a Name / Attribute, walk back to the
789
+ # binding's assignment RHS and check there.
790
+ var_name = _extract_target_var_name(target_expr)
791
+ if var_name and func_node is not None:
792
+ rhs = _find_assignment_rhs(
793
+ func_node,
794
+ var_name,
795
+ before_lineno=getattr(target_expr, "lineno", 10 ** 9),
796
+ )
797
+ if _expr_contains_unique_marker(rhs):
798
+ return True
799
+ # Also check if the RHS itself references a tempdir binding — e.g.
800
+ # ``schema_path = temp_root / "schema.json"`` where ``temp_root``
801
+ # came from ``with TemporaryDirectory() as temp_dir``.
802
+ if func_node is not None:
803
+ tempdir_names = _collect_tempdir_bindings(func_node)
804
+ if _expr_references_any_name(rhs, tempdir_names):
805
+ return True
806
+
807
+ # Step 3 — inline expression referencing a tempdir binding, e.g.
808
+ # ``(clean_config_dir / "settings.json").write_text(...)`` where
809
+ # ``clean_config_dir = Path(tempfile.mkdtemp(...))``.
810
+ if func_node is not None:
811
+ tempdir_names = _collect_tempdir_bindings(func_node)
812
+ if _expr_references_any_name(target_expr, tempdir_names):
813
+ return True
814
+
815
+ return False
816
+
817
+
818
+ # ---------------------------------------------------------------------------
819
+ # Severity / skip classification
820
+ # ---------------------------------------------------------------------------
821
+
822
def _classify_severity(
    hit: dict,
    read_manifest: dict[str, set[str]],
) -> tuple[GateSeverity | None, str | None]:
    """Return ``(severity, extra_note)`` for a write hit, or ``(None, None)`` to skip it.

    Literal target (``hit["write_literal"]`` is set) -- Part 2:
      * no entry in *read_manifest*              -> skip
      * read only from the writing file itself   -> LOW
      * read from at least one other file        -> MEDIUM

    Non-literal target -- F.16a path hints, then the F.10 fallback:
      * path is per-call-unique                  -> skip
      * target variable name suggests state file -> MEDIUM + note
      * function name suggests persistence       -> MEDIUM + note
      * otherwise                                -> LOW + note

    Part 3 is applied last to both branches: inside an init/bootstrap
    function a MEDIUM result is downgraded to LOW (the note is kept).

    Apart from the per-call-unique suppression, a non-literal write always
    yields at least a LOW finding so it stays visible to reviewers.
    """
    func_name = hit.get("func_name") or ""
    write_literal = hit.get("write_literal")
    note: str | None = None

    if write_literal is not None:
        # Part 2: literal target, decided by who reads the file back.
        readers = read_manifest.get(write_literal, set())
        if not readers:
            # Write-only artefact (log output, temp file): nothing to race with.
            return None, None
        writer_file = hit.get("file") or ""
        if readers - {writer_file}:
            severity = GateSeverity.MEDIUM
        else:
            # Only the writing file reads it back: single writer, single reader.
            severity = GateSeverity.LOW
    else:
        # F.16a: a per-call-unique filename cannot be raced -- suppress.
        if _path_is_per_call_unique(hit.get("target_expr"), hit.get("func_node")):
            return None, None

        target_var_name = hit.get("target_var_name")
        if _var_name_suggests_state_file(target_var_name):
            severity = GateSeverity.MEDIUM
            note = (
                f"non-literal target, variable name '{target_var_name}' "
                "suggests state file, manual review needed"
            )
        elif _is_persistence_named_function(func_name):
            severity = GateSeverity.MEDIUM
            note = (
                "non-literal target, function name suggests persistence, "
                "manual review needed"
            )
        else:
            severity = GateSeverity.LOW
            note = "non-literal write target, read-back unknown"

    # Part 3: init/bootstrap functions downgrade MEDIUM to LOW.
    if _is_init_path_function(func_name) and severity == GateSeverity.MEDIUM:
        severity = GateSeverity.LOW

    return severity, note
898
+
899
+
900
+ # ---------------------------------------------------------------------------
901
+ # Sprint B3 (2026-04-23) — ArtifactRoleMap applicability mapping
902
+ # ---------------------------------------------------------------------------
903
+
904
# Lookup table: artifact role -> (applicability, applicability_reason).
# Per the Sprint B3 P1 rule, applicability derives ONLY from the structural
# role; risk signals (read-back, init-path) influence confidence instead.
_APPLICABILITY_BY_ROLE = {
    "shared_state": ("applicable", ""),
    "config": ("applicable", ""),
    "manifest": ("applicable", ""),
    "per_run_output": ("not_applicable", "role: per_run_output"),
    "temp": ("not_applicable", "role: temp"),
    "log": ("not_applicable", "role: log"),
    "cache": ("unknown", "role: cache"),
    "unknown": ("unknown", "role: unknown"),
}


def _applicability_for_role(role: str) -> tuple[str, str]:
    """Look up ``(applicability, applicability_reason)`` for *role*.

    A role missing from ``_APPLICABILITY_BY_ROLE`` maps to ``"unknown"``
    applicability with the reason ``"role classification failed"``, so the
    finding is surfaced to the reviewer instead of being silently dropped.
    """
    try:
        return _APPLICABILITY_BY_ROLE[role]
    except KeyError:
        return ("unknown", "role classification failed")
926
+
927
+
928
def _severity_for_applicability(applicability: str) -> "GateSeverity":
    """Default severity on the role-driven path (the legacy path tunes its own).

    ``"applicable"`` maps to MEDIUM; every other applicability value maps
    to LOW.
    """
    return GateSeverity.MEDIUM if applicability == "applicable" else GateSeverity.LOW
933
+
934
+
935
def _confidence_with_risk_signals(wtc) -> float:
    """Nudge the detector confidence using risk signals read from *wtc*.

    These signals only move confidence; they never gate applicability.

    * cross-file read-back      -> +0.10, capped at 1.0 (real race window)
    * same-file-only read-back  -> -0.10, floored at 0.0 (single-process
      round-trip); only considered when there is no cross-file read-back
    * enclosing init path       -> -0.15, floored at 0.0 (one-time
      execution); applied on top of either read-back adjustment
    """
    score = float(wtc.confidence)

    if wtc.has_read_back_cross_file:
        score = min(score + 0.10, 1.0)
    elif wtc.has_read_back_same_file:
        score = max(score - 0.10, 0.0)

    return max(score - 0.15, 0.0) if wtc.is_in_init_path else score
950
+
951
+
952
+ # ---------------------------------------------------------------------------
953
+ # Public gate entry-point
954
+ # ---------------------------------------------------------------------------
955
+
956
def run_atomic_write_safety_checks(ctx: PostExecGateContext):
    """Detect write calls without an atomic tmpfile+rename pattern.

    For each path in ``ctx.changed_files_observed``:
      1. Normalise it; skip it unless ``is_source_file`` accepts it.
      2. Skip it if ``_is_test_or_libs_path`` says it is a test or vendored
         path.
      3. Read and parse it.  Syntax errors are reported as a finding by
         ``parse_python_source_or_emit_finding`` rather than swallowed.
      4. Walk every function definition (sync and async, including nested
         ones) and collect candidate unsafe writes with
         ``_collect_unsafe_writes``.
      5. Classify each hit:
         * if ``ctx.project_context.artifact_roles`` is available, derive
           applicability from the ArtifactRole and adjust confidence with
           read-back + init-path signals (Sprint B3);
         * if there is no role map, or the role map has no classification
           for this particular site, use the legacy heuristic path
           (F.9b/F.10/F.16a), which may suppress the hit.

    Fail-open: unreadable files are logged at DEBUG level and skipped; this
    function does not raise for them.

    Returns:
        The result of ``build_check_result`` for check id
        ``atomic_write_safety`` with all collected findings.
    """
    findings = []

    # Sprint B3: prefer role map from ProjectContext when available.
    artifact_map = getattr(getattr(ctx, "project_context", None), "artifact_roles", None)

    # Legacy prerequisite: build the read-source manifest once up front. It is
    # needed for the fallback path, including hits the role map cannot classify.
    read_manifest = _build_read_manifest(
        project_dir=ctx.project_dir,
        changed_files=ctx.changed_files_observed,
        file_snapshots=getattr(ctx, "file_snapshots", {}) or {},
    )

    for raw_path in ctx.changed_files_observed:
        normalized = normalize_path(raw_path)
        if not is_source_file(normalized):
            continue

        # Skip tests and vendored libs -- these paths don't warrant
        # atomic-write enforcement. Sprint C2: ctx is threaded through so the
        # helper can consult ProjectContext.test_topology when present.
        if _is_test_or_libs_path(normalized, ctx):
            continue

        abs_path = ctx.project_dir / normalized
        try:
            src = abs_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            _log.debug("atomic_write: failed to read %s: %s", normalized, exc)
            continue

        # B4 (2026-04-23): replaces silent `except SyntaxError` -- now emits
        # meta.syntax_parse_error so broken Python is not invisible.
        tree = parse_python_source_or_emit_finding(
            src,
            rel_path=normalized,
            emit_finding=findings.append,
            emitting_gate="atomic_write_safety",
        )
        if tree is None:
            continue

        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            for hit in _collect_unsafe_writes(node, normalized):
                if artifact_map is not None:
                    finding = _build_finding_from_role_map(
                        hit=hit,
                        artifact_map=artifact_map,
                        rel_path=normalized,
                    )
                    if finding is not None:
                        findings.append(finding)
                        continue
                    # No classification for this site: a None result is the
                    # builder's signal to use the legacy fallback, so fall
                    # through instead of dropping the hit.

                # Legacy path (no project_context, or site not classified).
                severity, extra_note = _classify_severity(hit, read_manifest)
                if severity is None:
                    continue  # legacy: write-only / per-call-unique suppression
                findings.append(_build_legacy_finding(hit, severity, extra_note))

    return build_check_result(
        check_id="atomic_write_safety",
        category=GateCategory.DRIFT,
        findings=findings,
    )
1043
+
1044
+
1045
def _build_finding_from_role_map(
    *,
    hit: dict,
    artifact_map,
    rel_path: str,
):
    """Sprint B3 path: turn one write hit into a finding via the ArtifactRoleMap.

    ``artifact_map.classify_site(rel_path, hit["line"])`` supplies the
    classification.  When it returns ``None`` this function returns ``None``
    as well, signalling the caller to use the legacy fallback.

    Otherwise the finding's tri-state applicability and default severity come
    from the classified role, its confidence is adjusted by the read-back and
    init-path risk signals, and those same signals are appended to the
    summary as a human-readable tag list.
    """
    wtc = artifact_map.classify_site(rel_path, hit["line"])
    if wtc is None:
        return None

    applicability, app_reason = _applicability_for_role(wtc.role)
    severity = _severity_for_applicability(applicability)
    confidence = _confidence_with_risk_signals(wtc)

    # Risk-signal suffix for the summary, so the reviewer sees the signals.
    if wtc.has_read_back_cross_file:
        read_back_tags = ["cross-file read-back"]
    elif wtc.has_read_back_same_file:
        read_back_tags = ["same-file read-back"]
    else:
        read_back_tags = []
    init_tags = ["init path"] if wtc.is_in_init_path else []
    tags: list[str] = [f"role={wtc.role}", *read_back_tags, *init_tags]

    site = f"{hit['file']}: {hit['write_func']}() at line {hit['line']}"
    summary = (
        f"{site} writes directly without a tmpfile+rename guard -- "
        "partial writes or corruption on crash are possible. "
        f"({', '.join(tags)})"
    )
    recommendation = (
        "Write to a .tmp sibling file first, then atomically rename "
        "it to the target (Path.replace / os.replace). This prevents "
        "readers from observing a partially-written file."
    )
    write_site = EvidenceReference(
        kind="file",
        path=hit["file"],
        detail=f"write={hit['write_func']}:L{hit['line']}",
    )

    return build_finding(
        check_id="atomic_write_safety.missing_tmpfile_rename",
        category=GateCategory.DRIFT,
        title="Non-atomic write: missing tmpfile+rename pattern",
        severity=severity,
        impact=GateImpact.REVISE,
        summary=summary,
        recommendation=recommendation,
        evidence=[write_site],
        repair_kind=RepairKind.REFACTOR.value,
        executor_action="Use atomic write pattern",
        proof_required="Atomic writes verified",
        allowlist_allowed=False,
        confidence=confidence,
        applicability=applicability,
        applicability_reason=app_reason,
        analysis_mode="ast",
    )
1109
+
1110
+
1111
def _build_legacy_finding(hit: dict, severity: "GateSeverity", extra_note):
    """Build the finding for the legacy (pre-Sprint-B3) classification path.

    The summary names the file, write function and line taken from *hit*;
    a truthy *extra_note* is appended to it in parentheses.  *severity* is
    passed through unchanged.
    """
    site = f"{hit['file']}: {hit['write_func']}() at line {hit['line']}"
    summary = (
        f"{site} writes directly without a tmpfile+rename "
        "guard -- partial writes or corruption on crash are possible."
    )
    if extra_note:
        summary = f"{summary} ({extra_note})"

    recommendation = (
        "Write to a .tmp sibling file first, then atomically rename "
        "it to the target (Path.replace / os.replace). This prevents "
        "readers from observing a partially-written file."
    )
    write_site = EvidenceReference(
        kind="file",
        path=hit["file"],
        detail=f"write={hit['write_func']}:L{hit['line']}",
    )

    return build_finding(
        check_id="atomic_write_safety.missing_tmpfile_rename",
        category=GateCategory.DRIFT,
        title="Non-atomic write: missing tmpfile+rename pattern",
        severity=severity,
        impact=GateImpact.REVISE,
        summary=summary,
        recommendation=recommendation,
        evidence=[write_site],
        repair_kind=RepairKind.REFACTOR.value,
        executor_action="Use atomic write pattern",
        proof_required="Atomic writes verified",
        allowlist_allowed=False,
    )