specfuse-loop 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
specfuse/loop/loop.py ADDED
@@ -0,0 +1,3504 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ # Copyright 2026 Specfuse contributors
4
+ # Licensed under the Apache License, Version 2.0. See LICENSE.
5
+ #
6
+ """
7
+ Specfuse loop driver — single-repo, exploded-layout edition.
8
+
9
+ A "dumb driver, smart spec" loop in the Ralph tradition. Intelligence lives in the
10
+ work-unit files and the verification gates, never here. Per feature, per gate, the
11
+ driver:
12
+
13
+ 1. reads the task GRAPH from PLAN.md (gate order, WU membership, dependencies),
14
+ 2. finds the next ready work units in the current gate,
15
+ 3. dispatches each as a FRESH `claude -p` session with its declared model,
16
+ handing it the WU file's prompt body,
17
+ 4. acts as the exit oracle by running the WU's verification ITSELF,
18
+ 5. on pass: makes one squashed, trailer-carrying commit per WU,
19
+ 6. on fail: re-dispatches a fresh session carrying the failure evidence, up to
20
+ MAX_ATTEMPTS, then escalates (blocked_human) and halts the gate,
21
+ 7. when every WU in the gate is done — including the closing sequence
22
+ (retrospective -> lessons -> docs -> plan-next) — marks the gate
23
+ awaiting_review and stops for human reflection.
24
+
25
+ Ownership (one fact, one home):
26
+ - PLAN.md owns the SHAPE : gates, which WUs are in them, dependency edges.
27
+ - GATE-NN owns the GATE : gate status + definition of done + reflection.
28
+ - WU-*.md owns ITSELF : type, model, status, attempts + the prompt body.
29
+
30
+ Durable state lives in those files, git history, and the per-feature event log —
31
+ never in a context window. Each dispatch is a fresh session. That is the Ralph
32
+ property, kept at work-unit granularity because units are crafted to land in one pass.
33
+
34
+ Two things differ in the orchestrator and are isolated behind `Backend`:
35
+ - STATE BACKEND : status in WU/GATE files here; GitHub issue labels + registry there.
36
+ - DISPATCH : subprocess here; inbox + polling loop there.
37
+ Swap those; everything else is portable.
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ import argparse
43
+ import datetime as dt
44
+ import fcntl
45
+ import json
46
+ import logging
47
+ import re
48
+ import subprocess
49
+ import sys
50
+ import time
51
+ from dataclasses import dataclass, field
52
+ from pathlib import Path
53
+
54
+ from . import _miniyaml
55
+ from .gate_eval import evaluate_auto_close, AutoCloseDecision
56
+
57
# --- Filesystem layout (all paths are relative) ------------------------------
SPECFUSE_DIR = Path(".specfuse")
REPO_ROOT = SPECFUSE_DIR.parent
FEATURES_DIR = SPECFUSE_DIR / "features"
VERIFICATION_PATH = SPECFUSE_DIR / "verification.yml"
SCAFFOLD_VERSION_PATH = SPECFUSE_DIR / "VERSION"

# --- Versions -----------------------------------------------------------------
DRIVER_VERSION = "0.2.0"
# Oldest scaffold layout this driver can drive. init.sh stamps the scaffold's
# own version into `.specfuse/VERSION`; check_scaffold_version() fails loud at
# startup when the consumer's scaffold is older, pointing at `specfuse upgrade`.
# Bump only when a scaffold-format change makes an older `.specfuse/` undriveable.
MIN_SCAFFOLD_VERSION = "0.2.0"

# Spinning threshold: this many failed verification cycles -> escalate.
MAX_ATTEMPTS = 3

# Command line for a fresh agent session. `{model}` and `{effort}` are filled
# per work unit; the prompt itself is piped on stdin.
CLAUDE_CMD = ["claude", "-p", "--model", "{model}", "--effort", "{effort}"]

VALID_EFFORT = frozenset(("low", "medium", "high", "xhigh", "max"))

# Family aliases accepted in a WU's `model:` frontmatter field. The CLI resolves
# them to the latest concrete model at dispatch time; the loop passes the value
# through verbatim and never expands it.
MODEL_ALIASES = frozenset(("sonnet", "opus", "haiku"))

# One row per WU type: (type, default model, default effort, verification set).
# The three lookup tables below are derived from it, in this row order.
#   * model / effort are the defaults load_wu applies when the WU frontmatter
#     omits them; an explicit value in the frontmatter wins. The rows are meant
#     to cover every VALID_TYPES value.
#   * the verification set is a key in verification.yml.
#   * `close` collapses the four closing ceremonies into one session for any
#     terminal gate (single- or multi-gate); `close-intermediate` is the
#     equivalent for non-terminal gates, leaving `plan-next` as a separate
#     dispatch. Both reuse the `plannext` gate set: lint_plan.py verifies
#     structural integrity post-close.
_WU_TYPE_TABLE = (
    ("implementation", "sonnet", "medium", "code"),
    ("retrospective", "sonnet", "low", "doc"),
    ("lessons", "sonnet", "low", "doc"),
    ("docs", "sonnet", "low", "doc"),
    ("plan-next", "opus", "high", "plannext"),
    ("close", "opus", "high", "plannext"),
    ("close-intermediate", "opus", "high", "plannext"),
)
MODEL_BY_TYPE = {row[0]: row[1] for row in _WU_TYPE_TABLE}
EFFORT_BY_TYPE = {row[0]: row[2] for row in _WU_TYPE_TABLE}
GATES_FOR_TYPE = {row[0]: row[3] for row in _WU_TYPE_TABLE}

VERDICT_VALUES = frozenset(("met", "met_locally", "partially_met", "not_met"))

# Statuses the driver will dispatch. `draft` is deliberately absent: plan-next
# writes the next gate's WUs as drafts, and a human must arm them first.
DISPATCHABLE = {"pending", "ready"}
DONE = "done"
122
+
123
+
124
def verdict_permits_terminal_flips(verdict: str | None) -> bool:
    """Report whether *verdict* is exactly ``"met"``.

    Any other value, ``None`` included, yields False.
    """
    is_met = verdict == "met"
    return is_met
127
+
128
+
129
+ # --------------------------------------------------------------------------- #
130
+ # Data model #
131
+ # --------------------------------------------------------------------------- #
132
+
133
+
134
@dataclass
class WorkUnit:
    """One work unit: its graph position plus the fields read from its file."""

    wu_id: str
    file: Path
    depends_on: list[str]  # dependency edges, taken from the PLAN.md graph
    type: str  # WU file frontmatter
    model: str
    status: str
    attempts: int
    title: str
    body: str  # prompt text handed to the dispatched session
    # One of low|medium|high|xhigh|max; forwarded as `--effort` to `claude -p`.
    effort: str = "medium"
    # Optional sandbox escape. With `unsandboxed: true` in the WU frontmatter
    # the driver appends `--dangerously-skip-permissions` to the `claude -p`
    # invocation. A one-line `unsandboxed_rationale` must accompany it in the
    # same frontmatter (it is written to events.jsonl as the audit signal);
    # load_wu refuses a WU that sets the flag without a rationale.
    unsandboxed: bool = False
    unsandboxed_rationale: str = ""
    verdict: str | None = None
    produces_driver_helper: list[str] = field(default_factory=list)
    # Optional author-declared deliverable contract: the file path(s) this WU
    # is contracted to yield. Not the same as `files_changed` (a runtime claim
    # in the RESULT block) nor `produces_driver_helper` (driver symbols,
    # lint-only): `produces` names files and IS machine-enforced by
    # FEAT-2026-0022/T02's presence gate (each path must exist and be
    # non-empty at completion).
    produces: list[str] = field(default_factory=list)
    # Optional extra verification gate sets, unioned by verify() onto the set
    # selected by WU type. Names index into verification.yml exactly like the
    # type sets do (e.g. `extra_gates: [live-verify]`). A name missing from
    # verification.yml is a CONFIGURATION ERROR, never a silent pass (issue #62).
    extra_gates: list[str] = field(default_factory=list)
166
+
167
+
168
@dataclass
class GateNode:
    """A gate entry of the plan graph together with the gate file's status."""

    number: int
    file: Path
    status: str  # read from the GATE file's frontmatter
    # Work-unit references for this gate, each shaped {id, file, depends_on}.
    refs: list[dict] = field(default_factory=list)
174
+
175
+
176
+ # --------------------------------------------------------------------------- #
177
+ # Frontmatter helpers #
178
+ # --------------------------------------------------------------------------- #
179
+
180
# A frontmatter fence: a line holding only `---` (trailing whitespace allowed).
FM = re.compile(r"^---\s*$")


def read_frontmatter(path: Path) -> tuple[dict, str]:
    """Split a file into ``(frontmatter_dict, body_text)``.

    The file is read exactly once, so the fence check and the returned text
    always come from the same snapshot of the file.

    * No leading ``---`` fence (or an empty file): returns ``({}, <whole text>)``.
    * Otherwise the lines between the first fence and the next fence are parsed
      with ``_miniyaml.parse`` (a falsy parse result becomes ``{}``) and the
      remaining lines, re-joined with ``"\\n"``, are the body.
    * If no closing fence exists, every line after the opening fence is treated
      as frontmatter and the body is empty.
    """
    text = path.read_text()
    lines = text.splitlines()
    if not lines or not FM.match(lines[0]):
        return {}, text
    # Index of the closing fence, or len(lines) when the fence is missing.
    closing = 1
    while closing < len(lines) and not FM.match(lines[closing]):
        closing += 1
    fm = _miniyaml.parse("\n".join(lines[1:closing])) or {}
    body = "\n".join(lines[closing + 1:])
    return fm, body
194
+
195
+
196
def write_frontmatter_field(path: Path, key: str, value) -> None:
    """Set one top-level ``key: value`` line in a file's frontmatter.

    The first frontmatter line starting with ``key:`` is overwritten; when no
    such line exists a new one is appended at the end of the frontmatter. Body
    lines are carried over as-is and the file is rewritten with ``"\\n"`` line
    endings and a trailing newline.

    Raises:
        ValueError: the file does not open with a ``---`` fence.
    """
    lines = path.read_text().splitlines()
    if not (lines and FM.match(lines[0])):
        raise ValueError(f"{path} has no frontmatter")

    # Locate the closing fence (len(lines) when it is missing).
    fence_end = 1
    while fence_end < len(lines) and not FM.match(lines[fence_end]):
        fence_end += 1

    header = lines[1:fence_end]
    rendered = f"{key}: {value}"
    key_line = re.compile(rf"^{re.escape(key)}:")
    hit = next((i for i, text in enumerate(header) if key_line.match(text)), None)
    if hit is None:
        header.append(rendered)
    else:
        header[hit] = rendered

    rebuilt = ["---", *header, "---", *lines[fence_end + 1:]]
    path.write_text("\n".join(rebuilt) + "\n")
216
+
217
+
218
+ # --------------------------------------------------------------------------- #
219
+ # Plan / gate / WU loading #
220
+ # --------------------------------------------------------------------------- #
221
+
222
+
223
def find_feature(arg: str | None) -> Path:
    """Resolve the feature directory to drive, or exit with guidance.

    With *arg*: a value starting with ``.`` is used as a path as given, anything
    else is looked up under ``FEATURES_DIR``; the process exits when that
    directory has no ``PLAN.md``.

    Without *arg*: every ``FEATURES_DIR/*/PLAN.md`` is inspected. Exactly one
    plan with ``status: active`` is returned; zero or several make the process
    exit with a message. Features with ``status: done`` that contain a
    ``RETROSPECTIVE.md`` are listed (last three, in sorted order) as
    ``/wrap-feature`` candidates in the "no active feature" message.
    """
    if arg:
        chosen = Path(arg) if arg.startswith(".") else FEATURES_DIR / arg
        if (chosen / "PLAN.md").exists():
            return chosen
        sys.exit(f"No PLAN.md under {chosen}")

    active_dirs = []
    wrap_candidates = []
    for candidate in sorted(FEATURES_DIR.glob("*/")):
        plan_file = candidate / "PLAN.md"
        if not plan_file.exists():
            continue
        plan_status = read_frontmatter(plan_file)[0].get("status")
        if plan_status == "active":
            active_dirs.append(candidate)
        elif plan_status == "done" and (candidate / "RETROSPECTIVE.md").is_file():
            # Conservative heuristic for "closed but maybe not wrapped yet": the
            # close ceremony left a RETROSPECTIVE.md behind. Whether push + PR
            # are still pending is for the operator to decide via /wrap-feature.
            wrap_candidates.append(candidate)

    if len(active_dirs) == 1:
        return active_dirs[0]

    if active_dirs:
        sys.exit(f"Multiple active features; pass --feature. Found: "
                 f"{[d.name for d in active_dirs]}")

    parts = ["No active feature. Set a feature's PLAN.md status to 'active'.\n"]
    if wrap_candidates:
        names = ", ".join(d.name for d in wrap_candidates[-3:])
        parts.append(
            " - /wrap-feature finalize a recently-closed feature "
            "(push branch + open PR + merge advisory).\n"
            f" Candidates: {names}\n"
        )
    parts.append(
        " - /pick-feature choose a 'planned' feature from the roadmap and activate it\n"
        " - /draft-feature scaffold a new feature (gates + gate-1 work units)\n"
        " - /arm-gate if a feature halted at a gate boundary awaiting review"
    )
    sys.exit("".join(parts))
263
+
264
+
265
def load_graph(feature_dir: Path) -> tuple[dict, list[GateNode]]:
    """Read PLAN.md and return ``(plan_frontmatter, gate_nodes)``.

    The graph is the first fenced ```` ```yaml ```` / ```` ```yml ```` block in
    the PLAN.md body; the process exits when none is found. Each entry under
    its ``gates`` key becomes a ``GateNode`` whose status comes from the gate
    file's frontmatter, defaulting to ``"open"`` when the file is missing or
    declares no status.
    """
    plan_fm, plan_body = read_frontmatter(feature_dir / "PLAN.md")
    fenced = re.search(r"```ya?ml\s*\n(.*?)\n```", plan_body, re.DOTALL)
    if fenced is None:
        sys.exit("PLAN.md has no ```yaml graph block.")
    graph = _miniyaml.parse(fenced.group(1)) or {}

    def _to_node(entry: dict) -> GateNode:
        gate_path = feature_dir / entry["file"]
        if gate_path.exists():
            gate_fm, _ = read_frontmatter(gate_path)
        else:
            gate_fm = {}
        return GateNode(
            number=entry["gate"],
            file=gate_path,
            status=gate_fm.get("status", "open"),
            refs=entry.get("work_units", []) or [],
        )

    return plan_fm, [_to_node(entry) for entry in graph.get("gates", [])]
285
+
286
+
287
def _frontmatter_str_list(path: Path, key: str, raw) -> list[str]:
    """Normalise an optional frontmatter value into a fresh list of strings.

    ``None`` -> ``[]``; a string -> a one-element list; a list -> a new list
    with every element passed through ``str``. Anything else raises ValueError
    naming *path* and *key*.
    """
    if raw is None:
        return []
    if isinstance(raw, str):
        return [raw]
    if isinstance(raw, list):
        return [str(item) for item in raw]
    raise ValueError(
        f"{path}: `{key}` must be a string or list of strings, "
        f"got {type(raw).__name__!r}"
    )


def load_wu(feature_dir: Path, ref: dict) -> WorkUnit:
    """Build a ``WorkUnit`` from a PLAN.md graph reference and its WU file.

    Args:
        feature_dir: feature directory; ``ref["file"]`` is resolved against it.
        ref: graph entry with ``id`` and ``file`` keys, and optionally
            ``depends_on``.

    Frontmatter handling:
        * ``type`` defaults to ``"implementation"``.
        * ``effort`` / ``model``, when absent, fall back to the per-type tables
          (``EFFORT_BY_TYPE`` / ``MODEL_BY_TYPE``); an explicit ``effort`` must
          be in ``VALID_EFFORT``.
        * ``unsandboxed: true`` requires a non-empty ``unsandboxed_rationale``.
        * ``verdict`` is only read for ``close`` / ``close-intermediate`` WUs.
        * ``produces_driver_helper``, ``produces`` and ``extra_gates`` accept a
          string or a list and are all normalised the same way.
        * ``attempts`` that is missing or empty counts as ``0``.

    The title is the first ``# `` heading in the body, else the WU id.

    Raises:
        ValueError: invalid effort, missing sandbox rationale, or a list-like
            field of the wrong type.
    """
    path = feature_dir / ref["file"]
    fm, body = read_frontmatter(path)
    title_m = re.search(r"^#\s+(.*)$", body, re.MULTILINE)
    wu_type = fm.get("type", "implementation")

    effort = fm.get("effort")
    if effort is None:
        effort = EFFORT_BY_TYPE.get(wu_type, "medium")
    elif effort not in VALID_EFFORT:
        raise ValueError(
            f"{path}: invalid effort '{effort}' — must be one of "
            f"{sorted(VALID_EFFORT)}"
        )

    wu_model = fm.get("model")
    if wu_model is None:
        wu_model = MODEL_BY_TYPE.get(wu_type, "claude-sonnet-4-6")

    unsandboxed = bool(fm.get("unsandboxed", False))
    unsandboxed_rationale = str(fm.get("unsandboxed_rationale", "") or "").strip()
    if unsandboxed and not unsandboxed_rationale:
        raise ValueError(
            f"{path}: `unsandboxed: true` requires a non-empty "
            f"`unsandboxed_rationale` in the same frontmatter. Sandbox-escape "
            f"is auditable; the rationale is the audit signal."
        )

    verdict: str | None = None
    if wu_type in {"close", "close-intermediate"}:
        verdict = fm.get("verdict") or None

    # Validated in this order so the first offending field is the one reported.
    produces_driver_helper = _frontmatter_str_list(
        path, "produces_driver_helper", fm.get("produces_driver_helper")
    )
    produces = _frontmatter_str_list(path, "produces", fm.get("produces"))
    extra_gates = _frontmatter_str_list(path, "extra_gates", fm.get("extra_gates"))

    return WorkUnit(
        wu_id=ref["id"],
        file=path,
        depends_on=list(ref.get("depends_on", []) or []),
        type=wu_type,
        model=wu_model,
        effort=effort,
        status=fm.get("status", "pending"),
        # `attempts:` with no value parses as empty; treat it like a missing key.
        attempts=int(fm.get("attempts") or 0),
        title=title_m.group(1).strip() if title_m else ref["id"],
        body=body.strip(),
        unsandboxed=unsandboxed,
        unsandboxed_rationale=unsandboxed_rationale,
        verdict=verdict,
        produces_driver_helper=produces_driver_helper,
        produces=produces,
        extra_gates=extra_gates,
    )
368
+
369
+
370
+ # --------------------------------------------------------------------------- #
371
+ # State backend seam #
372
+ # --------------------------------------------------------------------------- #
373
+
374
+
375
class Backend:
    """File-based state backend: the single seam for status mutation.

    The orchestrator subclasses this to write GitHub issue labels and registry
    frontmatter instead of these files; code above this seam stays unchanged.
    """

    def set_wu(self, wu: WorkUnit, key: str, value) -> None:
        """Persist ``key: value`` to the WU file, then mirror it on *wu*."""
        write_frontmatter_field(wu.file, key, value)
        setattr(wu, key, value)  # keep the in-memory object in sync

    def set_gate(self, gate: GateNode, status: str) -> None:
        """Persist *status* to the gate file, then mirror it on *gate*.

        A missing gate file is materialized as a stub first. PLAN.md may
        reference a gate whose markdown was never authored (e.g. plan-next
        drafted an empty follow-up gate the human never filled in); without
        the stub, the frontmatter write would fail with FileNotFoundError and
        the whole feature would halt unrecoverably.
        """
        if gate.file.is_file():
            write_frontmatter_field(gate.file, "status", status)
        else:
            gate.file.parent.mkdir(parents=True, exist_ok=True)
            stub = (
                f"---\ngate: {gate.number}\nstatus: {status}\n---\n\n"
                f"# Gate {gate.number}\n\n"
                "_Stub created by loop.set_gate because PLAN.md referenced "
                "this gate but no markdown file existed. Body intentionally "
                "minimal; edit if you want a real Definition of Done._\n"
            )
            gate.file.write_text(stub)
        gate.status = status

    def on_feature_start(self, feature_id: str, feat_fm: dict) -> None:
        """Hook: called once per run(), before any dispatch, even on no-op polls."""

    def on_gate_passed(self, feature_id: str, gate_number: int) -> None:
        """Hook: called once a gate's WUs are all done and it flips to awaiting_review."""

    def on_feature_complete(self, feature_id: str) -> None:
        """Hook: called when all gates are passed and the feature is fully complete."""
412
+
413
+
414
def make_backend(feat_fm: dict) -> Backend:
    """Pick the state backend for a feature.

    Returns a ``GitHubBackend`` when the frontmatter's ``source_issue_url`` has
    the exact shape ``https://github.com/<owner>/<repo>/issues/<number>``;
    otherwise the plain file-based ``Backend``.
    """
    source_url = feat_fm.get("source_issue_url", "") or ""
    issue = re.match(r"^https://github\.com/([^/]+/[^/]+)/issues/(\d+)$", source_url)
    if issue is None:
        return Backend()
    # Imported only when a GitHub-backed feature is actually in play.
    from .gh_backend import GitHubBackend
    repo_slug, issue_no = issue.groups()
    return GitHubBackend(repo=repo_slug, issue_number=int(issue_no))
423
+
424
+
425
+ # --------------------------------------------------------------------------- #
426
+ # Event log (per feature) #
427
+ # --------------------------------------------------------------------------- #
428
+
429
+
430
def build_event(event_type: str, correlation_id: str, payload: dict) -> dict:
    """Assemble one event record; pure, performs no I/O.

    Records are buffered in memory during a WU's lifecycle and flushed to disk
    at outcome time, so a `git reset --hard` between attempts cannot silently
    discard events that had already been appended.
    """
    stamped_at = dt.datetime.now(dt.timezone.utc).isoformat()
    return dict(
        timestamp=stamped_at,
        correlation_id=correlation_id,
        event_type=event_type,
        source="driver",
        source_version=DRIVER_VERSION,
        payload=payload,
    )
442
+
443
+
444
def flush_events(events_path: Path, events: list) -> None:
    """Append buffered events to the JSONL log, one JSON document per line.

    Nothing is written (and the file is not created) when *events* is empty.
    """
    if events:
        with events_path.open("a") as log:
            log.writelines(json.dumps(record) + "\n" for record in events)
451
+
452
+
453
+ # --------------------------------------------------------------------------- #
454
+ # Attempt-outcome event helpers (FEAT-2026-0016/T01) #
455
+ # --------------------------------------------------------------------------- #
456
+
457
+
458
def parse_gate_failure_signature(stdout: str) -> tuple[str, str]:
    """Derive ``(failure_class, failure_signature)`` from gate-runner stdout.

    The first line of the form ``### <gate>: FAIL`` decides the class: the gate
    name itself for ``tests`` / ``lint`` / ``security`` / ``coverage``, and
    ``"other"`` for any other gate. Without such a line the result is
    ``("other", "no_gate_marker")``.

    The signature is searched in the (at most) 50 lines following the marker:
    first with the class-specific pattern, then as the first non-blank line
    (stripped, cut to 100 characters), and finally ``"unknown"``. Both returned
    strings are always non-empty.
    """
    marker = re.search(r"^### (\w+): FAIL", stdout, re.MULTILINE)
    if marker is None:
        return "other", "no_gate_marker"

    # What identifies a failure, per known gate class.
    signature_patterns = {
        "tests": re.compile(r"^FAIL: (test_\S+)", re.MULTILINE),
        "lint": re.compile(r"\b([A-Z]\d{3,4})\b"),
        "security": re.compile(r"Issue: \[(B\d+)"),
        "coverage": re.compile(r"^([^\s]+\.py)\s+\d+\s+\d+", re.MULTILINE),
    }
    gate_name = marker.group(1)
    failure_class = gate_name if gate_name in signature_patterns else "other"

    # The remainder of the marker line counts as the first "following" line.
    tail = stdout[marker.end():].splitlines()[:50]

    matcher = signature_patterns.get(failure_class)
    if matcher is not None:
        found = matcher.search("\n".join(tail))
        if found is not None:
            return failure_class, found.group(1) or "unknown"

    first_text = next((ln.strip() for ln in tail if ln.strip()), None)
    if first_text is None:
        return failure_class, "unknown"
    return failure_class, first_text[:100]
496
+
497
+
498
def detect_spinning_signature_repeat(
    current: tuple[str | None, str | None],
    prior: tuple[str | None, str | None] | None,
) -> bool:
    """Tell whether *current* repeats the *prior* failure signature.

    Each argument is a ``(failure_class, failure_signature)`` pair. The answer
    is False when:

    * *prior* is None (first failure, nothing to compare against);
    * either element of *current* is None;
    * *current* or *prior* is the ``("other", "no_gate_marker")`` sentinel, so
      parser-opaque failures never trigger a false-positive halt (AC4).

    Otherwise it is True exactly when the two pairs are equal.
    """
    opaque = ("other", "no_gate_marker")
    if prior is None:
        return False
    incomplete = current[0] is None or current[1] is None
    if incomplete or opaque in (current, prior):
        return False
    return current == prior
517
+
518
+
519
def extract_failure_excerpt(stdout: str, max_chars: int = 500) -> str:
    """Return the tail of the failure-relevant part of gate stdout.

    Relevant lines are those containing ``FAIL``, ``Error``, ``Exception`` or
    ``Traceback`` (matched case-insensitively). When no line qualifies, the
    whole of *stdout* is used instead.

    Args:
        stdout: captured gate output.
        max_chars: size budget. Despite the name it is measured in UTF-8
            *bytes*, not characters. Zero or a negative value yields ``""``.

    Returns:
        The text unchanged when it fits the budget, otherwise its last
        ``max_chars`` bytes decoded back to ``str``; a multi-byte character cut
        in half at the start of the slice is dropped rather than garbled.
    """
    # Guard first: `encoded[-0:]` is the WHOLE byte string, and a negative
    # budget would slice from the front instead of the back.
    if max_chars <= 0:
        return ""
    keyword = re.compile(r"FAIL|Error|Exception|Traceback", re.IGNORECASE)
    relevant = [line for line in stdout.splitlines() if keyword.search(line)]
    text = "\n".join(relevant) if relevant else stdout
    encoded = text.encode("utf-8")
    if len(encoded) <= max_chars:
        return text
    return encoded[-max_chars:].decode("utf-8", errors="ignore")
533
+
534
+
535
def emit_attempt_outcome(
    wu: WorkUnit,
    attempt: int,
    outcome: str,
    usage: dict,
    *,
    failure_class: str | None = None,
    failure_signature: str | None = None,
    failure_excerpt: str | None = None,
    files_touched: list[str] | None = None,
    agent_status: str | None = None,
    agent_blocked_reason: str | None = None,
    extras: dict | None = None,
) -> dict:
    """Build a standardized ``attempt_outcome`` event (v1 payload shape).

    Note carried over from the original: T01's own events lack the
    standardized payload (bootstrap gap).

    The event is only built and returned. The caller appends it to
    ``wu_events`` and ``flush_events`` runs at the existing flush point, which
    preserves the "one flush per outcome-cycle" invariant.

    Args:
        wu: work unit the attempt belongs to; supplies ``model``, ``effort``,
            ``wu_id`` and, when present, ``re_arm_count`` (else 0).
        attempt: attempt number.
        outcome: outcome label.
        usage: usage counters; missing keys default to zero.
        files_touched: recorded as ``[]`` when None.
        extras: optional fields merged into the payload last, used to keep
            outcome-specific fields (e.g. assertion, summary) that are not part
            of the standard schema. They override standard keys on collision.
    """
    usage_defaults = (
        ("duration_seconds", 0.0),
        ("cost_usd", 0.0),
        ("input_tokens", 0),
        ("output_tokens", 0),
        ("cache_read_input_tokens", 0),
        ("cache_creation_input_tokens", 0),
    )
    # Key insertion order below is the serialized field order; keep it stable.
    payload: dict = {"attempt": attempt, "outcome": outcome}
    for counter, fallback in usage_defaults:
        payload[counter] = usage.get(counter, fallback)
    payload["model"] = wu.model
    payload["effort"] = wu.effort
    payload["failure_class"] = failure_class
    payload["failure_signature"] = failure_signature
    payload["failure_excerpt"] = failure_excerpt
    payload["files_touched"] = [] if files_touched is None else files_touched
    payload["agent_status"] = agent_status
    payload["agent_blocked_reason"] = agent_blocked_reason
    payload["re_arm_count"] = getattr(wu, "re_arm_count", 0)
    if extras:
        payload.update(extras)
    return build_event("attempt_outcome", wu.wu_id, payload)
583
+
584
+
585
+ # --------------------------------------------------------------------------- #
586
+ # Git #
587
+ # --------------------------------------------------------------------------- #
588
+
589
+
590
def git(*args: str) -> str:
    """Run ``git <args>`` and return its stdout with surrounding whitespace removed.

    Raises:
        subprocess.CalledProcessError: git exited with a non-zero status.
    """
    completed = subprocess.run(
        ["git", *args],
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout.strip()
593
+
594
+
595
def git_diff_names(head_before: str, head_after: str) -> list[str]:
    """List file paths changed between two refs (``git diff --name-only``).

    When *head_after* is the literal ``"HEAD"``, untracked files reported by
    ``git ls-files --others --exclude-standard`` are appended as well (per the
    [driver/files_changed-guard] LEARNINGS entry).

    Best-effort by contract: any git failure yields ``[]``. That covers a
    non-zero exit status and also the case where the git executable cannot be
    launched at all.
    """
    def _lines(*git_args: str) -> list[str]:
        # Non-empty stdout lines of one git invocation.
        out = subprocess.run(
            ["git", *git_args],
            capture_output=True, text=True, check=True,
        ).stdout
        return [entry for entry in out.strip().splitlines() if entry]

    try:
        names = _lines("diff", "--name-only", head_before, head_after)
        if head_after == "HEAD":
            names += _lines("ls-files", "--others", "--exclude-standard")
        return names
    except (subprocess.CalledProcessError, OSError):
        # OSError: git missing or not executable. Honour the documented
        # "empty list on any git error" instead of crashing the caller.
        return []
616
+
617
+
618
def require_git_ready() -> None:
    """Exit unless the cwd is inside a git work tree that has at least one commit.

    The driver squashes per work unit on top of HEAD, so HEAD must exist.
    """
    def _succeeds(*rev_parse_args: str) -> bool:
        probe = subprocess.run(["git", "rev-parse", *rev_parse_args],
                               capture_output=True, text=True)
        return probe.returncode == 0

    if not _succeeds("--is-inside-work-tree"):
        sys.exit("Not a git repository. Run `git init` from the repo root first.")
    if not _succeeds("HEAD"):
        sys.exit("Git repository has no commits yet. The driver squashes per work "
                 "unit on top of HEAD; create an initial commit first "
                 "(e.g., `git commit --allow-empty -m 'init'`).")
630
+
631
+
632
class FeatureBranchError(RuntimeError):
    """The feature branch could not be entered safely.

    The message is meant to be actionable and human-readable, and includes
    git's captured stderr when a checkout fails. It exists so that a bare
    subprocess.CalledProcessError, which swallows stderr, does not escape
    main().
    """
639
+
640
+
641
def _tracked_dirty_paths() -> set[str]:
    """Collect repo-relative paths with TRACKED uncommitted changes.

    Parsed from ``git status --porcelain``; staged and unstaged entries both
    count. Untracked entries (``??``) are left out on purpose: they never block
    a create (`checkout -B`) and carry harmlessly, so counting them would
    wrongly flag e.g. a leftover events.jsonl as an "unexpected" change. The
    dirty-tree failure from #48 is about tracked local modifications ("your
    local changes would be overwritten by checkout"), which is exactly this set.

    For a rename entry (``old -> new``) the new path is reported.
    """
    porcelain = subprocess.run(
        ["git", "status", "--porcelain"],
        capture_output=True, text=True, check=True,
    ).stdout

    dirty: set[str] = set()
    for entry in porcelain.splitlines():
        if not entry.strip() or entry[:2] == "??":
            continue  # blank line, or untracked (never blocks checkout)
        # Everything after the two status columns and the separator.
        target = entry[3:]
        _, arrow, renamed_to = target.partition(" -> ")
        if arrow:
            target = renamed_to
        dirty.add(target.strip().strip('"'))
    return dirty
665
+
666
+
667
def _expected_flip_paths(feature_dir: "Path | None") -> set[str]:
    """Paths that /pick-feature legitimately leaves dirty before the loop runs.

    Always contains ``.specfuse/roadmap.md``. When *feature_dir* is given, the
    feature's ``PLAN.md``, expressed relative to the git top-level directory,
    is added too. If that path cannot be worked out (git fails, or PLAN.md is
    not under the top-level directory) only the roadmap entry is returned.
    """
    allowed = {".specfuse/roadmap.md"}
    if feature_dir is None:
        return allowed
    try:
        toplevel = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, check=True,
        ).stdout.strip()
        plan_abs = (Path(feature_dir) / "PLAN.md").resolve()
        allowed.add(str(plan_abs.relative_to(Path(toplevel).resolve())))
    except (subprocess.CalledProcessError, ValueError):
        pass  # PLAN path unresolvable; roadmap-only fallback
    return allowed
685
+
686
+
687
def _checked_checkout(checkout_args: list[str], action: str) -> str:
    """Run ``git <checkout_args>`` and return its stripped stdout.

    A non-zero exit raises FeatureBranchError with the message
    ``"<action> failed: <detail>"``, where the detail is git's stderr, else its
    stdout, else a placeholder. This replaces the bare CalledProcessError that
    would hide git's output.
    """
    result = subprocess.run(
        ["git", *checkout_args], capture_output=True, text=True,
    )
    if result.returncode == 0:
        return result.stdout.strip()
    detail = result.stderr.strip() or result.stdout.strip() or "(no git output)"
    raise FeatureBranchError(f"{action} failed: {detail}")
698
+
699
+
700
def ensure_feature_branch(feat_fm: dict, feature_dir: "Path | None" = None) -> None:
    """Put HEAD on the feature's declared branch, creating the branch if needed.

    The methodology gives each feature its own branch (the ``branch`` field of
    the PLAN.md frontmatter). Without this step, per-WU squash commits would
    land on whatever branch happened to be checked out, breaking per-feature
    isolation.

    Behaviour:

    * No ``branch`` declared, or already on it: nothing happens (idempotent).
    * Branch does not exist: it is created from the current HEAD with
      ``git checkout -B``, which carries the working tree along. Only the
      expected /pick-feature flips (roadmap.md + the feature's PLAN.md) may
      ride along; tracked changes to any other path abort with a message
      naming them, because silently moving unrelated edits onto a feature
      branch is worse than failing loudly.
    * Branch exists: it is checked out only if it is an ancestor of HEAD. A
      stale branch that diverged is surfaced instead of silently reused; the
      resolution policy (reuse / recreate / abort) is left to the human.

    These are the two real-world states that used to crash with a bare
    CalledProcessError (#48).

    Raises:
        FeatureBranchError: unexpected dirty paths, a divergent existing
            branch, or a failed checkout (carrying git's stderr).
    """
    target = feat_fm.get("branch")
    if not target:
        return  # not declared (defensive: lint_plan normally requires it)

    def _probe(*git_args: str):
        # Unchecked git call; callers look at stdout / returncode themselves.
        return subprocess.run(["git", *git_args], capture_output=True, text=True)

    current = _probe("branch", "--show-current").stdout.strip()
    if current == target:
        return

    if _probe("rev-parse", "--verify", target).returncode != 0:
        # Creation path: vet what would be carried onto the new branch.
        stray = _tracked_dirty_paths() - _expected_flip_paths(feature_dir)
        if stray:
            raise FeatureBranchError(
                "working tree has uncommitted changes to unexpected paths: "
                + ", ".join(sorted(stray))
                + f". Refusing to carry them onto new branch '{target}'. "
                "Commit or stash them first, then re-run."
            )
        _checked_checkout(["checkout", "-B", target], f"create of branch '{target}'")
        print(f"Created feature branch '{target}' from '{current}'.")
        return

    # Reuse path. `merge-base --is-ancestor B HEAD` exits 0 iff B is an
    # ancestor of HEAD, i.e. HEAD already contains B and reuse is safe.
    if _probe("merge-base", "--is-ancestor", target, "HEAD").returncode != 0:
        raise FeatureBranchError(
            f"branch '{target}' exists and diverges from HEAD (not an "
            f"ancestor). Refusing to silently check out a stale branch; "
            f"resolve manually (rebase, recreate, or delete it) and re-run."
        )
    _checked_checkout(["checkout", target], f"checkout of existing branch '{target}'")
    print(f"Switched to feature branch '{target}' (was on '{current}').")
768
+
769
+
770
def acquire_tree_lock(specfuse_dir: Path):
    """Open ``.specfuse/.loop.lock`` and take a non-blocking exclusive flock.

    Args:
        specfuse_dir: Directory that holds the lock file; created (with
            parents) when it does not exist yet.

    Returns:
        The open file object. The caller keeps it alive for the process
        lifetime — the kernel auto-releases on fd close or process exit
        (SIGKILL included), so no stale-lock cleanup is ever needed.

    Raises:
        BlockingIOError: another process already holds the lock.
        OSError: any other failure reported by ``flock``. The handle is
            closed before the error propagates so it is never leaked.
    """
    lock_path = specfuse_dir / ".loop.lock"
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    fd = lock_path.open("w")
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
        # BlockingIOError is an OSError subclass, so the "lock is held" case
        # still reaches the caller unchanged. Catching the base class also
        # closes the handle when flock fails for any other reason.
        fd.close()
        raise
    return fd
787
+
788
+
789
def write_cost_to_wu(backend, wu: WorkUnit, cum_usage: dict) -> None:
    """Write cumulative cost/token/duration fields to the WU's frontmatter.

    Called at outcome time. ``duration_seconds`` is written whenever the key
    is present. The cost/token trio is written only when a positive
    ``cost_usd`` or a non-zero token count was captured.

    Args:
        backend: Object exposing ``set_wu(wu, field, value)``.
        wu: The work unit whose frontmatter is updated.
        cum_usage: Accumulated usage for the WU. Any of ``cost_usd``,
            ``input_tokens`` and ``output_tokens`` may be missing; a missing
            entry is recorded as 0 instead of raising ``KeyError``.
    """
    if "duration_seconds" in cum_usage:
        backend.set_wu(wu, "duration_seconds",
                       round(cum_usage["duration_seconds"], 3))
    # Read every field once with a default so the guard and the writes agree
    # on what "absent" means (a partial usage dict used to crash the writes).
    cost_usd = cum_usage.get("cost_usd", 0)
    input_tokens = cum_usage.get("input_tokens", 0)
    output_tokens = cum_usage.get("output_tokens", 0)
    if cost_usd <= 0 and not input_tokens and not output_tokens:
        return
    backend.set_wu(wu, "cost_usd", round(cost_usd, 6))
    backend.set_wu(wu, "input_tokens", input_tokens)
    backend.set_wu(wu, "output_tokens", output_tokens)
803
+
804
+
805
def detect_rearm_dispatch(wu: WorkUnit) -> bool:
    """Tell whether ``wu`` is a re-arm whose prior cost is still unfolded.

    Both ``re_arm_count`` and ``cost_usd`` are read from the WU file's
    frontmatter on disk, because load_wu does not load those fields into the
    WorkUnit object.

    Returns:
        True only when ``re_arm_count`` is a positive int AND ``cost_usd`` is
        a positive number. False for first-time dispatches (count absent or
        0) and for re-arms whose cost was already folded (``cost_usd`` back
        at 0).
    """
    frontmatter, _body = read_frontmatter(wu.file)
    rearms = frontmatter.get("re_arm_count", 0)
    if isinstance(rearms, int) and rearms > 0:
        pending_cost = frontmatter.get("cost_usd", 0)
        if isinstance(pending_cost, (int, float)):
            return float(pending_cost) > 0
    return False
820
+
821
+
822
def fold_cumulative_on_rearm(wu: WorkUnit, backend: Backend) -> None:
    """Roll the previous dispatch cycle's usage into the cumulative_* fields.

    Runs once per re-arm, before the new cycle's attempt loop. The per-cycle
    fields (cost_usd, duration_seconds, input_tokens, output_tokens) left by
    the prior write_cost_to_wu call are added to their cumulative_*
    counterparts, then reset so the new cycle starts from zero.

    Backward-compatible: a WU with no cumulative_* fields yet is treated as
    having 0 in each, so the first re-arm of an older WU does not KeyError.
    """
    frontmatter, _body = read_frontmatter(wu.file)

    def number(key, cast):
        # Absent, None and 0 all collapse to 0 before the cast.
        return cast(frontmatter.get(key) or 0)

    # Convert everything up front so a malformed value aborts before any
    # frontmatter write happens.
    cycle_cost = number("cost_usd", float)
    cycle_duration = number("duration_seconds", float)
    cycle_input = number("input_tokens", int)
    cycle_output = number("output_tokens", int)

    total_cost = number("cumulative_cost_usd", float) + cycle_cost
    total_duration = number("cumulative_duration_seconds", float) + cycle_duration
    total_input = number("cumulative_input_tokens", int) + cycle_input
    total_output = number("cumulative_output_tokens", int) + cycle_output

    writes = (
        ("cumulative_cost_usd", round(total_cost, 6)),
        ("cumulative_duration_seconds", round(total_duration, 3)),
        ("cumulative_input_tokens", total_input),
        ("cumulative_output_tokens", total_output),
        # Per-cycle fields go back to zero for the new cycle.
        ("cost_usd", 0.0),
        ("duration_seconds", 0.0),
        ("input_tokens", 0),
        ("output_tokens", 0),
    )
    for field_name, value in writes:
        backend.set_wu(wu, field_name, value)
856
+
857
+
858
def gate_budget_usd(gate_file: Path) -> float | None:
    """Read the optional cumulative-cost ceiling from a GATE file.

    Looks up ``cost_budget_usd`` in the gate file's frontmatter.

    Returns:
        The ceiling as a float, or None when the field is absent.

    Raises:
        ValueError: the field is present but not numeric (booleans count as
            non-numeric). The message names the gate file — a configuration
            error is reported loudly rather than ignored.
    """
    frontmatter, _body = read_frontmatter(gate_file)
    if "cost_budget_usd" in frontmatter:
        declared = frontmatter["cost_budget_usd"]
        is_number = isinstance(declared, (int, float)) and not isinstance(declared, bool)
        if is_number:
            return float(declared)
        raise ValueError(
            f"{gate_file}: cost_budget_usd must be numeric, got {declared!r}"
        )
    return None
875
+
876
+
877
def gate_spent_usd(plan: dict, gate: dict, feature_dir: Path) -> float:
    """Total the ``cost_usd`` of every done WU listed on the gate.

    Walks ``gate["work_units"]``, reads each referenced WU file's frontmatter
    and adds its ``cost_usd`` when the WU's status is "done". Entries without
    a file, files missing on disk, WUs that are not done, and WUs whose
    ``cost_usd`` is absent, boolean or non-numeric all contribute nothing.

    ``plan`` is accepted only for signature symmetry with the other
    gate-budget helpers; the total is derived from WU files alone.
    """
    del plan  # signature symmetry — sum is derived from WU files only
    spent = 0.0
    for entry in gate.get("work_units") or []:
        relative = entry.get("file")
        candidate = feature_dir / relative if relative else None
        if candidate is None or not candidate.exists():
            continue
        frontmatter, _body = read_frontmatter(candidate)
        if frontmatter.get("status") != "done":
            continue
        recorded = frontmatter.get("cost_usd")
        if isinstance(recorded, (int, float)) and not isinstance(recorded, bool):
            spent += float(recorded)
    return spent
905
+
906
+
907
def _should_halt_for_budget(plan: dict, gate: dict, feature_dir: Path) -> bool:
    """Run-loop predicate for the per-gate budget brake.

    Checked before the next WU dispatch. Returns True when the gate declares
    a budget and the gate's spent total has reached or exceeded it; False in
    every other case, including when no budget is declared.
    """
    ceiling = gate_budget_usd(feature_dir / gate["file"])
    if ceiling is not None:
        return gate_spent_usd(plan, gate, feature_dir) >= ceiling
    return False
916
+
917
+
918
class BookkeepingCommitError(RuntimeError):
    """A bookkeeping `git commit` exited non-zero.

    Raised by commit_bookkeeping. It is the counterpart of SquashCommitError
    (issue #51) for the driver's own bookkeeping commits — gate status flips
    and the events.jsonl audit trail.

    Previously the bookkeeping commit ran with `check=True`, so a pre-commit
    hook rejection escaped run()/main() as a bare CalledProcessError with
    git's stderr swallowed: an unhandled traceback. This error carries git's
    stderr instead, so the rejection is readable.

    Surfaced by FEAT-2026-0024: a leak-scan FINDINGS line quoting
    `git@github.com` was captured into events.jsonl and re-tripped the hook
    on the awaiting_review bookkeeping commit. That address is now
    allowlisted; the guard stays so a genuine bookkeeping leak fails loud
    rather than cryptic.
    """
931
+
932
+
933
def commit_bookkeeping(paths: list, message: str) -> str | None:
    """Stage specific paths and create a chore(loop) bookkeeping commit.

    Used for state we want durable that is NOT part of a WU's squash commit:
    the WU's `blocked_human` status flip, the events.jsonl append for that
    block, the gate's `awaiting_review` status flip, and (on spinning) the
    per-attempt failure notes flushed out of memory.

    The bug this exists to prevent: writes to the working tree don't survive
    a subsequent `git reset --hard`. Status flips written but not committed
    silently revert. Anything that should persist must be committed.

    Args:
        paths: Candidate paths to stage; entries that do not exist on disk
            are skipped.
        message: Commit message.

    Returns:
        The new HEAD SHA, or None when there was nothing to commit (no
        existing path, or staging produced no change in the index).

    Raises:
        BookkeepingCommitError: `git commit` exited non-zero; the message
            carries git's stderr and stdout.
    """
    existing = [str(p) for p in paths if Path(p).exists()]
    if not existing:
        return None
    # -f: caller curates the path list (driver-managed bookkeeping state); some
    # paths intentionally live under `.specfuse/**/work/` which the scaffold
    # gitignores. Force-add bypasses the ignore for these known paths only.
    git("add", "-f", *existing)
    # Ask the INDEX whether anything is staged, not the whole working tree.
    # `git status --porcelain` also lists untracked/unstaged leftovers (which
    # survive `git reset --hard`); with those present and nothing staged,
    # `git commit` exits non-zero and the documented no-op turned into a
    # spurious BookkeepingCommitError.
    if not git("diff", "--cached", "--name-only"):
        return None  # all paths were already in their committed state
    res = subprocess.run(
        ["git", "commit", "-m", message], capture_output=True, text=True,
    )
    if res.returncode != 0:
        raise BookkeepingCommitError(
            f"bookkeeping commit was rejected (exit {res.returncode}) — "
            f"usually a pre-commit hook rejecting the staged bookkeeping state.\n"
            f"--- git stderr ---\n{res.stderr.strip()}\n"
            f"--- git stdout ---\n{res.stdout.strip()}"
        )
    return git("rev-parse", "HEAD")
967
+
968
+
969
def reset_preserving_events(head_before: str, events_path: Path) -> None:
    """Run `git reset --hard <head_before>` but keep events.jsonl intact.

    The hard reset is how the driver wipes the agent's edits before writing
    its own bookkeeping. events.jsonl, however, may hold entries that were
    flushed but not yet committed: the passed path flushes a WU's events
    AFTER its squash commit, so they wait on disk for the NEXT WU's
    `commit_bookkeeping`. A bare hard reset between WUs would roll the file
    back to its last committed state and silently drop those lifecycle
    events.

    Surfaced by FEAT-2026-0015/T02 (commits 52a176a / 74d1911): T02's
    task_started + task_completed events were flushed post-squash, then T03
    blocked and the bare hard reset wiped them. The same loss recurred when
    T02H completed clean and T03 was re-armed.

    Steps:
      1. Snapshot events.jsonl into memory when the file exists.
      2. Hard-reset to ``head_before`` (drops the agent's working-tree edits).
      3. Write the snapshot back.

    Later `flush_events` calls append to the restored content and
    `commit_bookkeeping` then captures the full history.
    """
    had_log = events_path.is_file()
    snapshot = events_path.read_text() if had_log else None
    git("reset", "--hard", head_before)
    if not had_log:
        return
    events_path.write_text(snapshot)
997
+
998
+
999
class SquashCommitError(RuntimeError):
    """squash_commit's `git commit` exited non-zero.

    Most often a pre-commit hook (for example the leak-scan hook) refused the
    squash. The exception message includes git's stderr and stdout, which
    `capture_output` would otherwise hide, so the caller can record an
    actionable failure note rather than crash on a bare CalledProcessError.
    See issue #51.
    """
1007
+
1008
+
1009
def squash_commit(wu: WorkUnit, head_before: str) -> str | None:
    """Create the single squashed commit for a passed work unit.

    If HEAD moved away from ``head_before`` (the agent committed on its own),
    those commits are folded back with a soft reset first. Then everything in
    the working tree is staged and committed with a ``feat:`` subject built
    from the WU title and a ``Feature:`` trailer carrying the WU id.

    Returns:
        The new HEAD SHA, or None when `git status --porcelain` reports
        nothing to commit.

    Raises:
        SquashCommitError: `git commit` exited non-zero; the message carries
            git's stderr and stdout.
    """
    agent_committed = git("rev-parse", "HEAD") != head_before
    if agent_committed:
        git("reset", "--soft", head_before)  # fold away any commits the agent made
    pending = git("status", "--porcelain")
    if not pending:
        return None
    git("add", "-A")
    message = f"feat: {wu.title}\n\nFeature: {wu.wu_id}"
    outcome = subprocess.run(
        ["git", "commit", "-m", message], capture_output=True, text=True,
    )
    if outcome.returncode == 0:
        return git("rev-parse", "HEAD")
    report = "\n".join([
        f"git commit for {wu.wu_id} was rejected (exit {outcome.returncode}) — "
        "usually a pre-commit hook rejecting the squash.",
        "--- git stderr ---",
        outcome.stderr.strip(),
        "--- git stdout ---",
        outcome.stdout.strip(),
    ])
    raise SquashCommitError(report)
1027
+
1028
+
1029
+ # --------------------------------------------------------------------------- #
1030
+ # Dispatch + verification #
1031
+ # --------------------------------------------------------------------------- #
1032
+
1033
# Preamble that dispatch() places in front of every work-unit prompt. The
# backslash-newlines are line continuations inside the literal, so the text
# reaches the agent as one paragraph followed by a single trailing newline.
PROMPT_PREAMBLE = """\
You are executing a single Specfuse work unit. Read .specfuse/rules/ in full before \
acting; they are binding. Do NOT run any git command — the driver owns all commits \
and bookkeeping. Edit files only. End your turn with the RESULT block defined in \
.specfuse/rules/result-contract.md. Verification is run by the driver, not by you; \
report honestly.
"""

# Optional terseness directive. dispatch() appends it to the preamble only
# when the work unit's effort is one of _CAVEMAN_EFFORT.
CAVEMAN_DIRECTIVE = """\
## Output terseness directive
Drop articles (a/an/the), filler words (just/really/basically/actually/simply), \
pleasantries (sure/certainly/of course/happy to), and hedging. \
Avoid prose narration between tool calls. \
Omit any end-of-turn summary. \
Write code blocks and the fenced RESULT block normally — do not abbreviate them. \
Quote error strings exactly as they appear.
"""

# Effort levels for which dispatch() adds CAVEMAN_DIRECTIVE to the prompt.
_CAVEMAN_EFFORT = frozenset({"low", "medium"})
1052
+
1053
+
1054
def _fit_line_count(candidates: list[str], char_budget: int) -> int:
    """Return how many leading entries of ``candidates`` fit ``char_budget``.

    Each entry costs ``len(entry) + 1`` characters (the line plus its
    newline). Single pass with a running total.
    """
    used = 0
    for kept, line in enumerate(candidates):
        used += len(line) + 1
        if used > char_budget:
            return kept
    return len(candidates)


def truncate_failure_note(note: str, max_lines: int = 200,
                          max_chars: int = 8000) -> str:
    """Return note unchanged when within limits; otherwise head+marker+tail.

    The line budget is split 50/50 between head and tail, and each half is
    additionally clamped to half of ``max_chars``. The marker is plain ASCII
    with no triple-backtick, so RESULT-block parsing is unaffected.

    When the note overflows ``max_chars`` but the per-half character budget
    leaves no whole line on either side (a single very long line, or very
    long first and last lines), the note is sliced by characters instead —
    the first and last ``max_chars // 2`` characters are kept — so the result
    is never a bare marker with all evidence elided.

    Args:
        note: Failure evidence to shorten.
        max_lines: Maximum number of lines to keep across head and tail.
        max_chars: Approximate character ceiling (the marker is extra).

    Returns:
        The original note, or ``head + marker + tail``.
    """
    if len(note) <= max_chars and note.count("\n") < max_lines:
        return note
    lines = note.splitlines()
    n = len(lines)
    # Always elide at least one line; never let the budget go negative
    # (possible for an empty note combined with max_lines <= 0).
    line_budget = max(0, min(max_lines, n - 1))
    half_char_budget = max_chars // 2
    head_wanted = line_budget // 2
    tail_wanted = line_budget - head_wanted
    head_count = _fit_line_count(lines[:head_wanted], half_char_budget)
    # The tail keeps the LAST lines, so fit it walking backwards from the end.
    tail_count = _fit_line_count(lines[n - tail_wanted:][::-1], half_char_budget)
    if (head_count == 0 and tail_count == 0
            and half_char_budget > 0 and len(note) > max_chars):
        # len(note) > max_chars >= 2 * half_char_budget, so the two slices
        # cannot overlap.
        elided = len(note) - 2 * half_char_budget
        return (note[:half_char_budget]
                + f"\n... [{elided} chars elided] ...\n"
                + note[-half_char_budget:])
    head_lines = lines[:head_count]
    tail_lines = lines[n - tail_count:] if tail_count > 0 else []
    elided_lines = n - head_count - tail_count
    elided_chars = len(note) - sum(len(ln) + 1 for ln in head_lines) \
        - sum(len(ln) + 1 for ln in tail_lines)
    marker = f"\n... [{elided_lines} lines / {elided_chars} chars elided] ...\n"
    return "\n".join(head_lines) + marker + "\n".join(tail_lines)
1080
+
1081
+
1082
def dispatch(wu: WorkUnit, failure_note: str | None,
             cost_tracking: bool = True) -> tuple[str, dict | None]:
    """Launch one fresh agent session for ``wu`` and return its output.

    The prompt is the preamble (plus the terseness directive when the WU's
    effort is in _CAVEMAN_EFFORT), then the WU body, then — on a retry — a
    section carrying the truncated failure evidence of the previous attempt.

    With ``cost_tracking`` on (the default) the CLI is asked for JSON output
    so the cost / token-usage block can be extracted.

    Returns:
        ``(result_text, usage_dict_or_None)``. Usage is None when cost
        tracking is off, or when the JSON envelope cannot be parsed or has an
        unexpected shape; the text is still returned so the RESULT-block
        parser and verify() can do their normal work.
    """
    sections = [PROMPT_PREAMBLE]
    if wu.effort in _CAVEMAN_EFFORT:
        sections.append(CAVEMAN_DIRECTIVE)
    sections.append(wu.body)
    prompt = "\n\n".join(sections)
    if failure_note:
        retry_header = (
            "\n\n## Previous attempt failed verification\n"
            "A prior fresh attempt failed the gates below. Diagnose and fix; "
            "do not repeat the same approach.\n\n"
        )
        prompt = prompt + retry_header + truncate_failure_note(failure_note)

    argv = []
    for part in CLAUDE_CMD:
        argv.append(part.replace("{model}", wu.model).replace("{effort}", wu.effort))
    if wu.unsandboxed:
        # Per-WU sandbox-escape. Audited via the unsandboxed_dispatch event
        # emitted in run()'s attempt loop; rationale lives in WU frontmatter.
        # Inserted after `-p` so it composes with --model/--effort/--output-format.
        argv[2:2] = ["--dangerously-skip-permissions"]
    if cost_tracking:
        argv.extend(["--output-format", "json"])

    completed = subprocess.run(argv, input=prompt, capture_output=True, text=True)
    raw = completed.stdout or ""
    if cost_tracking:
        return parse_claude_json_output(raw)
    return raw, None
1114
+
1115
+
1116
def parse_claude_json_output(raw: str) -> tuple[str, dict | None]:
    """Parse Claude CLI's `--output-format=json` envelope.

    Tolerant: any shape drift returns ``(raw, None)`` so the caller falls
    back to text-mode RESULT-block parsing.

    Args:
        raw: The CLI's stdout.

    Returns:
        ``(result_text, usage)``. ``result_text`` is the envelope's
        ``result`` string (``""`` when the key is absent, ``raw`` when it is
        not a string). ``usage`` holds ``cost_usd`` (from
        ``total_cost_usd``) plus any of ``input_tokens``, ``output_tokens``,
        ``cache_read_input_tokens`` and ``cache_creation_input_tokens`` that
        are present with a numeric value, or None when none were found.
        JSON booleans are NOT accepted as numbers: ``true`` would otherwise
        be recorded as a cost of 1.0 or a count of 1.
    """
    try:
        data = json.loads(raw)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return raw, None
    if not isinstance(data, dict):
        return raw, None
    result_text = data.get("result", "")
    if not isinstance(result_text, str):
        result_text = raw
    usage: dict = {}
    cost = data.get("total_cost_usd")
    # bool is a subclass of int, so it has to be excluded explicitly.
    if isinstance(cost, (int, float)) and not isinstance(cost, bool):
        usage["cost_usd"] = float(cost)
    u = data.get("usage")
    if isinstance(u, dict):
        for key in ("input_tokens", "output_tokens",
                    "cache_read_input_tokens", "cache_creation_input_tokens"):
            count = u.get(key)
            if isinstance(count, int) and not isinstance(count, bool):
                usage[key] = count
    return result_text, (usage if usage else None)
1143
+
1144
+
1145
def verify_files_changed(result: dict, head_before: str) -> list[str]:
    """Return claimed `files_changed` paths that show no diff against head_before.

    The RESULT-block contract lets the agent declare which paths its work
    touched. This guard, run before squash_commit, checks each claimed path
    actually differs from HEAD's pre-attempt SHA.

    A path that does not exist on disk counts as changed only when
    `git diff --quiet` reports a difference for it (exit code 1) — i.e. it
    was tracked at ``head_before`` and the attempt deleted it, which is a
    real diff to commit. A missing path with no such diff (never tracked, or
    git could not answer) is reported as "unchanged".

    Args:
        result: Parsed RESULT block.
        head_before: Pre-attempt HEAD SHA to diff against.

    Returns:
        The claimed paths with no real change. Empty when all claimed paths
        show real diffs, OR when `files_changed` is absent / empty / not a
        list (the opt-out: pre-existing WUs and the worked example do not
        always declare it; absence MUST NOT fire the guard).

    See FEAT-2026-0008 / RETROSPECTIVE for the failure mode this exists to
    catch — T04 and T08 of FEAT-2026-0007 declared files_changed naming
    source paths their attempts never touched.
    """
    paths = result.get("files_changed") or []
    if not isinstance(paths, list) or not paths:
        return []
    unchanged: list[str] = []
    for raw in paths:
        path = str(raw)
        rc = subprocess.run(
            ["git", "diff", "--quiet", head_before, "--", path],
            capture_output=True,
        ).returncode
        if not Path(path).exists():
            # Only an explicit "differences found" (exit 1) proves a tracked
            # file was deleted; any other exit code keeps the original
            # verdict for missing paths.
            if rc != 1:
                unchanged.append(path)
            continue
        if rc == 0:
            # `git diff` only sees tracked content — a freshly created file
            # is invisible to it even though it's a real change vs
            # head_before. Probe ls-files --others to catch the
            # newly-created-untracked case; without this, agent-created new
            # files (.tf, .sh, .md the WU just added) get flagged as
            # "unchanged" and the WU spins to blocked_human even though the
            # deliverable is present and correct.
            ls = subprocess.run(
                ["git", "ls-files", "--others", "--exclude-standard",
                 "--", path],
                capture_output=True, text=True,
            ).stdout.strip()
            if not ls:
                unchanged.append(path)
    return unchanged
1191
+
1192
+
1193
# Smoke-import runner (FEAT-2026-0008/T03). The conservative pattern matches
# ONLY a `python3 -c "from X import Y"` line. The agent-authored WU body may
# declare an existence check naming new symbols this WU just minted; the
# driver runs each match after a successful verify() + squash and rolls back
# the squash if any smoke import raises. Free-form `python3 -c` lines are
# NOT executed — running arbitrary agent-authored Python in the driver
# process would be a security regression (see WU escalation trigger 2).
#
# Because matched lines are later run through a shell, X is restricted to a
# dotted identifier and Y to `*` or a comma-separated list of identifiers
# (no spaces). The earlier `\S+` also accepted `;`, `$(...)` and backticks,
# which let arbitrary Python or shell ride along inside a "smoke import".
SMOKE_IMPORT_RE = re.compile(
    r'''^\s*python3?\s+-c\s+(["'])from\s+[A-Za-z_][\w.]*\s+import\s+'''
    r'''(?:\*|[A-Za-z_]\w*(?:,[A-Za-z_]\w*)*)\1\s*$''',
    re.ASCII,
)
1203
+
1204
+
1205
def extract_smoke_imports(wu_body: str) -> list[str]:
    """Collect the WU-body lines that match the import-smoke pattern.

    Every element is the stripped line — a full command string in the form
    run_smoke_imports expects — in the order it appears in the body. Lines
    that merely resemble the pattern (`python -c "import X"`,
    `python -c "print(...)"`, prose) are left out.
    """
    return [
        candidate.strip()
        for candidate in wu_body.splitlines()
        if SMOKE_IMPORT_RE.match(candidate)
    ]
1218
+
1219
+
1220
def run_smoke_imports(commands: list[str], cwd: Path) -> tuple[bool, str]:
    """Execute the smoke-import commands in ``cwd``, in the order given.

    Returns:
        ``(True, "")`` when every command exits 0. Otherwise
        ``(False, summary)`` for the FIRST command that exits non-zero; the
        summary names that command and its stderr and is short enough for an
        event payload and a retry failure_note. Commands after the failing
        one are not run — one failure is enough to fail the attempt.

    The commands run through the shell and inherit the driver's PATH so the
    active venv's `python3` resolves (the methodology requires the driver to
    be invoked from within an active venv per `[loop-driver-operation]`).
    """
    workdir = str(cwd)
    for command in commands:
        completed = subprocess.run(  # nosec B602
            command, shell=True, capture_output=True, text=True, cwd=workdir,
        )
        if completed.returncode == 0:
            continue
        report = "\n".join([
            f"smoke import failed (exit {completed.returncode}):",
            f" $ {command}",
            "stderr:",
            completed.stderr.strip(),
        ])
        return False, report
    return True, ""
1245
+
1246
+
1247
def is_zero_token_attempt(usage: dict | None) -> bool:
    """Report whether the dispatched session billed zero input tokens.

    True exactly when ``usage`` is a dict whose ``input_tokens`` equals 0.
    Such an attempt means the agent never produced output (often a transient
    CLI / quota / connectivity failure that the SDK reports as a success with
    empty content); a RESULT block in its output, if any, is hallucinated
    upstream and must not be trusted.

    False when ``usage`` is None (cost tracking disabled — prior behavior is
    kept for users who opt out), when the dict has no ``input_tokens``, and
    for any positive count. The guard is therefore opt-in through the
    cost-tracking flag: with tracking off, `dispatch()` always returns
    ``usage=None`` and this function always answers False.

    See FEAT-2026-0008 / RETROSPECTIVE for the failure mode this exists to
    catch — a zero-token attempt in FEAT-2026-0007/T08H landed
    `status: done` despite the agent never running.
    """
    return isinstance(usage, dict) and usage.get("input_tokens") == 0
1269
+
1270
+
1271
RESULT_BLOCK_RE = re.compile(r"```result\s*\n(.*?)\n```", re.DOTALL)


def parse_result_block(stdout: str) -> dict | None:
    """Parse the LAST fenced ```result``` block in ``stdout``.

    The result-contract rule (`.specfuse/rules/result-contract.md`) asks the
    agent to end its turn with one fenced `result` block. This parser is
    deliberately forgiving: the agent may talk before the block, may emit
    other fenced blocks, may produce malformed YAML.

    Returns:
        The parsed mapping, or None when stdout is empty, holds no result
        block, the block fails to parse, or it parses to something other
        than a dict. On None the caller falls back to verify() as the exit
        oracle — crashing the loop on garbled agent output would defeat the
        point of having a separate oracle.
    """
    if not stdout:
        return None
    final_block = None
    for final_block in RESULT_BLOCK_RE.finditer(stdout):
        pass  # keep only the LAST result block — agents may discuss before it
    if final_block is None:
        return None
    try:
        parsed = _miniyaml.parse(final_block.group(1))
    except Exception:  # noqa: BLE001 - intentional: see comment below
        # The broad catch is deliberate and limited to this one site. Agent
        # stdout is the least-trusted input in the system (free-form LLM
        # text that is merely supposed to end in a fenced result block), and
        # the contract here is "anything malformed degrades to verify()
        # decides, never crashes the driver." MiniYAMLError covers
        # documented-subset violations, but the parser is hand-rolled and
        # could in principle raise IndexError/ValueError/etc. on sufficiently
        # odd input — those must degrade too.
        # Every OTHER _miniyaml.parse site (read_frontmatter, load_graph,
        # load_verification, and the linter) reads operator-authored config
        # and intentionally keeps strict MiniYAMLError-only handling so
        # malformed config fails loudly, per verify()'s fail-closed
        # philosophy. Do not broaden those.
        return None
    if isinstance(parsed, dict):
        return parsed
    return None
1308
+
1309
+
1310
def agent_reported_blocked(stdout: str) -> tuple[bool, str | None]:
    """Check whether the agent's RESULT block says `status: blocked`.

    Returns:
        ``(True, blocked_reason)`` only for a well-formed block whose status
        is exactly "blocked"; the reason is stringified, or None when the
        block gives none. A missing block, a malformed block, or any other
        status yields ``(False, None)`` and the driver runs verify() as
        usual.
    """
    block = parse_result_block(stdout)
    if block and block.get("status") == "blocked":
        why = block.get("blocked_reason")
        return True, (None if why is None else str(why))
    return False, None
1322
+
1323
+
1324
def load_verification() -> dict:
    """Load the gate configuration from VERIFICATION_PATH.

    Returns an empty dict when the file does not exist or parses to an empty
    value; otherwise whatever `_miniyaml.parse` returns for its text.
    """
    if VERIFICATION_PATH.exists():
        return _miniyaml.parse(VERIFICATION_PATH.read_text()) or {}
    return {}
1328
+
1329
+
1330
def verify(wu: WorkUnit, feature_dir: Path,
           cfg: dict | None = None) -> tuple[bool, str]:
    """Run the WU's verification gates — the driver is the exit oracle.

    The agent's self-report is advisory; this function decides. The gate set
    is chosen by the WU's type (falling back to "code"), then extended with
    any sets named in the WU's ``extra_gates``.

    A missing or empty gate set is a CONFIGURATION failure, never a pass: a
    misconfigured verification.yml must not silently let work through. The
    failure text names the configuration cause so whoever reads the log
    fixes verification.yml rather than the work unit.

    Args:
        wu: Work unit being verified.
        feature_dir: Substituted for ``{feature_dir}`` in gate commands.
        cfg: Gate configuration; injectable for tests. When None it is read
            via load_verification().

    Returns:
        ``(all_gates_passed, evidence)`` where evidence is one section per
        gate with PASS/FAIL, the command, and the last 15 output lines.
    """
    config = load_verification() if cfg is None else cfg
    set_name = GATES_FOR_TYPE.get(wu.type, "code")
    gate_set = config.get(set_name) or []
    if not gate_set:
        return False, (
            f"CONFIGURATION ERROR: no '{set_name}' gates configured in "
            f".specfuse/verification.yml for work-unit type '{wu.type}'. "
            "This is not a work-unit failure — fix verification.yml and re-run."
        )
    # Merge author-declared extra_gates sets into the type-selected set. Gates
    # are deduplicated by name so a gate shared by the default set and an
    # extra set runs once (issue #62). An extra_gates name that is absent
    # from verification.yml is a CONFIGURATION ERROR — same class as an empty
    # type set, never a silent pass.
    selected = list(gate_set)
    known_names = {gate["name"] for gate in selected}
    for extra_name in wu.extra_gates:
        if extra_name == set_name:
            continue  # already the type-selected set
        extras = config.get(extra_name)
        if not extras:
            return False, (
                "CONFIGURATION ERROR: work unit declares `extra_gates: "
                f"[{extra_name}]` but no '{extra_name}' gates are configured in "
                ".specfuse/verification.yml. This is not a work-unit failure — "
                "fix verification.yml (or the WU's extra_gates) and re-run."
            )
        for gate in extras:
            if gate["name"] not in known_names:
                known_names.add(gate["name"])
                selected.append(gate)

    sections = []
    every_gate_ok = True
    target_dir = str(feature_dir)
    for gate in selected:
        command = gate["command"].replace("{feature_dir}", target_dir)
        # shell=True is intentional: gate commands are written by the user in
        # verification.yml and routinely rely on shell features (pipes, &&,
        # globs, redirects — e.g. `dotnet build && dotnet test --no-build`).
        # The input is the project's own config, not untrusted external data.
        proc = subprocess.run(  # nosec B602
            command, shell=True, capture_output=True, text=True,
        )
        gate_ok = proc.returncode == 0
        every_gate_ok = every_gate_ok and gate_ok
        verdict = "PASS" if gate_ok else "FAIL"
        output_tail = (proc.stdout + proc.stderr).strip().splitlines()[-15:]
        sections.append(
            f"### {gate['name']}: {verdict}\n"
            f"```\n$ {command}\n" + "\n".join(output_tail) + "\n```"
        )
    return every_gate_ok, "\n\n".join(sections)
1389
+
1390
+
1391
def execute_unit_attempt(
    wu: WorkUnit,
    feature_dir: Path,
    failure_note: str | None,
    *,
    dispatch_fn=None,
    verify_fn=None,
    cost_tracking: bool = True,
    head_before: str | None = None,
) -> tuple[str, object, dict | None]:
    """Run one dispatch -> parse -> (unless blocked) verify cycle.

    Split out of run() so the parse-and-decision logic can be unit-tested
    without spawning a real agent: tests pass stub callables as
    ``dispatch_fn`` and ``verify_fn``.

    Returns:
        ``(outcome, payload, usage)``. ``outcome`` is one of:

        - ``"zero_token"``: usage reports input_tokens=0 (the agent never
          ran); payload is None.
        - ``"blocked"``: the agent explicitly emitted status: blocked;
          payload is the blocked reason.
        - ``"failed"``: verify() failed; payload is its evidence.
        - ``"files_changed_mismatch"``: verify() passed but the RESULT's
          files_changed names paths with no diff against head_before;
          payload is the list of those paths.
        - ``"passed"``: verify() passed and the files_changed guard found
          nothing to flag; payload is verify()'s evidence.

        ``usage`` is the per-attempt cost/token dict when ``cost_tracking``
        is True and the agent returned a parseable usage block; otherwise
        None (also when a dispatch_fn stub returns a plain string).

    ``dispatch_fn`` stubs may return either a plain ``str`` (text only,
    usage=None) or ``(str, dict | None)``.

    ``head_before`` is the pre-attempt HEAD SHA the files_changed guard diffs
    against. None turns the guard off — kept for unit tests that exercise
    this function without a git working tree.
    """
    oracle = verify if verify_fn is None else verify_fn
    if dispatch_fn is None:
        dispatched = dispatch(wu, failure_note, cost_tracking)
    else:
        dispatched = dispatch_fn(wu, failure_note)
    if isinstance(dispatched, tuple):
        stdout, usage = dispatched
    else:
        stdout, usage = dispatched, None

    # The zero-token guard comes BEFORE RESULT-block parsing: the agent
    # produced no output, so any block in stdout is hallucinated upstream and
    # must not be trusted (FEAT-2026-0008/T01). Opt-in via cost tracking —
    # with tracking off, usage is None and the check is False.
    if is_zero_token_attempt(usage):
        return "zero_token", None, usage

    agent_text = stdout or ""
    blocked, reason = agent_reported_blocked(agent_text)
    if blocked:
        return "blocked", reason, usage

    passed, evidence = oracle(wu, feature_dir)
    if not passed:
        return "failed", evidence, usage

    # files_changed guard (FEAT-2026-0008/T02): the agent's RESULT claim is
    # diffed against head_before BEFORE squash_commit. A non-empty mismatch
    # fails the attempt even though verify() said PASS — gates cannot see
    # "the diff is empty" when their commands operate on files unrelated to
    # the WU's scope.
    if head_before is not None:
        claimed = parse_result_block(agent_text)
        untouched = verify_files_changed(claimed, head_before) if claimed else []
        if untouched:
            return "files_changed_mismatch", untouched, usage
    return "passed", evidence, usage
1464
+
1465
+
1466
+ # --------------------------------------------------------------------------- #
1467
+ # Roadmap row parser (header-name based) — issue #15 #
1468
+ # --------------------------------------------------------------------------- #
1469
+
1470
+
1471
+ def _parse_roadmap_row(roadmap_text: str, feature_id: str) -> dict | None:
1472
+ """Find feature_id's row in roadmap.md and return columns mapped by header name.
1473
+
1474
+ Looks up the first markdown table header row containing a 'Status' cell,
1475
+ parses column names by name (not by positional index), then finds the
1476
+ feature_id data row after the header. Tolerates any column count and any
1477
+ ordering, including project-specific columns like 'Priority' or 'Budget'.
1478
+
1479
+ Returns a dict on success:
1480
+ 'columns': {col_name: stripped_value, ...}
1481
+ 'cell_spans': {col_name: (start, end), ...} absolute offsets into
1482
+ roadmap_text spanning the BETWEEN-PIPES content (suitable
1483
+ for whole-cell rewrites).
1484
+ 'row_span': (start, end) absolute offsets of the full row line.
1485
+
1486
+ Returns None if no table header containing 'Status' is found, or if no
1487
+ feature_id row exists after that header.
1488
+ """
1489
+ # Locate the table header — a line `| col1 | col2 | ... |` whose cells
1490
+ # include the literal 'Status'. The header row appears immediately above
1491
+ # the markdown separator line; we use 'Status' in its cells as the marker.
1492
+ header_re = re.compile(r"^\|([^\n]*)\|\s*$", re.MULTILINE)
1493
+ header_m = None
1494
+ col_names: list[str] = []
1495
+ for m in header_re.finditer(roadmap_text):
1496
+ cells = [c.strip() for c in m.group(1).split("|")]
1497
+ if "Status" in cells:
1498
+ header_m = m
1499
+ col_names = cells
1500
+ break
1501
+ if header_m is None:
1502
+ return None
1503
+
1504
+ # Locate the feature_id data row AFTER the header.
1505
+ row_re = re.compile(
1506
+ r"^\|\s*" + re.escape(feature_id) + r"\s*\|[^\n]*$",
1507
+ re.MULTILINE,
1508
+ )
1509
+ row_m = row_re.search(roadmap_text, pos=header_m.end())
1510
+ if not row_m:
1511
+ return None
1512
+
1513
+ row_text = row_m.group(0)
1514
+ row_start_abs = row_m.start()
1515
+
1516
+ # Pipe positions inside the row identify cell boundaries.
1517
+ pipes = [i for i, ch in enumerate(row_text) if ch == "|"]
1518
+ if len(pipes) < len(col_names) + 1:
1519
+ # Malformed row — fewer cells than the header declares.
1520
+ return None
1521
+
1522
+ columns: dict[str, str] = {}
1523
+ cell_spans: dict[str, tuple[int, int]] = {}
1524
+ for col_idx, col_name in enumerate(col_names):
1525
+ cell_start_rel = pipes[col_idx] + 1
1526
+ cell_end_rel = pipes[col_idx + 1]
1527
+ raw = row_text[cell_start_rel:cell_end_rel]
1528
+ columns[col_name] = raw.strip()
1529
+ cell_spans[col_name] = (
1530
+ row_start_abs + cell_start_rel,
1531
+ row_start_abs + cell_end_rel,
1532
+ )
1533
+
1534
+ return {
1535
+ "columns": columns,
1536
+ "cell_spans": cell_spans,
1537
+ "row_span": (row_start_abs, row_m.end()),
1538
+ }
1539
+
1540
+
1541
+ # --------------------------------------------------------------------------- #
1542
+ # Auto-archive helper #
1543
+ # --------------------------------------------------------------------------- #
1544
+
1545
+
1546
def auto_archive_feature(feature_id: str, repo_root: Path) -> str:
    """Move a finished feature's detail section from roadmap.md to the archive.

    In-driver re-implementation of the roadmap-archive single-feature
    algorithm (Steps 1-6). No git operations; touches only
    ``.specfuse/roadmap.md`` and ``.specfuse/roadmap-archive.md`` under
    ``repo_root``.

    Args:
        feature_id: Feature identifier as it appears in the roadmap table.
        repo_root: Repository root containing the ``.specfuse`` directory.

    Returns:
        ``"archived"`` when the archive and/or roadmap were updated,
        ``"already archived"`` when there is nothing left to do (the Detail
        cell already back-links to the archive, or the anchor is already in
        the archive and the roadmap needs no further edit), or
        ``"refused: <reason>"`` when a precondition is not met.

    Both files are read and written as UTF-8 explicitly: the back-link
    contains a non-ASCII arrow, so relying on the locale encoding can fail or
    corrupt the files on non-UTF-8 platforms.
    """
    roadmap_path = repo_root / ".specfuse" / "roadmap.md"
    archive_path = repo_root / ".specfuse" / "roadmap-archive.md"

    feat_id_lower = feature_id.lower()
    anchor = f'<a id="{feat_id_lower}"></a>'
    back_link = f'[→ archive](roadmap-archive.md#{feat_id_lower})'
    marker = "<!-- Archived sections appended below -->"

    # Step 1 — read and validate table row (header-name based; issue #15)
    if not roadmap_path.exists():
        return f"refused: {roadmap_path} not found"
    roadmap_text = roadmap_path.read_text(encoding="utf-8")

    parsed = _parse_roadmap_row(roadmap_text, feature_id)
    if parsed is None:
        return f"refused: {feature_id} not found in roadmap"

    status = parsed["columns"].get("Status", "")
    detail = parsed["columns"].get("Detail", "")

    if "roadmap-archive.md#" in detail:
        return "already archived"
    if status not in ("done", "abandoned"):
        return f"refused: status={status}"

    # Step 2 — extract inline section. The (?!\w) lookahead keeps the heading
    # match aligned with the exact-cell row lookup above: without it an ID
    # that is a prefix of a longer ID would capture the other feature's
    # section.
    section_re = re.compile(
        r'^(## ' + re.escape(feature_id) + r'(?!\w)[^\n]*(?:\n(?!## )[^\n]*)*\n?)',
        re.MULTILINE,
    )
    section_m = section_re.search(roadmap_text)
    had_inline_section = section_m is not None
    if had_inline_section:
        section_text = section_m.group(1).rstrip('\n') + '\n'
    else:
        # Row-only feature: a roadmap table row exists (status done/abandoned,
        # no back-link yet) but there is no inline `## FEAT-ID` detail section
        # to move. Synthesize a minimal stub section so the anchor and
        # back-link still materialize.
        title = parsed["columns"].get("Title", "").strip()
        heading = f"## {feature_id}" + (f" — {title}" if title else "")
        section_text = (
            f"{heading}\n\n"
            "_No inline detail section was recorded for this feature; "
            "stub written at archive time._\n"
        )

    # Step 3 — append anchor + section to archive after marker.
    # Auto-create the archive file if a project never shipped it (the
    # unattended driver must not crash on its absence).
    if not archive_path.exists():
        project = ""
        fm = re.match(r'^---\n(.*?)\n---', roadmap_text, re.DOTALL)
        if fm:
            pm = re.search(r'^project:\s*(.+)$', fm.group(1), re.MULTILINE)
            if pm:
                project = pm.group(1).strip()
        header = (
            (f"---\nproject: {project}\n---\n\n" if project else "")
            + "# Archived feature details\n\n"
            "This file holds the detail sections for features whose status has "
            "reached `done` or `abandoned`.\n\n"
            f"{marker}\n"
        )
        archive_path.write_text(header, encoding="utf-8")
    archive_text = archive_path.read_text(encoding="utf-8")
    if marker not in archive_text:
        return "refused: archive marker absent"

    has_detail_cell = "Detail" in parsed["cell_spans"]
    if anchor in archive_text:
        # A previous run already wrote this feature's anchor (e.g. it stopped
        # before roadmap.md was rewritten, or the table has no Detail column
        # to carry the back-link). Never append a second copy; finish the
        # roadmap edits below only if any remain.
        if not had_inline_section and not has_detail_cell:
            return "already archived"
    else:
        marker_end = archive_text.index(marker) + len(marker)
        new_archive = (
            archive_text[:marker_end]
            + f"\n{anchor}\n{section_text}"
            + archive_text[marker_end:]
        )
        archive_path.write_text(new_archive, encoding="utf-8")

    # Step 4 — update Detail cell with back-link (skip if column absent; issue #15)
    if has_detail_cell:
        detail_start, detail_end = parsed["cell_spans"]["Detail"]
        roadmap_text = (
            roadmap_text[:detail_start] + f" {back_link} " + roadmap_text[detail_end:]
        )

    # Step 5 — remove inline section (re-search since the row update shifted
    # offsets). Only when one actually existed to move; a synthesized stub
    # was never in roadmap.md, so there is nothing to strip.
    if had_inline_section:
        section_m2 = section_re.search(roadmap_text)
        if section_m2:
            roadmap_text = roadmap_text[:section_m2.start()] + roadmap_text[section_m2.end():]
        roadmap_text = re.sub(r'\n{3,}', '\n\n', roadmap_text)
    roadmap_path.write_text(roadmap_text, encoding="utf-8")

    return "archived"
1648
+
1649
+
1650
+ def _legacy_4wu_terminal_close_complete(
1651
+ wu: "WorkUnit",
1652
+ units: "list[WorkUnit]",
1653
+ gate,
1654
+ gates: list,
1655
+ ) -> bool:
1656
+ """Detect legacy 4-WU close sequence completion on a terminal gate (issue #16).
1657
+
1658
+ Pre-FEAT-2026-0015 feature scaffolds use the four-WU closing sequence
1659
+ (`retrospective` → `lessons` → `docs` → `plan-next`). FEAT-2026-0015 wired
1660
+ `fire_terminal_flips` to fire only on `close`-type WUs, leaving the legacy
1661
+ sequence with no terminating-equivalent trigger. This helper recognizes
1662
+ completion of the 4-WU sequence as terminating-equivalent so the driver
1663
+ can fire `fire_terminal_flips` on the gate.
1664
+
1665
+ Returns True iff:
1666
+ - `wu.type == "plan-next"` (the last WU in the sequence)
1667
+ - `gate is gates[-1]` (terminal gate)
1668
+ - The gate's `units` include all four legacy types
1669
+ (`retrospective`, `lessons`, `docs`, `plan-next`) AND each is `done`.
1670
+ """
1671
+ if wu.type != "plan-next":
1672
+ return False
1673
+ if gate is not gates[-1]:
1674
+ return False
1675
+ required = {"retrospective", "lessons", "docs", "plan-next"}
1676
+ have_done = {u.type for u in units if u.type in required and u.status == DONE}
1677
+ return required.issubset(have_done)
1678
+
1679
+
1680
def _flip_terminal_gate_status(wu_id: str, feature_dir: Path) -> Path | None:
    """Flip the last gate in PLAN.md from awaiting_review to passed.

    Returns the gate file path when it was rewritten, else None. Every skip
    condition is logged and is non-fatal.
    """
    _, gates = load_graph(feature_dir)
    if not gates:
        logging.warning("fire_terminal_flips: no gates in PLAN.md for %s", wu_id)
        return None
    terminal_gate = gates[-1]
    gate_path = terminal_gate.file
    if not gate_path.exists():
        logging.warning(
            "fire_terminal_flips: terminal gate file absent: %s — skipping gate flip",
            gate_path,
        )
        return None
    current_gate_status = terminal_gate.status
    if current_gate_status == "passed":
        logging.info(
            "fire_terminal_flips: %s already passed — skipping gate flip",
            gate_path.name,
        )
        return None
    if current_gate_status != "awaiting_review":
        logging.warning(
            "fire_terminal_flips: %s status is %r (not awaiting_review or passed)"
            " — skipping gate flip",
            gate_path.name,
            current_gate_status,
        )
        return None
    write_frontmatter_field(gate_path, "status", "passed")
    return gate_path


def _flip_roadmap_row_status(feature_id: str, roadmap_path: Path) -> Path | None:
    """Rewrite the feature's roadmap Status cell from active to done.

    Returns ``roadmap_path`` when the file was rewritten, else None. Uses the
    header-name based row parser so extra project columns are tolerated
    (issue #15). The file is read and written as UTF-8 explicitly so the
    round trip does not depend on the platform locale.
    """
    if not roadmap_path.exists():
        logging.warning(
            "fire_terminal_flips: roadmap.md absent at %s — skipping row flip",
            roadmap_path,
        )
        return None
    roadmap_text = roadmap_path.read_text(encoding="utf-8")
    parsed = _parse_roadmap_row(roadmap_text, feature_id)
    if parsed is None:
        logging.warning(
            "fire_terminal_flips: %s not found in roadmap.md — skipping row flip",
            feature_id,
        )
        return None
    current_row_status = parsed["columns"].get("Status", "")
    if current_row_status == "done":
        logging.info(
            "fire_terminal_flips: roadmap row for %s already done — skipping",
            feature_id,
        )
        return None
    if current_row_status != "active":
        logging.warning(
            "fire_terminal_flips: roadmap row for %s has status %r"
            " (not active or done) — skipping row flip",
            feature_id,
            current_row_status,
        )
        return None
    # Replace inside the cell span only, preserving the cell's padding.
    status_start, status_end = parsed["cell_spans"]["Status"]
    status_cell = roadmap_text[status_start:status_end]
    roadmap_path.write_text(
        roadmap_text[:status_start]
        + status_cell.replace("active", "done", 1)
        + roadmap_text[status_end:],
        encoding="utf-8",
    )
    return roadmap_path


def _flip_plan_status(wu: "WorkUnit", feature_dir: Path, feature_id: str) -> Path | None:
    """Set PLAN.md ``status: done`` when the WU's on-disk verdict permits it.

    The verdict is re-read from the WU file rather than taken from
    ``wu.verdict``: the auto-close path writes the verdict to disk but leaves
    the in-memory value unset. A missing WU file (legacy plan-next WU, test
    stubs) is treated as "no verdict". Returns the PLAN.md path when it was
    rewritten, else None.
    """
    disk_verdict = None
    if wu.file.is_file():
        wu_fm, _ = read_frontmatter(wu.file)
        disk_verdict = wu_fm.get("verdict") or None
    if not verdict_permits_terminal_flips(disk_verdict):
        logging.info(
            "fire_terminal_flips: verdict %r does not permit terminal flips"
            " — skipping PLAN.md flip for %s",
            disk_verdict,
            wu.wu_id,
        )
        return None
    plan_path = feature_dir / "PLAN.md"
    if not plan_path.exists():
        logging.warning(
            "fire_terminal_flips: PLAN.md absent at %s — skipping PLAN flip",
            plan_path,
        )
        return None
    plan_fm, _ = read_frontmatter(plan_path)
    if plan_fm.get("status", "") == "done":
        logging.info(
            "fire_terminal_flips: PLAN.md for %s already done — skipping",
            feature_id,
        )
        return None
    write_frontmatter_field(plan_path, "status", "done")
    return plan_path


def fire_terminal_flips(wu: WorkUnit, feature_dir: Path, repo_root: Path) -> list[Path]:
    """Flip terminal gate -> passed, roadmap row -> done, PLAN.md -> done, then archive.

    The four steps run in that fixed order and are independent: a step that
    cannot apply logs why and is skipped without stopping the others. Nothing
    is raised deliberately; only exceptions from the underlying helpers
    propagate.

    Args:
        wu: The closing work unit; its ``wu_id`` prefix (before the last
            ``/``) is used as the feature id.
        feature_dir: Directory holding the feature's PLAN.md and gate files.
        repo_root: Repository root containing ``.specfuse/roadmap.md``.

    Returns:
        The paths actually modified, sorted so the bookkeeping commit's add
        list is deterministic.
    """
    feature_id = wu.wu_id.rsplit("/", 1)[0]
    roadmap_path = repo_root / ".specfuse" / "roadmap.md"
    modified: set[Path] = set()

    gate_path = _flip_terminal_gate_status(wu.wu_id, feature_dir)
    if gate_path is not None:
        modified.add(gate_path)

    flipped_roadmap = _flip_roadmap_row_status(feature_id, roadmap_path)
    if flipped_roadmap is not None:
        modified.add(flipped_roadmap)

    plan_path = _flip_plan_status(wu, feature_dir, feature_id)
    if plan_path is not None:
        modified.add(plan_path)

    # Archiving runs last so it sees the row status written above.
    archive_result = auto_archive_feature(feature_id, repo_root)
    if archive_result == "archived":
        modified.add(roadmap_path)
        modified.add(repo_root / ".specfuse" / "roadmap-archive.md")
    elif archive_result == "already archived":
        logging.info(
            "fire_terminal_flips: %s already archived — skipping auto-archive",
            feature_id,
        )
    else:
        logging.warning(
            "fire_terminal_flips: auto_archive_feature: %s — run /roadmap-archive manually",
            archive_result,
        )

    return sorted(modified)
1822
+
1823
+
1824
+ # --------------------------------------------------------------------------- #
1825
+ # Terminal auto-close helpers (FEAT-2026-0018/T04) #
1826
+ # --------------------------------------------------------------------------- #
1827
+
1828
+
1829
+ def _already_auto_closed(wu_file: Path) -> bool:
1830
+ """Return True iff the WU's on-disk frontmatter already shows it has been
1831
+ auto-closed (status=done AND auto_close truthy).
1832
+
1833
+ Idempotency guard for both maybe_auto_close_intermediate and
1834
+ maybe_auto_close_terminal — prevents the duplicate `auto_close_decision`
1835
+ event and duplicate bookkeeping commit observed in issue #23 when the
1836
+ dispatch loop re-enters with a stale in-memory wu.status.
1837
+ """
1838
+ if not wu_file.is_file():
1839
+ return False
1840
+ fm, _ = read_frontmatter(wu_file)
1841
+ if fm.get("status") != DONE:
1842
+ return False
1843
+ auto = fm.get("auto_close")
1844
+ return auto in (True, "true", "True")
1845
+
1846
+
1847
def write_stub_retrospective_terminal(
    feature_dir: Path,
    gate_number: int,
    decision: AutoCloseDecision,
) -> None:
    """Write (or append) the terminal auto-close stub section to RETROSPECTIVE.md.

    Creates ``feature_dir / "RETROSPECTIVE.md"`` when absent, otherwise
    appends the section after a blank line. The resulting file is non-empty
    and carries a ``## Gate N`` heading.

    Idempotent: when the file already has a ``## Gate N ... auto-closed``
    heading for this gate, nothing is written. This mirrors the guard in
    ``append_stub_retrospective_intermediate`` and prevents a duplicate
    section if the auto-close path is re-entered for the same gate.

    The file is read and written as UTF-8 explicitly because the heading
    contains an em dash; appending in a different locale encoding would mix
    encodings inside one file.

    Args:
        feature_dir: Feature directory that owns RETROSPECTIVE.md.
        gate_number: Gate number rendered into the heading.
        decision: Auto-close decision; ``metrics``, ``feature_id`` and
            ``predicate_version`` are read from it.
    """
    retro = feature_dir / "RETROSPECTIVE.md"
    retro_exists = retro.exists()
    # errors="replace": this read only feeds the guard below, so an
    # undecodable byte elsewhere in the file must not abort the close.
    if retro_exists and re.search(
        rf"^##\s+Gate\s+{gate_number}\b.*auto-closed",
        retro.read_text(encoding="utf-8", errors="replace"),
        re.MULTILINE,
    ):
        return
    metrics = decision.metrics
    budget = metrics.get("gate_budget")
    budget_str = f"${budget:.2f}" if budget is not None else "<unset>"
    total_cost = metrics.get("gate_total_cost", 0.0)
    section = (
        f"## Gate {gate_number} — auto-closed (predicate=v1)\n\n"
        f"On-plan close; full retrospective ceremony skipped per\n"
        f"`evaluate_auto_close`.\n\n"
        f"- feature_id: {decision.feature_id}\n"
        f"- predicate_version: {decision.predicate_version}\n"
        f"- gate_total_cost: ${total_cost:.2f}\n"
        f"- gate_budget: {budget_str}\n"
        f"- reasons: [] (auto=True)\n"
    )
    if retro_exists:
        with retro.open("a", encoding="utf-8") as fh:
            fh.write("\n" + section)
    else:
        retro.write_text(section, encoding="utf-8")
1877
+
1878
+
1879
def mark_close_wu_auto_closed(
    wu: "WorkUnit | None",
    decision: AutoCloseDecision,
) -> None:
    """Stamp the close WU's frontmatter for the auto-close path.

    Writes, in order, ``status=done``, ``verdict=met``, ``auto_close=true``
    and ``auto_close_reasons=[]`` to ``wu.file``. ``decision`` is accepted
    for signature symmetry and is not read. Does nothing when ``wu`` is
    ``None`` (legacy gate without a close WU).
    """
    if wu is not None:
        stamped_fields = (
            ("status", "done"),
            ("verdict", "met"),
            ("auto_close", "true"),
            ("auto_close_reasons", "[]"),
        )
        for field_name, field_value in stamped_fields:
            write_frontmatter_field(wu.file, field_name, field_value)
1895
+
1896
+
1897
def resolve_auto_close_override(
    args: "argparse.Namespace",
    feature_dir: Path,
) -> tuple[bool, str]:
    """Decide whether auto-close is overridden, and why.

    Precedence: a truthy ``args.force_full_close`` wins; otherwise a PLAN.md
    in ``feature_dir`` whose frontmatter ``auto_close_disabled`` is ``True``,
    ``"true"`` or ``"True"`` activates the override.

    Returns:
        ``(True, "force_full_close_cli_flag")``,
        ``(True, "auto_close_disabled_per_plan")``, or ``(False, "")``.
    """
    cli_forced = getattr(args, "force_full_close", None)
    if cli_forced:
        return (True, "force_full_close_cli_flag")

    plan_path = feature_dir / "PLAN.md"
    if not plan_path.is_file():
        return (False, "")

    frontmatter, _ = read_frontmatter(plan_path)
    disabled_in_plan = frontmatter.get("auto_close_disabled") in (True, "true", "True")
    if disabled_in_plan:
        return (True, "auto_close_disabled_per_plan")
    return (False, "")
1910
+
1911
+
1912
def maybe_auto_close_terminal(
    feature_dir: Path,
    feature_id: str,
    gate: "GateNode",
    gates: "list[GateNode]",
    events_path: Path,
    close_wu_for_terminal: "WorkUnit | None",
    repo_root: Path = REPO_ROOT,
) -> tuple[bool, AutoCloseDecision]:
    """Run the auto-close predicate for the terminal gate and act on it.

    When the predicate fires, this writes the retrospective stub, stamps the
    close WU's frontmatter, and flushes one ``auto_close_decision`` event.

    Returns:
        ``(True, decision)`` when the auto path was taken, or
        ``(False, decision)`` when the predicate refused, in which case the
        caller falls through to the regular close-WU dispatch.

    Idempotent: if the close WU is already auto-closed on disk, returns
    ``(False, <decision with reasons=["already_auto_closed"]>)`` without
    evaluating the predicate or emitting events (issue #23).
    """
    close_wu = close_wu_for_terminal
    if close_wu is not None and _already_auto_closed(close_wu.file):
        already_closed = AutoCloseDecision(
            auto=False,
            reasons=["already_auto_closed"],
            metrics={},
            gate_id=gate.number,
            feature_id=feature_id,
            predicate_version="v1",
        )
        return False, already_closed

    decision = evaluate_auto_close(feature_dir, gate.number)
    if decision.auto:
        write_stub_retrospective_terminal(feature_dir, gate.number, decision)
        mark_close_wu_auto_closed(close_wu, decision)
        gate_metrics = decision.metrics
        event_payload = {
            "gate": gate.number,
            "auto": True,
            "reasons": decision.reasons,
            "predicate_version": decision.predicate_version,
            "metrics": {
                "gate_total_cost": gate_metrics.get("gate_total_cost", 0.0),
                "gate_budget": gate_metrics.get("gate_budget"),
                "blocked_human_events": gate_metrics.get("blocked_human_events", []),
                "replan_events": gate_metrics.get("replan_events", []),
            },
        }
        flush_events(
            events_path,
            [build_event("auto_close_decision", feature_id, event_payload)],
        )
        return True, decision
    return False, decision
1961
+
1962
+
1963
+ # --------------------------------------------------------------------------- #
1964
+ # Intermediate auto-close helpers (FEAT-2026-0018/T05) #
1965
+ # --------------------------------------------------------------------------- #
1966
+
1967
+
1968
def append_stub_retrospective_intermediate(
    feature_dir: Path,
    gate_number: int,
    decision: AutoCloseDecision,
) -> None:
    """Append a Gate N auto-close stub to RETROSPECTIVE.md, creating it if absent.

    Idempotent: nothing is written when the file already contains a
    ``## Gate N ... auto-closed`` heading for this gate (re-arm guard, AC5).

    The file is read and written as UTF-8 explicitly. The heading contains an
    em dash, so appending in the platform's locale encoding could mix
    encodings inside one file on non-UTF-8 systems.

    Args:
        feature_dir: Feature directory that owns RETROSPECTIVE.md.
        gate_number: Gate being closed; ``gate_number + 1`` is named in the
            stub as the gate the plan-next WU drafts.
        decision: Auto-close decision; ``metrics``, ``feature_id`` and
            ``predicate_version`` are read from it.
    """
    retro = feature_dir / "RETROSPECTIVE.md"
    retro_exists = retro.exists()
    # errors="replace": this read only feeds the guard, so an undecodable
    # byte elsewhere in the file must not abort the close.
    if retro_exists and re.search(
        rf"^##\s+Gate\s+{gate_number}\b.*auto-closed",
        retro.read_text(encoding="utf-8", errors="replace"),
        re.MULTILINE,
    ):
        return
    metrics = decision.metrics
    budget = metrics.get("gate_budget")
    budget_str = f"${budget:.2f}" if budget is not None else "<unset>"
    total_cost = metrics.get("gate_total_cost", 0.0)
    section = (
        f"## Gate {gate_number} — auto-closed (predicate=v1)\n\n"
        f"On-plan intermediate close; full close-intermediate ceremony\n"
        f"skipped per `evaluate_auto_close`. `plan-next` WU dispatched\n"
        f"to draft gate {gate_number + 1}.\n\n"
        f"- feature_id: {decision.feature_id}\n"
        f"- predicate_version: {decision.predicate_version}\n"
        f"- gate_total_cost: ${total_cost:.2f}\n"
        f"- gate_budget: {budget_str}\n"
        f"- reasons: [] (auto=True)\n"
    )
    if retro_exists:
        with retro.open("a", encoding="utf-8") as fh:
            fh.write("\n" + section)
    else:
        retro.write_text(section, encoding="utf-8")
2005
+
2006
+
2007
def maybe_auto_close_intermediate(
    feature_dir: Path,
    feature_id: str,
    gate: "GateNode",
    gates: "list[GateNode]",
    events_path: Path,
    repo_root: Path,
    close_intermediate_wu: "WorkUnit | None",
    plan_next_wu: "WorkUnit | None",
) -> tuple[bool, AutoCloseDecision]:
    """Run the auto-close predicate for an intermediate (non-terminal) gate.

    When the predicate fires, this appends the retrospective stub, marks the
    close-intermediate WU ``status=done`` / ``auto_close=true`` /
    ``auto_close_reasons=[]`` (no ``verdict`` is written), and flushes one
    ``auto_close_decision`` event. Dispatching ``plan_next_wu`` afterwards is
    left to the caller (AC4).

    Returns:
        ``(True, decision)`` when the auto path was taken, or
        ``(False, decision)`` when the predicate refused, in which case the
        caller falls through to the regular close-intermediate dispatch.

    Idempotent: if the close-intermediate WU is already auto-closed on disk,
    returns ``(False, <decision with reasons=["already_auto_closed"]>)`` and
    emits no event, preventing the double-fire described in issue #23.
    """
    close_wu = close_intermediate_wu
    if close_wu is not None and _already_auto_closed(close_wu.file):
        already_closed = AutoCloseDecision(
            auto=False,
            reasons=["already_auto_closed"],
            metrics={},
            gate_id=gate.number,
            feature_id=feature_id,
            predicate_version="v1",
        )
        return False, already_closed

    decision = evaluate_auto_close(feature_dir, gate.number)
    if not decision.auto:
        return False, decision

    append_stub_retrospective_intermediate(feature_dir, gate.number, decision)
    if close_wu is not None:
        for field_name, field_value in (
            ("status", "done"),
            ("auto_close", "true"),
            ("auto_close_reasons", "[]"),
        ):
            write_frontmatter_field(close_wu.file, field_name, field_value)

    event_payload = {
        "gate": gate.number,
        "gate_type": "intermediate",
        "auto": True,
        "reasons": decision.reasons,
        "plan_next_dispatched": True,
        "predicate_version": decision.predicate_version,
    }
    flush_events(
        events_path,
        [build_event("auto_close_decision", feature_id, event_payload)],
    )
    return True, decision
2061
+
2062
+
2063
def _fire_and_verify_terminal_flips(
    close_wu: "WorkUnit",
    feature_dir: Path,
    events_path: Path,
    feature_id: str,
) -> int:
    """Apply the terminal flips, commit them, then check post-pass invariants.

    Shared by the auto-close path and the normal close-WU path
    (FEAT-2026-0018/T04). When the invariant guard fails, a
    ``human_escalation`` event is flushed and committed and a diagnostic is
    printed.

    Returns:
        0 when the invariants hold, 1 when the guard fired.
    """
    wu_id = close_wu.wu_id

    flipped = fire_terminal_flips(close_wu, feature_dir, REPO_ROOT)
    if flipped:
        commit_bookkeeping(
            flipped,
            f"chore(loop): {wu_id} terminal flips\n\nFeature: {feature_id}",
        )

    head_post = git("rev-parse", "HEAD")
    ok, reason = verify_post_pass_invariants(close_wu, feature_dir, REPO_ROOT, head_post)
    if ok:
        return 0

    # The text before the first ':' of the reason names the failed assertion.
    escalation = build_event(
        "human_escalation",
        wu_id,
        {
            "reason": "post_pass_invariant_failed",
            "assertion": reason.split(":", 1)[0].strip(),
            "summary": reason,
        },
    )
    flush_events(events_path, [escalation])
    commit_bookkeeping(
        [events_path],
        f"chore(loop): {wu_id} post_pass_invariant_failed\n\nFeature: {feature_id}",
    )
    print(f"\n POST-PASS INVARIANT FAILED — {reason}")
    print(
        "Close WU passed with verdict=met but a terminal flip did "
        "not materialize. This is the FEAT-2026-0015/T06 "
        "wiring-race regression surface. Inspect events.jsonl "
        "and the fire_terminal_flips wiring."
    )
    return 1
2105
+
2106
+
2107
+ # --------------------------------------------------------------------------- #
2108
+ # Closing-ceremony deliverable guards (FEAT-2026-0015/T07) #
2109
+ # --------------------------------------------------------------------------- #
2110
+
2111
+
2112
+ def _gate_number_from_wu_id(wu_id: str) -> int | None:
2113
+ """Parse gate number from a closing WU ID like FEAT-2026-0015/G1-PLAN."""
2114
+ segment = wu_id.rsplit("/", 1)[-1]
2115
+ m = re.match(r"G(\d+)-", segment)
2116
+ return int(m.group(1)) if m else None
2117
+
2118
+
2119
def assert_retrospective_exists(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-a) Require a non-blank RETROSPECTIVE.md in the feature dir.

    Only ``feature_dir`` is consulted; the other parameters exist to match
    the shared guard signature.

    Returns:
        ``(True, "")`` when the file exists and has non-whitespace content,
        otherwise ``(False, <reason>)``.
    """
    retro = feature_dir / "RETROSPECTIVE.md"
    if retro.exists() and retro.read_text().strip():
        return True, ""
    return (
        False,
        "assert_retrospective_exists: RETROSPECTIVE.md absent or empty in feature dir",
    )
2130
+
2131
+
2132
def assert_learnings_appended_or_noop(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-b) Require a LEARNINGS.md addition, or an explicit no-op note.

    Passes when ``git diff head_before HEAD -- .specfuse/LEARNINGS.md`` shows
    at least one added line (``+`` lines other than the ``+++`` file header),
    or when RETROSPECTIVE.md in ``feature_dir`` contains "nothing
    generalizes" (case-insensitive).

    Returns:
        ``(True, "")`` on pass, otherwise ``(False, <reason>)``.
    """
    diff_cmd = ["git", "diff", head_before, "HEAD", "--", ".specfuse/LEARNINGS.md"]
    diff = subprocess.run(diff_cmd, capture_output=True, text=True)
    for line in diff.stdout.splitlines():
        if line.startswith("+") and not line.startswith("+++"):
            return True, ""

    # No additions: accept an explicit statement that nothing generalizes.
    retro = feature_dir / "RETROSPECTIVE.md"
    if retro.exists():
        if "nothing generalizes" in retro.read_text().lower():
            return True, ""

    return (
        False,
        "assert_learnings_appended_or_noop: no LEARNINGS.md additions in squash "
        "and no 'nothing generalizes' note in RETROSPECTIVE.md",
    )
2154
+
2155
+
2156
def assert_doc_or_roadmap_diff(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-c) Require a documentation deliverable in the squash diff.

    Accepts any changed path that is ``.specfuse/roadmap.md``,
    ``.specfuse/LEARNINGS.md``, under ``docs/``, or named
    ``RETROSPECTIVE.md`` (at the root or under any directory). A
    ``close-intermediate`` WU whose body mentions neither ``docs/`` nor
    ``roadmap.md`` declares no doc surface and passes without a diff.

    git is invoked with ``core.quotePath=false``. By default git C-quotes
    paths containing non-ASCII bytes (wrapping them in double quotes with
    octal escapes), which would defeat the prefix/suffix checks below. Output
    is decoded as UTF-8 with replacement so an unusual filename cannot raise.

    Returns:
        ``(True, "")`` on pass, otherwise ``(False, <reason>)``.
    """
    proc = subprocess.run(
        ["git", "-c", "core.quotePath=false", "diff", "--name-only", head_before, "HEAD"],
        capture_output=True, encoding="utf-8", errors="replace",
    )
    exact_deliverables = (
        ".specfuse/roadmap.md",
        ".specfuse/LEARNINGS.md",
        "RETROSPECTIVE.md",
    )
    for path in proc.stdout.splitlines():
        if path in exact_deliverables:
            return True, ""
        if path.startswith("docs/") or path.endswith("/RETROSPECTIVE.md"):
            return True, ""
    # For close-intermediate: skip when the WU spec declares no doc surface.
    if wu.type == "close-intermediate":
        if "docs/" not in wu.body and "roadmap.md" not in wu.body:
            return True, ""
    return (
        False,
        "assert_doc_or_roadmap_diff: no docs/, .specfuse/roadmap.md, "
        ".specfuse/LEARNINGS.md, or RETROSPECTIVE.md file in squash diff",
    )
2188
+
2189
+
2190
def assert_verdict_well_formed(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-d) Require a ``verdict`` frontmatter value from VERDICT_VALUES.

    The verdict is read from ``wu.file`` on disk, not from ``wu.verdict``:
    the agent writes it during dispatch, after the in-memory WU was loaded,
    so the in-memory value is stale (issue #12). On success ``wu.verdict``
    is updated to the on-disk value so later checks see it.

    Returns:
        ``(True, "")`` when the verdict is present and valid, otherwise
        ``(False, <reason>)`` with ``wu.verdict`` left untouched.
    """
    frontmatter, _ = read_frontmatter(wu.file)
    verdict = frontmatter.get("verdict")
    if verdict is not None and verdict in VERDICT_VALUES:
        wu.verdict = verdict
        return True, ""
    return (
        False,
        f"assert_verdict_well_formed: verdict {verdict!r} absent or not in "
        f"VERDICT_VALUES ({sorted(VERDICT_VALUES)})",
    )
2213
+
2214
+
2215
def assert_cost_analysis_section_when_met(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-e) A `met` verdict requires a 'Cost analysis' heading in RETROSPECTIVE.md.

    The verdict is re-read from the WU file on disk (same reasoning as
    `assert_verdict_well_formed`): the agent writes `verdict:` during dispatch
    and the `wu.verdict` captured by `load_wu` is stale. Doing the re-read
    independently keeps this assertion robust even when it runs outside the
    canonical close-d → close-e ordering.

    Any verdict other than "met" passes unconditionally. For "met", the file
    `feature_dir / "RETROSPECTIVE.md"` must exist and contain a line starting
    with two or more '#' characters followed by " Cost analysis"
    (case-insensitive).
    """
    frontmatter, _body = read_frontmatter(wu.file)
    if frontmatter.get("verdict") != "met":
        return True, ""

    retro_path = feature_dir / "RETROSPECTIVE.md"
    heading_flags = re.MULTILINE | re.IGNORECASE
    has_heading = retro_path.exists() and bool(
        re.search(r"^##+ Cost analysis", retro_path.read_text(), heading_flags)
    )
    if has_heading:
        return True, ""
    return (
        False,
        "assert_cost_analysis_section_when_met: verdict=met but '## Cost analysis' "
        "section absent from RETROSPECTIVE.md",
    )
2238
+
2239
+
2240
_NO_FAILURES_SENTINEL = "### Failure-class breakdown\n\n(no non-passing attempts in scope)\n"


def _non_passing_attempt_payloads(
    feature_dir: Path,
    gate_n: int | None = None,
) -> list[dict]:
    """Collect payloads of non-passing `attempt_outcome` events from events.jsonl.

    Shared by `summarize_attempt_failure_classes` and
    `assert_failure_class_breakdown_when_failures_present` so both apply the
    exact same filter.

    Args:
        feature_dir: Feature directory that holds `events.jsonl`.
        gate_n: When not None, keep only events whose `correlation_id`
            resolves to this gate via `_gate_number_from_wu_id`.

    Returns:
        The `payload` dicts of matching events in file order; an empty list
        when the file is absent or nothing matches.

    Read-only. Lines that are blank, are not valid JSON, decode to something
    other than a JSON object, or carry a non-mapping `payload` are skipped
    (legacy-event tolerance, AC5).
    """
    events_path = feature_dir / "events.jsonl"
    if not events_path.exists():
        return []

    payloads: list[dict] = []
    for raw in events_path.read_text(encoding="utf-8").splitlines():
        raw = raw.strip()
        if not raw:
            continue
        try:
            evt = json.loads(raw)
        except json.JSONDecodeError:
            continue
        # Valid JSON that is not an object (e.g. `[1, 2]`) has no `.get`;
        # treat it as malformed instead of crashing.
        if not isinstance(evt, dict):
            continue
        if evt.get("event_type") != "attempt_outcome":
            continue
        payload = evt.get("payload") or {}
        if not isinstance(payload, dict):
            continue
        if payload.get("outcome") == "passed":
            continue
        if gate_n is not None:
            cid = evt.get("correlation_id", "")
            if _gate_number_from_wu_id(cid) != gate_n:
                continue
        payloads.append(payload)
    return payloads


def summarize_attempt_failure_classes(
    feature_dir: Path,
    gate_n: int | None = None,
) -> str:
    """Render a '### Failure-class breakdown' markdown table from events.jsonl.

    Reads attempt_outcome events whose outcome != 'passed'. When gate_n is
    provided, restricts to events whose correlation_id belongs to that gate
    (resolved via _gate_number_from_wu_id). Returns _NO_FAILURES_SENTINEL when
    events.jsonl is absent or no non-passing attempts match the filter.

    Rows are ordered by count descending, then failure_class ascending. The
    "dominant signature" of a class is its most frequent failure_signature;
    ties go to the signature seen first. A missing/falsy failure_class is
    rendered as "null" and a missing/falsy signature as the empty string.

    Pure function — reads events.jsonl; no writes, no side effects.
    Malformed JSONL lines are skipped (legacy-event tolerance, AC5).
    """
    non_passing = _non_passing_attempt_payloads(feature_dir, gate_n)
    if not non_passing:
        return _NO_FAILURES_SENTINEL

    # Group signatures by failure_class; the per-class count is the list length.
    class_signatures: dict[str, list[str]] = {}
    for p in non_passing:
        fc = str(p.get("failure_class") or "null")
        sig = str(p.get("failure_signature") or "")
        class_signatures.setdefault(fc, []).append(sig)

    def _dominant(sigs: list[str]) -> str:
        freq: dict[str, int] = {}
        for s in sigs:
            freq[s] = freq.get(s, 0) + 1
        # dicts keep insertion order and max() returns the first maximal
        # element, so the earliest-seen signature wins ties.
        return max(freq, key=freq.__getitem__)

    # Sort: count descending, class ascending for ties.
    rows = sorted(
        ((fc, len(sigs)) for fc, sigs in class_signatures.items()),
        key=lambda item: (-item[1], item[0]),
    )

    lines = [
        "### Failure-class breakdown",
        "",
        "| failure_class | non-passed attempts | dominant signature |",
        "|---------------|---------------------|--------------------|",
    ]
    for fc, count in rows:
        lines.append(f"| {fc} | {count} | {_dominant(class_signatures[fc])} |")
    lines.append(f"| **total** | **{len(non_passing)}** | — |")
    lines.append("")
    return "\n".join(lines)


def assert_failure_class_breakdown_when_failures_present(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-f / close-intermediate-d) RETROSPECTIVE.md has '### Failure-class breakdown'
    when non-passing attempt_outcome events exist for the gate.

    Returns (True, "") when:
      - RETROSPECTIVE.md is absent (assert_retrospective_exists fires first for 'close';
        assert_retrospective_gate_section fires first for 'close-intermediate').
      - No non-passing attempts exist in events.jsonl for the gate.
      - The heading is present.

    Returns (False, reason) when non-passing attempts exist but the heading is
    absent; the reason carries the number of non-passing attempts.

    `repo_root` and `head_before` are unused; they are accepted to match the
    shared assertion signature.
    """
    retro = feature_dir / "RETROSPECTIVE.md"
    if not retro.exists():
        return True, ""

    gate_n = _gate_number_from_wu_id(wu.wu_id)
    failures = _non_passing_attempt_payloads(feature_dir, gate_n)
    if not failures:
        return True, ""

    # Read as UTF-8, matching how events.jsonl is read, rather than relying on
    # the platform default encoding.
    retro_text = retro.read_text(encoding="utf-8")
    if re.search(r"^#{3} Failure-class breakdown\b", retro_text, re.MULTILINE):
        return True, ""

    count = len(failures)
    gate_label = f"gate {gate_n}" if gate_n is not None else "all gates"
    return (
        False,
        f"assert_failure_class_breakdown_when_failures_present: {count} "
        f"non-passing attempt(s) in {gate_label} but '### Failure-class breakdown' "
        f"subsection absent from RETROSPECTIVE.md",
    )
2378
+
2379
+
2380
def assert_retrospective_gate_section(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-intermediate-a) RETROSPECTIVE.md carries a heading for this WU's gate.

    The gate number N is parsed from `wu.wu_id`. The check passes when
    `feature_dir / "RETROSPECTIVE.md"` contains a line made of one to three
    '#' characters followed by " Gate N" ending on a word boundary. It fails
    when the gate number cannot be parsed, the file is absent, or no such
    heading exists.

    `repo_root` and `head_before` are unused; they are accepted to match the
    shared assertion signature.
    """
    gate_n = _gate_number_from_wu_id(wu.wu_id)
    if gate_n is None:
        return (
            False,
            "assert_retrospective_gate_section: cannot parse gate number from wu_id",
        )

    retro_path = feature_dir / "RETROSPECTIVE.md"
    if not retro_path.exists():
        return (
            False,
            "assert_retrospective_gate_section: RETROSPECTIVE.md absent in feature dir",
        )

    heading_pattern = rf"^#{{1,3}} Gate {gate_n}\b"
    heading_found = re.search(heading_pattern, retro_path.read_text(), re.MULTILINE)
    if not heading_found:
        return (
            False,
            f"assert_retrospective_gate_section: RETROSPECTIVE.md has no "
            f"'## Gate {gate_n}' or '### Gate {gate_n}' section",
        )
    return True, ""
2403
+
2404
+
2405
def assert_gate_review_exists(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(plan-next-a) GATE-(N+1)-REVIEW.md exists and is non-empty, unless gate N is last.

    N is the gate number parsed from `wu.wu_id`. When the graph loaded from
    the feature directory defines no gate numbered N+1, the feature is
    terminal and no review file is expected. Otherwise the zero-padded
    `GATE-NN-REVIEW.md` for gate N+1 must exist in `feature_dir` and contain
    something other than whitespace.

    `repo_root` and `head_before` are unused; they are accepted to match the
    shared assertion signature.
    """
    current_gate = _gate_number_from_wu_id(wu.wu_id)
    if current_gate is None:
        return (
            False,
            "assert_gate_review_exists: cannot parse gate number from wu_id",
        )

    following_gate = current_gate + 1
    _, gates = load_graph(feature_dir)
    for candidate in gates:
        if candidate.number == following_gate:
            break
    else:
        # No next gate in PLAN.md: terminal feature, nothing to review.
        return True, ""

    review_path = feature_dir / f"GATE-{following_gate:02d}-REVIEW.md"
    if review_path.exists() and review_path.read_text().strip():
        return True, ""
    return (
        False,
        f"assert_gate_review_exists: GATE-{following_gate:02d}-REVIEW.md absent or empty",
    )
2427
+
2428
+
2429
def assert_next_gate_drafted_or_terminal(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(plan-next-b) The next gate has at least one WU ref, or the feature is terminal.

    Passes, in this order of evaluation, when:
      - PLAN.md frontmatter has `status: done`;
      - `.specfuse/roadmap.md` exists and the table row whose first cell is
        the feature id has "done" in its third cell;
      - PLAN.md defines no gate numbered N+1 (N parsed from `wu.wu_id`);
      - gate N+1 exists and has a non-empty `refs` list.

    Fails when the gate number cannot be parsed (and neither "done" marker
    applied) or when gate N+1 exists with no refs. `head_before` is unused;
    it is accepted to match the shared assertion signature.
    """
    plan_frontmatter, _ = read_frontmatter(feature_dir / "PLAN.md")
    if plan_frontmatter.get("status") == "done":
        return True, ""

    # wu_id is "<feature_id>/<unit>"; the roadmap is keyed by the feature part.
    feature_id = wu.wu_id.rsplit("/", 1)[0]
    roadmap_path = repo_root / ".specfuse" / "roadmap.md"
    if roadmap_path.exists():
        row_pattern = r"^\|\s*" + re.escape(feature_id) + r"\s*\|([^|]*)\|([^|]*)\|"
        row = re.search(row_pattern, roadmap_path.read_text(), re.MULTILINE)
        if row and row.group(2).strip() == "done":
            return True, ""

    gate_n = _gate_number_from_wu_id(wu.wu_id)
    if gate_n is None:
        return (
            False,
            "assert_next_gate_drafted_or_terminal: cannot parse gate number from wu_id",
        )

    _, gates = load_graph(feature_dir)
    upcoming = next((g for g in gates if g.number == gate_n + 1), None)
    # No gate N+1 in PLAN.md → terminal (plan-next set PLAN.md done or feature is single-gate).
    if upcoming is None:
        return True, ""
    if upcoming.refs:
        return True, ""
    return (
        False,
        f"assert_next_gate_drafted_or_terminal: gate {gate_n + 1} has no drafted "
        f"work_units in PLAN.md and neither PLAN.md nor roadmap marks done",
    )
2465
+
2466
+
2467
# Closing-deliverable assertions keyed by WU type.
#
# assert_closing_deliverables looks up the list for `wu.type`, calls each
# function as fn(wu, feature_dir, repo_root, head_before) in list order, and
# stops at the first one that returns (False, reason) — so the order below
# decides which failure reason is reported. A WU type with no entry here has
# no closing assertions and passes that guard unconditionally.
CLOSING_ASSERTIONS_BY_TYPE: dict[str, list] = {
    # Terminal close ceremony.
    "close": [
        assert_retrospective_exists,
        assert_learnings_appended_or_noop,
        assert_doc_or_roadmap_diff,
        assert_verdict_well_formed,                            # close-d
        assert_cost_analysis_section_when_met,                 # close-e
        assert_failure_class_breakdown_when_failures_present,  # close-f
    ],
    # Per-gate close for a non-terminal gate.
    "close-intermediate": [
        assert_retrospective_gate_section,                     # close-intermediate-a
        assert_learnings_appended_or_noop,
        assert_doc_or_roadmap_diff,
        assert_failure_class_breakdown_when_failures_present,  # close-intermediate-d
    ],
    # Planning of the following gate.
    "plan-next": [
        assert_gate_review_exists,                             # plan-next-a
        assert_next_gate_drafted_or_terminal,                  # plan-next-b
    ],
}
2487
+
2488
+
2489
def assert_closing_deliverables(
    wu: WorkUnit,
    feature_dir: Path,
    repo_root: Path,
    head_before: str,
) -> tuple[bool, str]:
    """Run the type-keyed closing deliverable guard (FEAT-2026-0015/T07).

    The assertions registered for `wu.type` in CLOSING_ASSERTIONS_BY_TYPE are
    called in order with (wu, feature_dir, repo_root, head_before). The first
    one that fails short-circuits and its (False, reason) is returned; the
    reason names the failing assertion function. A type with no registered
    assertions (implementation type — other guards handle it), or one whose
    assertions all pass, yields (True, "").

    There is deliberately no "diff is empty" bypass: a close-type WU whose
    squash contains only the driver's own WU-file bookkeeping is a hollow pass
    and MUST fail one of the typed assertions (assert_retrospective_exists
    fires first for ``close``). An earlier bypass, added for test-fixture
    convenience, also silently passed real hollow-pass close ceremonies
    (FEAT-2026-0017/G1-CLOSE attempt-3 surface).
    """
    for check in CLOSING_ASSERTIONS_BY_TYPE.get(wu.type, []):
        passed, why = check(wu, feature_dir, repo_root, head_before)
        if not passed:
            return False, why
    return True, ""
2516
+
2517
+
2518
def assert_implementation_touched_files(
    wu: WorkUnit,
    touched: list[str],
) -> tuple[bool, str]:
    """Empty-files escalation for implementation WUs (FEAT-2026-0022/T03).

    A hard gate on the ``files_touched`` signal that does not depend on
    ``produces:``. Only ``implementation`` WUs are checked; every other type
    returns ``(True, "")`` immediately (close/plan-next/etc. produce
    reflective artifacts gated by ``assert_closing_deliverables``).

    For an ``implementation`` WU, entries of ``touched`` whose basename equals
    the WU file's own basename or ``events.jsonl`` are ignored. If anything
    remains the WU passes; otherwise ``(False, summary)`` is returned because
    an ``implementation`` WU with no deliverable file diff cannot be ``done``.

    This closes the zero-deliverable hollow pass from the other side of
    ``verify_files_changed`` (which opts out when the agent claims nothing):
    whatever the agent claimed, the squash diff must name a real deliverable.
    ``touched`` MUST be derived from the post-squash ``sha`` so the WU's own
    status flip is present — the filter strips it; without that strip the
    guard never fires and is a silent no-op (escalation trigger 2).
    """
    if wu.type != "implementation":
        return True, ""

    bookkeeping_names = (wu.file.name, "events.jsonl")
    touched_names = [Path(entry).name for entry in touched]
    if any(name not in bookkeeping_names for name in touched_names):
        return True, ""
    return (
        False,
        f"implementation WU {wu.wu_id} produced no deliverable files: the "
        f"squash diff names only its own WU file and/or events.jsonl",
    )
2553
+
2554
+
2555
def assert_declared_deliverables(wu: WorkUnit) -> tuple[bool, str]:
    """Deliverable-presence gate (FEAT-2026-0022/T02).

    Every path the WU lists in ``produces:`` must exist on disk and be
    non-empty (``test -s`` semantics: ``Path(p).exists()`` and
    ``Path(p).stat().st_size > 0``). An empty or missing ``wu.produces`` is
    the opt-out — no declaration means no gate, exactly as
    ``verify_files_changed``'s absence opt-out (loop.py:994) — and returns
    ``(True, "")``, as does a list whose every path is present and non-empty.
    The first offending path ends the scan with ``(False, summary)`` naming
    that path and whether it was absent or empty.

    A zero-length file counts as missing: an empty deliverable is a hollow
    deliverable. This catches the partial-bundle hollow pass
    (FEAT-2026-0020/T12: SECURITY.md present, bundled CODE_OF_CONDUCT.md
    absent). The check is file-level only; symbol-level checks are out of
    scope (PLAN Scope OUT).
    """
    declared = wu.produces
    if declared:
        for entry in declared:
            shown = str(entry)
            target = Path(shown)
            if not target.exists():
                return False, f"declared deliverable absent: {shown}"
            if target.stat().st_size == 0:
                return False, f"declared deliverable empty: {shown}"
    return True, ""
2582
+
2583
+
2584
+ # --------------------------------------------------------------------------- #
2585
+ # Post-pass driver-state invariants (FEAT-2026-0017/T01) #
2586
+ # --------------------------------------------------------------------------- #
2587
+
2588
+
2589
def assert_terminal_flips_fired(
    wu: WorkUnit,
    feature_dir: Path,
    repo_root: Path,
    head_before: str,
) -> tuple[bool, str]:
    """Post-pass invariant: a close WU with verdict=met implies the terminal
    state-flips have materialized on disk.

    Evaluated in order, stopping at the first violation:
      1. WU frontmatter verdict, re-read from disk — anything other than
         "met" passes immediately.
      2. The last gate of the loaded graph has a file that exists and whose
         frontmatter has `status: passed`.
      3. `.specfuse/roadmap.md` exists and the table row whose first cell is
         the feature id has "done" in its third cell.
      4. `.specfuse/roadmap-archive.md` exists and contains the anchor
         `<a id="<feature id, lower-cased>"></a>`.

    Failure reasons are prefixed `terminal_gate_not_passed`,
    `roadmap_row_not_done` or `archive_anchor_missing`.

    head_before is accepted to mirror the assertion-function signature shape;
    this check is pure file-state and does not need it.
    """
    wu_frontmatter, _ = read_frontmatter(wu.file)
    if (wu_frontmatter.get("verdict") or None) != "met":
        return True, ""

    # wu_id is "<feature_id>/<unit>".
    feature_id = wu.wu_id.rsplit("/", 1)[0]

    # --- terminal gate file must be `passed` ---
    _, gates = load_graph(feature_dir)
    if not gates:
        return False, "terminal_gate_not_passed: PLAN.md has no gates"
    gate_path = gates[-1].file
    if not gate_path.exists():
        return (
            False,
            f"terminal_gate_not_passed: {gate_path.name} absent",
        )
    gate_frontmatter, _ = read_frontmatter(gate_path)
    gate_status = gate_frontmatter.get("status", "")
    if gate_status != "passed":
        return (
            False,
            f"terminal_gate_not_passed: {gate_path.name} status={gate_status!r}",
        )

    # --- roadmap row must be `done` ---
    specfuse_dir = repo_root / ".specfuse"
    roadmap_path = specfuse_dir / "roadmap.md"
    if not roadmap_path.exists():
        return (
            False,
            f"roadmap_row_not_done: roadmap.md absent at {roadmap_path}",
        )
    row_pattern = r"^\|\s*" + re.escape(feature_id) + r"\s*\|([^|]*)\|([^|]*)\|"
    row = re.search(row_pattern, roadmap_path.read_text(), re.MULTILINE)
    if not row:
        return (
            False,
            f"roadmap_row_not_done: row for {feature_id} not found",
        )
    row_status = row.group(2).strip()
    if row_status != "done":
        return False, f"roadmap_row_not_done: status={row_status!r}"

    # --- archive anchor must be present ---
    archive_path = specfuse_dir / "roadmap-archive.md"
    feat_id_lower = feature_id.lower()
    if not archive_path.exists():
        return (
            False,
            f"archive_anchor_missing: {feat_id_lower} (roadmap-archive.md absent)",
        )
    anchor = f'<a id="{feat_id_lower}"></a>'
    if anchor in archive_path.read_text():
        return True, ""
    return False, f"archive_anchor_missing: {feat_id_lower}"
2663
+
2664
+
2665
# Post-pass driver-state invariants keyed by WU type. verify_post_pass_invariants
# calls each function registered for `wu.type` in order as
# fn(wu, feature_dir, repo_root, head_before) and returns the first failure.
# Only "close" WUs have an invariant; every other type passes unconditionally.
POST_PASS_INVARIANTS_BY_TYPE: dict[str, list] = {
    "close": [assert_terminal_flips_fired],
}
2668
+
2669
+
2670
def verify_post_pass_invariants(
    wu: WorkUnit,
    feature_dir: Path,
    repo_root: Path,
    head_before: str,
) -> tuple[bool, str]:
    """Run the type-keyed post-pass invariant guard (FEAT-2026-0017/T01).

    The invariants registered for `wu.type` in POST_PASS_INVARIANTS_BY_TYPE
    are called in order with (wu, feature_dir, repo_root, head_before). The
    first failing one short-circuits with its (False, reason); a type with no
    invariants, or one whose invariants all hold, yields (True, "").

    This is not the same guard as `assert_closing_deliverables`. That one
    fires immediately after squash and checks the WU's own ceremony
    deliverables (retrospective, learnings, etc.). This one fires after the
    gate-boundary `fire_terminal_flips` invocation and checks that driver-side
    state transitions actually materialized — independent of the agent's
    RESULT.

    It defends against the FEAT-2026-0015/T06 wiring-race surface: a close WU
    passed cleanly with `verdict: met` but `fire_terminal_flips` was never
    invoked because the in-memory `wu.verdict` snapshot (loaded BEFORE
    dispatch by `load_wu`) shadowed the agent's just-written frontmatter
    value. The re-read fix landed in PR #11 (commit 7f403bf); this guard is
    the canary against re-introducing that or any equivalent close-path race.
    """
    for invariant in POST_PASS_INVARIANTS_BY_TYPE.get(wu.type, []):
        holds, why = invariant(wu, feature_dir, repo_root, head_before)
        if not holds:
            return False, why
    return True, ""
2702
+
2703
+
2704
+ # --------------------------------------------------------------------------- #
2705
+ # The loop #
2706
+ # --------------------------------------------------------------------------- #
2707
+
2708
+
2709
def ready(units: list[WorkUnit], done_ids: set[str]) -> list[WorkUnit]:
    """Return the units that can be dispatched now, in their original order.

    A unit qualifies when its `status` is in DISPATCHABLE and every id in its
    `depends_on` is already in `done_ids`. A unit with no dependencies
    qualifies on status alone.
    """
    runnable: list[WorkUnit] = []
    for unit in units:
        if unit.status not in DISPATCHABLE:
            continue
        if all(dep in done_ids for dep in unit.depends_on):
            runnable.append(unit)
    return runnable
2712
+
2713
+
2714
+ def run(
2715
+ feature_arg: str | None,
2716
+ dry_run: bool,
2717
+ force_full_close: str | None = None,
2718
+ ) -> int:
2719
+ # Fail-fast on a malformed verification.yml BEFORE we touch any WU state.
2720
+ # The per-gate `verify()` call lazy-loads the same file; if it's malformed,
2721
+ # the crash lands mid-WU with `status: in_progress` already on disk,
2722
+ # corrupting the recovery surface (see specfuse/loop#35). Validating once
2723
+ # here collapses that into "bad config, no work started."
2724
+ try:
2725
+ load_verification()
2726
+ except _miniyaml.MiniYAMLError as exc:
2727
+ print(
2728
+ f"loop.py: .specfuse/verification.yml is malformed — {exc}",
2729
+ file=sys.stderr,
2730
+ )
2731
+ print(
2732
+ "Fix the file and re-run. No WUs were touched.",
2733
+ file=sys.stderr,
2734
+ )
2735
+ return 1
2736
+ feature_dir = find_feature(feature_arg)
2737
+ feat_fm, gates = load_graph(feature_dir)
2738
+ feature_id = feat_fm.get("feature_id", feature_dir.name)
2739
+ if force_full_close is not None and force_full_close != feature_id:
2740
+ sys.exit(
2741
+ f"loop.py: --force-full-close {force_full_close} does not match "
2742
+ f"feature being processed {feature_id}"
2743
+ )
2744
+ _override_ns = argparse.Namespace(force_full_close=force_full_close)
2745
+ _override_active, _override_reason = resolve_auto_close_override(_override_ns, feature_dir)
2746
+ events_path = feature_dir / "events.jsonl"
2747
+ work_dir = feature_dir / "work"
2748
+ backend = make_backend(feat_fm)
2749
+ backend.on_feature_start(feature_id, feat_fm)
2750
+
2751
+ gate = next((g for g in gates if g.status != "passed"), None)
2752
+ if gate is None:
2753
+ print(f"{feature_id}: all gates passed — feature complete.")
2754
+ backend.on_feature_complete(feature_id)
2755
+ write_frontmatter_field(feature_dir / "PLAN.md", "status", "complete")
2756
+ return 0
2757
+
2758
+ lock_fd = None
2759
+ if not dry_run:
2760
+ # dry-run performs no mutation; inspecting while a real run is active must stay allowed.
2761
+ try:
2762
+ lock_fd = acquire_tree_lock(SPECFUSE_DIR)
2763
+ except BlockingIOError:
2764
+ print(
2765
+ "another loop driver is already running in this working tree "
2766
+ "(.specfuse/.loop.lock held)",
2767
+ file=sys.stderr,
2768
+ )
2769
+ return 1
2770
+ require_git_ready()
2771
+ ensure_feature_branch(feat_fm, feature_dir)
2772
+
2773
+ try:
2774
+
2775
+ # Per-project cost-tracking toggle (top-level key in verification.yml,
2776
+ # default True). When True, the driver records cumulative cost / tokens
2777
+ # on each WU's frontmatter at outcome time and a per-attempt breakdown
2778
+ # in events.jsonl; when False the driver passes plain text mode to
2779
+ # `claude -p` and writes no cost fields.
2780
+ cfg = load_verification()
2781
+ cost_tracking = cfg.get("cost_tracking", True) is not False
2782
+
2783
+ units = [load_wu(feature_dir, ref) for ref in gate.refs]
2784
+ print(f"== {feature_id} — Gate {gate.number} [{gate.status}] "
2785
+ f"({len(units)} work units) ==")
2786
+
2787
+ # Arm check: a gate plan-next drafted starts with draft WUs. Don't execute drafts.
2788
+ drafts = [u for u in units if u.status == "draft"]
2789
+ if drafts and not dry_run:
2790
+ review = feature_dir / f"GATE-{gate.number:02d}-REVIEW.md"
2791
+ print(f"\nGate {gate.number} is drafted but not armed. {len(drafts)} work "
2792
+ f"unit(s) are in `draft`.")
2793
+ if review.exists():
2794
+ print(f"Read {review} for the planner's findings, review the draft WU "
2795
+ f"files, then flip the ones you accept to `status: pending` and "
2796
+ f"re-run.")
2797
+ return 2
2798
+
2799
+ # Done-set must include WUs from every PREVIOUS gate that are already done —
2800
+ # cross-gate `depends_on` references are valid (e.g. gate 2's implementation
2801
+ # WU may depend on gate 1's). Without this, the ready() filter sees the
2802
+ # cross-gate dep as unmet and silently no-ops the gate (then set_gate
2803
+ # awaiting_review fires on an empty run, leaving real WUs un-dispatched).
2804
+ done_ids: set[str] = set()
2805
+ for g in gates:
2806
+ if g.number > gate.number:
2807
+ continue
2808
+ for ref in g.refs:
2809
+ wu_path = feature_dir / ref["file"]
2810
+ if not wu_path.is_file():
2811
+ continue
2812
+ wfm, _ = read_frontmatter(wu_path)
2813
+ if wfm.get("status") == DONE:
2814
+ done_ids.add(ref["id"])
2815
+ blocked = False
2816
+ close_wu_for_terminal: WorkUnit | None = None
2817
+ _terminal_auto_closed_wu: WorkUnit | None = None # FEAT-2026-0018/T11H
2818
+
2819
+ while True:
2820
+ pending = ready(units, done_ids)
2821
+ if not pending:
2822
+ break
2823
+ for wu in pending: # sequential v1; the frontier is independent -> fan-out later
2824
+ # Per-gate cost budget brake — halt-between-WUs.
2825
+ # Mirrors MAX_ATTEMPTS' shape (a brake, not an estimator). Fires
2826
+ # before the next WU's set_wu(in_progress) so an in-progress WU
2827
+ # always runs to a terminal outcome (squash contract intact).
2828
+ # Skipped when the gate is already awaiting_review: the closing
2829
+ # sequence already flipped the gate; the reviewer will observe the
2830
+ # overshoot via the spent vs budget numbers in the next review.
2831
+ if not dry_run and gate.status != "awaiting_review":
2832
+ gate_dict = {"file": gate.file.name, "work_units": gate.refs}
2833
+ if _should_halt_for_budget(feat_fm, gate_dict, feature_dir):
2834
+ budget = gate_budget_usd(gate.file)
2835
+ spent = gate_spent_usd(feat_fm, gate_dict, feature_dir)
2836
+ backend.set_gate(gate, "awaiting_review")
2837
+ flush_events(events_path, [build_event(
2838
+ "human_escalation", feature_id, {
2839
+ "reason": "gate_budget_exceeded",
2840
+ "budget_usd": budget,
2841
+ "spent_usd": round(spent, 6),
2842
+ "next_wu_id": wu.wu_id,
2843
+ })])
2844
+ commit_bookkeeping(
2845
+ [gate.file, events_path],
2846
+ f"chore(loop): gate {gate.number} budget exceeded "
2847
+ f"— awaiting_review\n\nFeature: {feature_id}",
2848
+ )
2849
+ print(f"\nGate {gate.number} budget exceeded: spent "
2850
+ f"${spent:.4f} >= budget ${budget:.4f}. "
2851
+ f"Halted before {wu.wu_id}.")
2852
+ return 1
2853
+
2854
+ print(f"\n[{time.strftime('%H:%M:%S')}] -- {wu.wu_id} "
2855
+ f"[{wu.type}] model={wu.model} effort={wu.effort}")
2856
+ if dry_run:
2857
+ print(" (dry run — would dispatch)")
2858
+ wu.status = DONE
2859
+ done_ids.add(wu.wu_id)
2860
+ continue
2861
+
2862
+ # FEAT-2026-0018/T05 — intermediate auto-close branch
2863
+ if wu.type == "close-intermediate" and not _override_active:
2864
+ _plan_next_wu = next(
2865
+ (w for w in units if w.type == "plan-next"),
2866
+ None,
2867
+ )
2868
+ _auto_closed, _ = maybe_auto_close_intermediate(
2869
+ feature_dir, feature_id, gate, gates,
2870
+ events_path, REPO_ROOT, wu, _plan_next_wu,
2871
+ )
2872
+ if _auto_closed:
2873
+ commit_bookkeeping(
2874
+ [feature_dir / "RETROSPECTIVE.md",
2875
+ wu.file, events_path],
2876
+ f"chore(loop): {wu.wu_id} auto-closed "
2877
+ f"(predicate=v1)\n\nFeature: {feature_id}",
2878
+ )
2879
+ # Mirror the on-disk status flip into the in-memory
2880
+ # WorkUnit so ready()'s u.status in DISPATCHABLE filter
2881
+ # excludes it on the next while-loop pass — without
2882
+ # this, the same WU re-appears in pending, the helper
2883
+ # is called a second time, and (absent its idempotency
2884
+ # guard) a duplicate auto_close_decision event +
2885
+ # duplicate bookkeeping commit are produced (issue #23).
2886
+ wu.status = DONE
2887
+ done_ids.add(wu.wu_id)
2888
+ continue
2889
+ elif wu.type == "close-intermediate" and _override_active:
2890
+ flush_events(events_path, [build_event(
2891
+ "auto_close_decision", wu.wu_id, {
2892
+ "gate": gate.number,
2893
+ "gate_type": "intermediate",
2894
+ "auto": False,
2895
+ "reasons": [_override_reason],
2896
+ "predicate_version": "v1",
2897
+ "override": True,
2898
+ }
2899
+ )])
2900
+
2901
+ # FEAT-2026-0018/T11H — terminal auto-close branch (relocated from post-loop)
2902
+ # Guard wu.verdict is None: only attempt auto-close for WUs that have
2903
+ # not yet been dispatched (no verdict written). WUs with a pre-existing
2904
+ # verdict (e.g. met_locally from a prior attempt) fall through to
2905
+ # normal dispatch so their verdict semantics are honoured.
2906
+ if (wu.type == "close" and gate is gates[-1]
2907
+ and not _override_active and wu.verdict is None):
2908
+ _auto_closed, _decision = maybe_auto_close_terminal(
2909
+ feature_dir, feature_id, gate, gates,
2910
+ events_path, wu, repo_root=REPO_ROOT,
2911
+ )
2912
+ if _auto_closed:
2913
+ commit_bookkeeping(
2914
+ [feature_dir / "RETROSPECTIVE.md",
2915
+ wu.file, events_path],
2916
+ f"chore(loop): {wu.wu_id} auto-closed "
2917
+ f"(predicate=v1)\n\nFeature: {feature_id}",
2918
+ )
2919
+ # Terminal flips fire in post-loop after set_gate(awaiting_review)
2920
+ _terminal_auto_closed_wu = wu
2921
+ # See intermediate branch above: mirror disk → memory
2922
+ # so ready() filters this WU on the next pass (issue #23).
2923
+ wu.status = DONE
2924
+ done_ids.add(wu.wu_id)
2925
+ continue
2926
+ elif (wu.type == "close" and gate is gates[-1]
2927
+ and _override_active and wu.verdict is None):
2928
+ flush_events(events_path, [build_event(
2929
+ "auto_close_decision", wu.wu_id, {
2930
+ "gate": gate.number,
2931
+ "auto": False,
2932
+ "reasons": [_override_reason],
2933
+ "predicate_version": "v1",
2934
+ "override": True,
2935
+ }
2936
+ )])
2937
+ # Fall through to existing close-WU dispatch path
2938
+
2939
+ head_before = git("rev-parse", "HEAD")
2940
+ _is_rearm = detect_rearm_dispatch(wu)
2941
+ if _is_rearm:
2942
+ fold_cumulative_on_rearm(wu, backend)
2943
+ backend.set_wu(wu, "status", "in_progress")
2944
+ # Events and per-attempt notes are buffered in memory during the
2945
+ # WU's lifecycle and flushed at outcome time. This prevents the
2946
+ # `git reset --hard` between failed attempts from silently
2947
+ # wiping appended events / status flips — anything that should
2948
+ # be durable is either committed in the squash (PASS) or in a
2949
+ # bookkeeping commit (BLOCKED/SPINNING).
2950
+ _wu_fm_rearm, _ = read_frontmatter(wu.file)
2951
+ re_arm_count = int(_wu_fm_rearm.get("re_arm_count") or 0)
2952
+ wu_events = [build_event("task_started", wu.wu_id,
2953
+ {"type": wu.type, "model": wu.model,
2954
+ "re_arm_count": re_arm_count})]
2955
+ if _is_rearm:
2956
+ _rearm_history = _wu_fm_rearm.get("re_arm_history") or []
2957
+ _rearm_reason = ""
2958
+ if isinstance(_rearm_history, list) and _rearm_history:
2959
+ _last_entry = _rearm_history[-1]
2960
+ if isinstance(_last_entry, dict):
2961
+ _rearm_reason = str(_last_entry.get("reason", ""))
2962
+ wu_events.append(build_event("re_arm_dispatched", wu.wu_id, {
2963
+ "re_arm_count": re_arm_count,
2964
+ "reason": _rearm_reason,
2965
+ }))
2966
+ if wu.unsandboxed:
2967
+ # Audit signal: WU opted out of the claude -p sandbox.
2968
+ # Event logged before first attempt so the trail exists
2969
+ # even if the attempt crashes. Rationale carried verbatim.
2970
+ wu_events.append(build_event("unsandboxed_dispatch", wu.wu_id, {
2971
+ "rationale": wu.unsandboxed_rationale,
2972
+ }))
2973
+ print(f" ⚠ UNSANDBOXED dispatch — rationale: "
2974
+ f"{wu.unsandboxed_rationale}")
2975
+ attempt_notes: list[tuple[int, str]] = []
2976
+ attempt_outcomes: list[str] = []
2977
+ # Cost accumulators: per-attempt list goes to events.jsonl,
2978
+ # cumulative sum to WU frontmatter at outcome time.
2979
+ attempts_usage: list[dict] = []
2980
+ cum_usage = {"cost_usd": 0.0, "input_tokens": 0, "output_tokens": 0,
2981
+ "duration_seconds": 0.0}
2982
+
2983
+ failure_note = None
2984
+ prior_failure_signature: tuple[str | None, str | None] | None = None
2985
+ for attempt in range(1, MAX_ATTEMPTS + 1):
2986
+ backend.set_wu(wu, "attempts", attempt)
2987
+ print(f" [{time.strftime('%H:%M:%S')}] attempt "
2988
+ f"{attempt}/{MAX_ATTEMPTS} model={wu.model} "
2989
+ f"effort={wu.effort} — fresh session")
2990
+ if attempt > 1 and failure_note:
2991
+ reason = failure_note.strip().splitlines()[0][:200]
2992
+ print(f" retry reason: {reason}")
2993
+ t0 = time.monotonic()
2994
+ outcome, payload, usage = execute_unit_attempt(
2995
+ wu, feature_dir, failure_note, cost_tracking=cost_tracking,
2996
+ head_before=head_before,
2997
+ )
2998
+ duration = round(time.monotonic() - t0, 3)
2999
+ attempt_record: dict = {"attempt": attempt,
3000
+ "duration_seconds": duration}
3001
+ if usage:
3002
+ attempt_record.update(usage)
3003
+ cum_usage["cost_usd"] += float(usage.get("cost_usd", 0.0))
3004
+ cum_usage["input_tokens"] += int(usage.get("input_tokens", 0))
3005
+ cum_usage["output_tokens"] += int(usage.get("output_tokens", 0))
3006
+ attempts_usage.append(attempt_record)
3007
+ cum_usage["duration_seconds"] = round(
3008
+ cum_usage["duration_seconds"] + duration, 3)
3009
+ attempt_outcomes.append(outcome)
3010
+
3011
+ if outcome == "zero_token":
3012
+ # Agent never produced output (input_tokens=0). Skip
3013
+ # RESULT parsing, buffer an event, reset the tree, and
3014
+ # treat as a failed attempt for the purposes of the
3015
+ # attempt loop — counter already incremented at top.
3016
+ wu_events.append(emit_attempt_outcome(
3017
+ wu, attempt, "zero_token_skip",
3018
+ attempts_usage[-1],
3019
+ ))
3020
+ reset_preserving_events(head_before, events_path)
3021
+ print(f" ZERO-TOKEN attempt {attempt}/{MAX_ATTEMPTS} "
3022
+ f"— no agent output, skipping")
3023
+ continue
3024
+
3025
+ if outcome == "blocked":
3026
+ # Reset agent work first; THEN write our bookkeeping; THEN
3027
+ # commit it. Doing the flip before the reset would let the
3028
+ # reset wipe the flip — the silent-state-loss bug.
3029
+ # Use reset_preserving_events to keep prior WU's
3030
+ # flushed-but-uncommitted events.jsonl entries.
3031
+ reset_preserving_events(head_before, events_path)
3032
+ backend.set_wu(wu, "status", "blocked_human")
3033
+ write_cost_to_wu(backend, wu, cum_usage)
3034
+ wu_events.append(emit_attempt_outcome(
3035
+ wu, attempt, "blocked",
3036
+ attempts_usage[-1],
3037
+ files_touched=git_diff_names(head_before, "HEAD"),
3038
+ agent_status="blocked",
3039
+ agent_blocked_reason=payload,
3040
+ ))
3041
+ wu_events.append(build_event("human_escalation", wu.wu_id, {
3042
+ "reason": "agent_reported_blocked",
3043
+ "blocked_reason": payload,
3044
+ "attempts": attempt,
3045
+ "attempts_usage": attempts_usage,
3046
+ }))
3047
+ flush_events(events_path, wu_events)
3048
+ commit_bookkeeping(
3049
+ [wu.file, events_path],
3050
+ f"chore(loop): {wu.wu_id} blocked_human "
3051
+ f"(agent-reported)\n\nFeature: {wu.wu_id}",
3052
+ )
3053
+ print(f" BLOCKED by agent — "
3054
+ f"{payload or '(no reason given)'}")
3055
+ blocked = True
3056
+ break
3057
+
3058
+ if outcome == "passed":
3059
+ # Flip status to DONE BEFORE the squash so the flip is
3060
+ # included in the commit content — survives the next WU's
3061
+ # reset.
3062
+ backend.set_wu(wu, "status", DONE)
3063
+ write_cost_to_wu(backend, wu, cum_usage)
3064
+ try:
3065
+ sha = squash_commit(wu, head_before)
3066
+ except SquashCommitError as exc:
3067
+ # The squash commit was rejected (typically a
3068
+ # pre-commit hook). Treat as a failed attempt rather
3069
+ # than crashing the driver (issue #51): reset the
3070
+ # tree — which also discards the premature DONE flip
3071
+ # written just above — record the failure with git's
3072
+ # stderr, and retry within budget (MAX_ATTEMPTS
3073
+ # exhaustion escalates to blocked_human).
3074
+ reset_preserving_events(head_before, events_path)
3075
+ summary = str(exc)
3076
+ wu_events.append(emit_attempt_outcome(
3077
+ wu, attempt, "squash_commit_failed",
3078
+ attempts_usage[-1],
3079
+ extras={"summary": summary},
3080
+ ))
3081
+ attempt_notes.append((attempt, summary))
3082
+ failure_note = summary
3083
+ print(f" SQUASH COMMIT REJECTED attempt "
3084
+ f"{attempt}/{MAX_ATTEMPTS}")
3085
+ continue
3086
+ # Smoke-import runner (FEAT-2026-0008/T03): after a
3087
+ # successful verify() AND squash, run each
3088
+ # `python3 -c "from X import Y"` line declared in the WU
3089
+ # body. A non-zero exit fails the attempt — the squash
3090
+ # is rolled back via `git reset --hard head_before` so
3091
+ # the verify-passing-but-smoke-failing tree does not
3092
+ # remain in history. Rollback FIRST (before event log
3093
+ # and before the next attempt iterates) per WU
3094
+ # escalation trigger 3.
3095
+ smoke_cmds = extract_smoke_imports(wu.body)
3096
+ if smoke_cmds:
3097
+ smoke_ok, smoke_summary = run_smoke_imports(
3098
+ smoke_cmds, Path("."),
3099
+ )
3100
+ if not smoke_ok:
3101
+ reset_preserving_events(head_before, events_path)
3102
+ wu_events.append(emit_attempt_outcome(
3103
+ wu, attempt, "smoke_import_failed",
3104
+ attempts_usage[-1],
3105
+ extras={"summary": smoke_summary},
3106
+ ))
3107
+ attempt_notes.append((attempt, smoke_summary))
3108
+ failure_note = smoke_summary
3109
+ print(f" SMOKE FAIL attempt "
3110
+ f"{attempt}/{MAX_ATTEMPTS}")
3111
+ continue
3112
+ # Closing deliverable guard (FEAT-2026-0015/T07):
3113
+ # fires after smoke, before terminal-flip bookkeeping.
3114
+ closing_ok, closing_summary = assert_closing_deliverables(
3115
+ wu, feature_dir, REPO_ROOT, head_before,
3116
+ )
3117
+ if not closing_ok:
3118
+ reset_preserving_events(head_before, events_path)
3119
+ wu_events.append(emit_attempt_outcome(
3120
+ wu, attempt, "closing_deliverable_missing",
3121
+ attempts_usage[-1],
3122
+ extras={
3123
+ "assertion": closing_summary.split(":", 1)[0].strip(),
3124
+ "summary": closing_summary,
3125
+ },
3126
+ ))
3127
+ attempt_notes.append((attempt, closing_summary))
3128
+ failure_note = closing_summary
3129
+ print(
3130
+ f" CLOSING DELIVERABLE MISSING attempt "
3131
+ f"{attempt}/{MAX_ATTEMPTS} — {closing_summary}"
3132
+ )
3133
+ continue
3134
+ # Deliverable-presence gate (FEAT-2026-0022/T02):
3135
+ # fires after smoke and closing-deliverable guards,
3136
+ # before the empty-files catch-all so the named-path
3137
+ # diagnostic wins. Every path the WU declared in
3138
+ # `produces:` must exist on disk and be non-empty; an
3139
+ # absent or zero-length declared deliverable refuses the
3140
+ # pass (the partial-bundle hollow pass,
3141
+ # FEAT-2026-0020/T12). Opt-out: a WU with empty
3142
+ # `produces:` never fires this — existing behavior for
3143
+ # every current WU is unchanged.
3144
+ deliv_ok, deliv_summary = assert_declared_deliverables(wu)
3145
+ if not deliv_ok:
3146
+ reset_preserving_events(head_before, events_path)
3147
+ missing = deliv_summary.split(": ", 1)[-1]
3148
+ wu_events.append(emit_attempt_outcome(
3149
+ wu, attempt, "deliverable_missing",
3150
+ attempts_usage[-1],
3151
+ extras={"summary": deliv_summary,
3152
+ "missing": missing},
3153
+ ))
3154
+ attempt_notes.append((attempt, deliv_summary))
3155
+ failure_note = deliv_summary
3156
+ print(
3157
+ f" DELIVERABLE MISSING attempt "
3158
+ f"{attempt}/{MAX_ATTEMPTS} — {deliv_summary}"
3159
+ )
3160
+ continue
3161
+ # Empty-files escalation (FEAT-2026-0022/T03): compute
3162
+ # the post-squash touched-paths list ONCE here and reuse
3163
+ # it for the passed event below. An implementation WU
3164
+ # whose squash names only its own WU file + events.jsonl
3165
+ # produced no deliverable — refuse the pass, MAX_ATTEMPTS
3166
+ # exhaustion escalates via existing machinery.
3167
+ touched = git_diff_names(head_before, sha) if sha else []
3168
+ impl_ok, impl_summary = assert_implementation_touched_files(
3169
+ wu, touched,
3170
+ )
3171
+ if not impl_ok:
3172
+ reset_preserving_events(head_before, events_path)
3173
+ wu_events.append(emit_attempt_outcome(
3174
+ wu, attempt, "no_deliverable_files",
3175
+ attempts_usage[-1],
3176
+ extras={"summary": impl_summary},
3177
+ ))
3178
+ attempt_notes.append((attempt, impl_summary))
3179
+ failure_note = impl_summary
3180
+ print(
3181
+ f" NO DELIVERABLE FILES attempt "
3182
+ f"{attempt}/{MAX_ATTEMPTS}"
3183
+ )
3184
+ continue
3185
+ if wu.type == "close":
3186
+ # Re-read frontmatter post-squash: the agent writes
3187
+ # `verdict:` to the WU file DURING dispatch, but
3188
+ # `wu.verdict` was populated by `load_wu` BEFORE
3189
+ # dispatch. Without this re-read, the agent's
3190
+ # verdict write is invisible to the close-path
3191
+ # check and `fire_terminal_flips` never fires.
3192
+ # Surfaced FEAT-2026-0015/G2-CLOSE: verdict: met
3193
+ # written by agent; in-memory wu.verdict stayed
3194
+ # None; terminal flips skipped silently.
3195
+ wu_fm_post, _ = read_frontmatter(wu.file)
3196
+ wu.verdict = wu_fm_post.get("verdict") or None
3197
+ if verdict_permits_terminal_flips(wu.verdict):
3198
+ close_wu_for_terminal = wu
3199
+ else:
3200
+ plan_path = feature_dir / "PLAN.md"
3201
+ plan_fm_recheck, _ = read_frontmatter(plan_path)
3202
+ if plan_fm_recheck.get("status") == "done":
3203
+ write_frontmatter_field(plan_path, "status", "active")
3204
+ commit_bookkeeping(
3205
+ [plan_path],
3206
+ f"chore(loop): {wu.wu_id} revert PLAN.md done"
3207
+ f" (hedged verdict)\n\nFeature: {wu.wu_id}",
3208
+ )
3209
+ elif _legacy_4wu_terminal_close_complete(
3210
+ wu, units, gate, gates,
3211
+ ):
3212
+ # Legacy 4-WU close sequence completed on terminal gate
3213
+ # (issue #16). The pre-FEAT-2026-0015 shape
3214
+ # (retrospective + lessons + docs + plan-next) has no
3215
+ # close-type WU and no verdict field. Treat the
3216
+ # plan-next pass as terminating-equivalent so the
3217
+ # post-loop block fires fire_terminal_flips.
3218
+ close_wu_for_terminal = wu
3219
+ # FEAT-2026-0018/T07 — plan-next-draft lint hook (warn-only v1)
3220
+ if wu.type == "plan-next":
3221
+ try:
3222
+ from lint_plan import lint_plan_next_draft
3223
+ _warns = lint_plan_next_draft(feature_dir, gate.number)
3224
+ except Exception as _exc:
3225
+ _warns = [f"lint_plan_next_draft raised: {_exc}"]
3226
+ for _w in _warns:
3227
+ print(f" WARN (plan-next-draft lint): {_w}")
3228
+ if _warns:
3229
+ wu_events.append(build_event(
3230
+ "plan_next_draft_lint", wu.wu_id,
3231
+ {"gate": gate.number, "warns": list(_warns),
3232
+ "blocking": False},
3233
+ ))
3234
+ wu_events.append(emit_attempt_outcome(
3235
+ wu, attempt, "passed",
3236
+ attempts_usage[-1],
3237
+ files_touched=touched,
3238
+ agent_status="complete",
3239
+ agent_blocked_reason=None,
3240
+ ))
3241
+ wu_events.append(build_event("task_completed", wu.wu_id, {
3242
+ "attempts": attempt,
3243
+ "attempts_usage": attempts_usage,
3244
+ }))
3245
+ flush_events(events_path, wu_events)
3246
+ done_ids.add(wu.wu_id)
3247
+ print(f" PASS — committed {sha}")
3248
+ break
3249
+
3250
+ if outcome == "files_changed_mismatch":
3251
+ # RESULT declared files_changed paths that show no diff
3252
+ # against head_before. Treat as a failed attempt: skip
3253
+ # squash, reset the tree, record evidence, retry within
3254
+ # budget. payload is the list of unchanged paths.
3255
+ note = (
3256
+ "RESULT block declared `files_changed` paths that "
3257
+ "show NO diff against HEAD before this attempt:\n"
3258
+ + "\n".join(f" - {p}" for p in payload)
3259
+ + "\nEither actually modify them, or correct the "
3260
+ "files_changed list to match what you really edited."
3261
+ )
3262
+ wu_events.append(emit_attempt_outcome(
3263
+ wu, attempt, "files_changed_mismatch",
3264
+ attempts_usage[-1],
3265
+ extras={"unchanged_paths": list(payload)},
3266
+ ))
3267
+ attempt_notes.append((attempt, note))
3268
+ failure_note = note
3269
+ reset_preserving_events(head_before, events_path)
3270
+ print(f" FILES_CHANGED MISMATCH attempt "
3271
+ f"{attempt}/{MAX_ATTEMPTS} — {len(payload)} path(s) "
3272
+ f"unchanged")
3273
+ continue
3274
+
3275
+ # outcome == "failed": evidence in payload, retry within budget.
3276
+ # Per-attempt notes are buffered (not written to disk) so they
3277
+ # ride with the spinning-escalation commit if we exhaust
3278
+ # attempts; on eventual PASS they're discarded as scratch.
3279
+ attempt_notes.append((attempt, payload))
3280
+ failure_note = payload
3281
+ _fc, _fs = parse_gate_failure_signature(payload)
3282
+ _ex = extract_failure_excerpt(payload)
3283
+ wu_events.append(emit_attempt_outcome(
3284
+ wu, attempt, "failed",
3285
+ attempts_usage[-1],
3286
+ failure_class=_fc,
3287
+ failure_signature=_fs,
3288
+ failure_excerpt=_ex,
3289
+ files_touched=git_diff_names(head_before, "HEAD"),
3290
+ agent_status="complete",
3291
+ agent_blocked_reason=None,
3292
+ ))
3293
+ # T04: halt early when same (class, signature) repeats.
3294
+ if detect_spinning_signature_repeat((_fc, _fs), prior_failure_signature):
3295
+ wu_events.append(build_event("human_escalation", wu.wu_id, {
3296
+ "reason": "spinning_signature_repeat",
3297
+ "failure_class": _fc,
3298
+ "failure_signature": _fs,
3299
+ "attempts": attempt,
3300
+ "attempts_usage": attempts_usage,
3301
+ }))
3302
+ reset_preserving_events(head_before, events_path)
3303
+ backend.set_wu(wu, "status", "blocked_human")
3304
+ write_cost_to_wu(backend, wu, cum_usage)
3305
+ flush_events(events_path, wu_events)
3306
+ commit_bookkeeping(
3307
+ [wu.file, events_path],
3308
+ f"chore(loop): {wu.wu_id} blocked_human "
3309
+ f"(spinning_signature_repeat, attempt {attempt})"
3310
+ f"\n\nFeature: {wu.wu_id}",
3311
+ )
3312
+ print(f" BLOCKED — spinning signature repeat at "
3313
+ f"attempt {attempt}/{MAX_ATTEMPTS}")
3314
+ blocked = True
3315
+ break
3316
+ if (_fc, _fs) != ("other", "no_gate_marker"):
3317
+ prior_failure_signature = (_fc, _fs)
3318
+ flush_events(events_path, wu_events)
3319
+ wu_events.clear()
3320
+ reset_preserving_events(head_before, events_path)
3321
+ print(f" FAIL attempt {attempt}/{MAX_ATTEMPTS}")
3322
+ else:
3323
+ # for-else: ran out of attempts without break = spinning.
3324
+ # The reset has already happened in the failed/zero_token
3325
+ # branch above. Flush attempt notes to disk for human
3326
+ # inspection, mark the WU blocked_human, then commit it.
3327
+ #
3328
+ # Distinguish two spinning shapes in the event payload:
3329
+ # all_attempts_zero_token — every attempt billed 0 input
3330
+ # tokens (CLI/quota/connectivity issue, not a real
3331
+ # verification failure); no per-attempt notes to write.
3332
+ # spinning_detected — at least one attempt produced
3333
+ # output that failed verify(); per-attempt evidence is
3334
+ # buffered in attempt_notes.
3335
+ all_zero = bool(attempt_outcomes) and all(
3336
+ o == "zero_token" for o in attempt_outcomes)
3337
+ reason = ("all_attempts_zero_token" if all_zero
3338
+ else "spinning_detected")
3339
+ wu_key = wu.wu_id.replace("/", "_")
3340
+ note_paths = []
3341
+ for atmpt, evidence in attempt_notes:
3342
+ p = work_dir / wu_key / f"attempt-{atmpt}.md"
3343
+ p.parent.mkdir(parents=True, exist_ok=True)
3344
+ p.write_text(evidence)
3345
+ note_paths.append(p)
3346
+ backend.set_wu(wu, "status", "blocked_human")
3347
+ write_cost_to_wu(backend, wu, cum_usage)
3348
+ wu_events.append(build_event("human_escalation", wu.wu_id, {
3349
+ "reason": reason,
3350
+ "attempts": MAX_ATTEMPTS,
3351
+ "attempts_usage": attempts_usage,
3352
+ }))
3353
+ flush_events(events_path, wu_events)
3354
+ commit_bookkeeping(
3355
+ [wu.file, events_path, *note_paths],
3356
+ f"chore(loop): {wu.wu_id} blocked_human "
3357
+ f"({reason}, {MAX_ATTEMPTS} attempts)"
3358
+ f"\n\nFeature: {wu.wu_id}",
3359
+ )
3360
+ print(f" BLOCKED after {MAX_ATTEMPTS} attempts — "
3361
+ f"escalated ({reason})")
3362
+ blocked = True
3363
+
3364
+ if blocked:
3365
+ print("\nGate halted: work unit(s) need human attention.")
3366
+ return 1
3367
+ if dry_run:
3368
+ print(f"\n(dry run) Gate {gate.number} would complete and await review.")
3369
+ return 0
3370
+
3371
+ backend.set_gate(gate, "awaiting_review")
3372
+ # on_gate_passed fires here: WUs all done, gate now awaiting human review
3373
+ backend.on_gate_passed(feature_id, gate.number)
3374
+ flush_events(events_path,
3375
+ [build_event("gate_reached", feature_id, {"gate": gate.number})])
3376
+ commit_bookkeeping(
3377
+ [gate.file, events_path],
3378
+ f"chore(loop): gate {gate.number} awaiting_review\n\nFeature: {feature_id}",
3379
+ )
3380
+ is_terminal_gate = gate is gates[-1]
3381
+ # FEAT-2026-0018/T11H: in-loop auto-close sets _terminal_auto_closed_wu;
3382
+ # fire terminal flips here after gate is at awaiting_review.
3383
+ if _terminal_auto_closed_wu is not None:
3384
+ rc = _fire_and_verify_terminal_flips(
3385
+ _terminal_auto_closed_wu, feature_dir, events_path, feature_id,
3386
+ )
3387
+ if rc:
3388
+ return rc
3389
+ elif close_wu_for_terminal is not None:
3390
+ # Post-pass driver-state invariant guard (FEAT-2026-0017/T01):
3391
+ # fires AFTER fire_terminal_flips so the side-effect checks (gate
3392
+ # `passed`, roadmap row `done`, archive anchor) observe the flips.
3393
+ rc = _fire_and_verify_terminal_flips(
3394
+ close_wu_for_terminal, feature_dir, events_path, feature_id,
3395
+ )
3396
+ if rc:
3397
+ return rc
3398
+ used_combined_close = any(
3399
+ (feature_dir / ref["file"]).is_file()
3400
+ and read_frontmatter(feature_dir / ref["file"])[0].get("type") == "close"
3401
+ for ref in gate.refs
3402
+ )
3403
+ # Re-read PLAN.md status after the close ceremony: a `close` or
3404
+ # `plan-next` WU may have flipped it to `done` (single-gate combined
3405
+ # close always does; multi-gate terminal plan-next does on the
3406
+ # terminal gate). The branching below honors what the close ceremony
3407
+ # actually decided rather than guessing from gate shape alone.
3408
+ plan_fm_after, _ = read_frontmatter(feature_dir / "PLAN.md")
3409
+ feature_done = plan_fm_after.get("status") == "done"
3410
+ review = feature_dir / f"GATE-{gate.number:02d}-REVIEW.md"
3411
+ if feature_done:
3412
+ ceremony = ("combined close ceremony"
3413
+ if used_combined_close
3414
+ else "retro, lessons, docs, plan-next")
3415
+ print(f"\nGate {gate.number} complete ({ceremony}); "
3416
+ f"PLAN.md status: done.")
3417
+ print(
3418
+ "Terminal — feature ready to wrap. Next step:\n"
3419
+ " - /wrap-feature push branch + "
3420
+ "open PR + merge advisory (single-confirm per step).\n"
3421
+ " - Or manually: read RETROSPECTIVE.md, "
3422
+ "git push, gh pr create."
3423
+ )
3424
+ elif is_terminal_gate:
3425
+ print(f"\nGate {gate.number} complete (retro, lessons, docs, "
3426
+ f"plan-next); terminal gate but PLAN.md not yet `done`.")
3427
+ print(
3428
+ "Inconsistency: terminal gate closed without close ceremony "
3429
+ "flipping PLAN.md to `done`. Inspect RETROSPECTIVE.md / "
3430
+ "events.jsonl. Likely fix: manually flip PLAN.md `status: "
3431
+ "active -> done`, then `/wrap-feature`."
3432
+ )
3433
+ else:
3434
+ print(f"\nGate {gate.number} complete (retro, lessons, docs, "
3435
+ f"plan-next).")
3436
+ print(
3437
+ f"Next gate drafted. Next step:\n"
3438
+ f" - /arm-gate walk drafts, accept/revise/reject, "
3439
+ f"flip accepted WUs to `pending`,\n"
3440
+ f" mark this gate `passed`. "
3441
+ f"Reads {review.name} for planner findings.\n"
3442
+ f" - Resume python3 .specfuse/scripts/loop.py"
3443
+ )
3444
+ return 0
3445
+ finally:
3446
+ if lock_fd is not None:
3447
+ lock_fd.close()
3448
+
3449
+
3450
+ def _parse_version(s: str) -> tuple[int, ...]:
3451
+ """Lenient dotted-int parse for version compare. Non-numeric leading junk in a
3452
+ component (e.g. a `-rc1` suffix) is dropped; missing components count as 0. No
3453
+ third-party `packaging` dependency — the driver stays stdlib-only."""
3454
+ parts: list[int] = []
3455
+ for tok in str(s).strip().split("."):
3456
+ m = re.match(r"\d+", tok)
3457
+ parts.append(int(m.group()) if m else 0)
3458
+ return tuple(parts) or (0,)
3459
+
3460
+
3461
def check_scaffold_version(scaffold_path: Path | None = None,
                           driver_min: str = MIN_SCAFFOLD_VERSION) -> str:
    """Fail loud (SystemExit) unless the scaffold version file is new enough.

    Reads the version file at `scaffold_path` (default: SCAFFOLD_VERSION_PATH)
    and exits with an error message when the file is missing, is empty, or
    names a version older than `driver_min`. Only the first line of the file
    is treated as the version. Returns that version string on success.
    `scaffold_path` is injectable for testing.
    """
    path = scaffold_path or SCAFFOLD_VERSION_PATH
    if not path.exists():
        sys.exit(
            f"ERROR: {path} is missing — this scaffold predates driver version "
            f"checking. Run `specfuse upgrade` (or ./init.sh --upgrade <repo>) to "
            f"stamp it. Driver {DRIVER_VERSION} requires scaffold >= {driver_min}."
        )
    raw = path.read_text(encoding="utf-8").strip()
    if not raw:
        sys.exit(f"ERROR: {path} is empty. Run `specfuse upgrade` to restamp it.")
    raw = raw.splitlines()[0].strip()
    # Tuples compare lexicographically, so a shorter tuple that is a prefix of
    # a longer one ranks lower: (0, 2) < (0, 2, 0). Zero-pad both sides to the
    # same length so a scaffold stamped `0.2` is not rejected as older than a
    # `0.2.0` minimum.
    have = _parse_version(raw)
    need = _parse_version(driver_min)
    width = max(len(have), len(need))
    have += (0,) * (width - len(have))
    need += (0,) * (width - len(need))
    if have < need:
        sys.exit(
            f"ERROR: scaffold version {raw} is older than this driver requires "
            f"(driver {DRIVER_VERSION} needs scaffold >= {driver_min}). Run "
            f"`specfuse upgrade` (or ./init.sh --upgrade <repo>) to update the scaffold."
        )
    return raw
3485
+
3486
+
3487
def main() -> int:
    """Command-line entry point: parse flags, sanity-check the repo, run the loop.

    Exits via `sys.exit` when FEATURES_DIR does not exist, and
    `check_scaffold_version` exits on a missing, empty, or too-old scaffold
    version. Otherwise returns whatever `run` returns.
    """
    parser = argparse.ArgumentParser(
        description="Specfuse loop driver (single-repo).",
    )
    parser.add_argument(
        "--feature",
        help="Feature dir name under .specfuse/features/ "
             "(optional if exactly one feature is active).",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Walk the current gate without dispatching or writing.",
    )
    parser.add_argument(
        "--force-full-close",
        metavar="FEATURE_ID",
        help="Bypass predicate consultation and run the existing close "
             "path for the named feature. Must match the feature being processed.",
    )
    opts = parser.parse_args()

    # The driver resolves everything relative to the repo root; bail out early
    # with a hint when the features directory is not where it expects it.
    if not FEATURES_DIR.exists():
        sys.exit(f"No {FEATURES_DIR}. Run from your repo root.")

    check_scaffold_version()
    return run(
        opts.feature,
        opts.dry_run,
        force_full_close=opts.force_full_close,
    )
3501
+
3502
+
3503
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())