specfuse-loop 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- specfuse/loop/__init__.py +5 -0
- specfuse/loop/_miniyaml.py +466 -0
- specfuse/loop/adopt_feature.py +217 -0
- specfuse/loop/gate_eval.py +503 -0
- specfuse/loop/gh_backend.py +82 -0
- specfuse/loop/gh_features.py +98 -0
- specfuse/loop/lint_plan.py +616 -0
- specfuse/loop/loop.py +3504 -0
- specfuse/loop/validate_event.py +282 -0
- specfuse_loop-0.2.0.dist-info/METADATA +192 -0
- specfuse_loop-0.2.0.dist-info/RECORD +16 -0
- specfuse_loop-0.2.0.dist-info/WHEEL +5 -0
- specfuse_loop-0.2.0.dist-info/entry_points.txt +3 -0
- specfuse_loop-0.2.0.dist-info/licenses/LICENSE +201 -0
- specfuse_loop-0.2.0.dist-info/licenses/NOTICE +6 -0
- specfuse_loop-0.2.0.dist-info/top_level.txt +1 -0
specfuse/loop/loop.py
ADDED
|
@@ -0,0 +1,3504 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
# Copyright 2026 Specfuse contributors
|
|
4
|
+
# Licensed under the Apache License, Version 2.0. See LICENSE.
|
|
5
|
+
#
|
|
6
|
+
"""
|
|
7
|
+
Specfuse loop driver — single-repo, exploded-layout edition.
|
|
8
|
+
|
|
9
|
+
A "dumb driver, smart spec" loop in the Ralph tradition. Intelligence lives in the
|
|
10
|
+
work-unit files and the verification gates, never here. Per feature, per gate, the
|
|
11
|
+
driver:
|
|
12
|
+
|
|
13
|
+
1. reads the task GRAPH from PLAN.md (gate order, WU membership, dependencies),
|
|
14
|
+
2. finds the next ready work units in the current gate,
|
|
15
|
+
3. dispatches each as a FRESH `claude -p` session with its declared model,
|
|
16
|
+
handing it the WU file's prompt body,
|
|
17
|
+
4. acts as the exit oracle by running the WU's verification ITSELF,
|
|
18
|
+
5. on pass: makes one squashed, trailer-carrying commit per WU,
|
|
19
|
+
6. on fail: re-dispatches a fresh session carrying the failure evidence, up to
|
|
20
|
+
MAX_ATTEMPTS, then escalates (blocked_human) and halts the gate,
|
|
21
|
+
7. when every WU in the gate is done — including the closing sequence
|
|
22
|
+
(retrospective -> lessons -> docs -> plan-next) — marks the gate
|
|
23
|
+
awaiting_review and stops for human reflection.
|
|
24
|
+
|
|
25
|
+
Ownership (one fact, one home):
|
|
26
|
+
- PLAN.md owns the SHAPE : gates, which WUs are in them, dependency edges.
|
|
27
|
+
- GATE-NN owns the GATE : gate status + definition of done + reflection.
|
|
28
|
+
- WU-*.md owns ITSELF : type, model, status, attempts + the prompt body.
|
|
29
|
+
|
|
30
|
+
Durable state lives in those files, git history, and the per-feature event log —
|
|
31
|
+
never in a context window. Each dispatch is a fresh session. That is the Ralph
|
|
32
|
+
property, kept at work-unit granularity because units are crafted to land in one pass.
|
|
33
|
+
|
|
34
|
+
Two things differ in the orchestrator and are isolated behind `Backend`:
|
|
35
|
+
- STATE BACKEND : status in WU/GATE files here; GitHub issue labels + registry there.
|
|
36
|
+
- DISPATCH : subprocess here; inbox + polling loop there.
|
|
37
|
+
Swap those; everything else is portable.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
from __future__ import annotations
|
|
41
|
+
|
|
42
|
+
import argparse
|
|
43
|
+
import datetime as dt
|
|
44
|
+
import fcntl
|
|
45
|
+
import json
|
|
46
|
+
import logging
|
|
47
|
+
import re
|
|
48
|
+
import subprocess
|
|
49
|
+
import sys
|
|
50
|
+
import time
|
|
51
|
+
from dataclasses import dataclass, field
|
|
52
|
+
from pathlib import Path
|
|
53
|
+
|
|
54
|
+
from . import _miniyaml
|
|
55
|
+
from .gate_eval import evaluate_auto_close, AutoCloseDecision
|
|
56
|
+
|
|
57
|
+
# On-disk layout, expressed as relative paths rooted at the scaffold directory.
SPECFUSE_DIR = Path(".specfuse")
REPO_ROOT = SPECFUSE_DIR.parent
FEATURES_DIR = SPECFUSE_DIR / "features"
VERIFICATION_PATH = SPECFUSE_DIR / "verification.yml"

# Version stamped into every event this driver emits (see build_event).
DRIVER_VERSION = "0.2.0"

# Minimum scaffold layout this driver is able to drive. init.sh writes the
# scaffold's own version to `.specfuse/VERSION`, and check_scaffold_version()
# aborts at startup — pointing at `specfuse upgrade` — when the consumer's
# scaffold is older than this value. Only raise it when a scaffold-format change
# makes an older `.specfuse/` undriveable.
MIN_SCAFFOLD_VERSION = "0.2.0"
SCAFFOLD_VERSION_PATH = SPECFUSE_DIR / "VERSION"

# Spinning threshold: after 3 failed verification cycles the WU is escalated.
MAX_ATTEMPTS = 3

# Command template for launching a fresh agent session. The {model} and {effort}
# placeholders are filled in per WU; the prompt itself is piped on stdin.
CLAUDE_CMD = ["claude", "-p", "--model", "{model}", "--effort", "{effort}"]

# Every value accepted for a WU's `effort:` frontmatter field.
VALID_EFFORT = frozenset({"low", "medium", "high", "xhigh", "max"})

# Family aliases allowed in a WU's `model:` frontmatter field. The CLI turns an
# alias into the latest concrete model when the session is dispatched; the loop
# forwards the value untouched and performs no expansion of its own.
MODEL_ALIASES = frozenset({"sonnet", "opus", "haiku"})
|
|
79
|
+
|
|
80
|
+
# Fallbacks used by load_wu when a WU's frontmatter omits `model` or `effort`.
# An explicit value in the WU always wins. Keys cover every VALID_TYPES value.
MODEL_BY_TYPE = {
    "implementation": "sonnet",
    "retrospective": "sonnet",
    "lessons": "sonnet",
    "docs": "sonnet",
    "plan-next": "opus",
    "close": "opus",
    "close-intermediate": "opus",
}
EFFORT_BY_TYPE = {
    "implementation": "medium",
    "retrospective": "low",
    "lessons": "low",
    "docs": "low",
    "plan-next": "high",
    "close": "high",
    "close-intermediate": "high",
}

# Maps each WU type to the verification gate set (a key in verification.yml)
# that applies to it.
GATES_FOR_TYPE = {
    "implementation": "code",
    "retrospective": "doc",
    "lessons": "doc",
    "docs": "doc",
    "plan-next": "plannext",
    # `close` folds the four closing ceremonies into a single session for any
    # terminal gate (single- or multi-gate). `close-intermediate` does the same
    # for non-terminal gates and leaves `plan-next` as its own dispatch.
    # Both reuse the `plannext` gate set: lint_plan.py verifies structural
    # integrity post-close.
    "close": "plannext",
    "close-intermediate": "plannext",
}

# Accepted values for a close-type WU's `verdict` field.
VERDICT_VALUES = frozenset({"met", "met_locally", "partially_met", "not_met"})

# Statuses the driver is willing to dispatch. `draft` is deliberately absent:
# plan-next writes the next gate's WUs as drafts and a human must arm them.
DISPATCHABLE = {"pending", "ready"}
DONE = "done"
|
|
121
|
+
DONE = "done"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def verdict_permits_terminal_flips(verdict: str | None) -> bool:
    """Tell whether a close verdict allows terminal status flips.

    Only the exact string ``'met'`` qualifies. Every other value — including
    ``None`` — yields ``False``.
    """
    is_fully_met = verdict == "met"
    return is_fully_met
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# --------------------------------------------------------------------------- #
|
|
130
|
+
# Data model #
|
|
131
|
+
# --------------------------------------------------------------------------- #
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@dataclass
class WorkUnit:
    """One work unit: graph data from PLAN.md merged with its own WU file.

    Field order is part of the constructor signature and must not change.
    """

    wu_id: str
    file: Path
    # Dependency edges, taken from the PLAN.md graph.
    depends_on: list[str]
    # WU type, taken from the WU file's frontmatter.
    type: str
    model: str
    status: str
    attempts: int
    title: str
    # The prompt text handed to the dispatched session.
    body: str
    # One of low|medium|high|xhigh|max; forwarded as --effort to `claude -p`.
    effort: str = "medium"
    # OPTIONAL sandbox escape. With `unsandboxed: true` in the WU frontmatter the
    # driver appends `--dangerously-skip-permissions` to the `claude -p` call.
    # The same frontmatter must then carry an `unsandboxed_rationale` string (a
    # one-line justification, written to events.jsonl as the audit signal);
    # load_wu refuses a WU that sets unsandboxed=True without a rationale.
    unsandboxed: bool = False
    unsandboxed_rationale: str = ""
    verdict: str | None = None
    produces_driver_helper: list[str] = field(default_factory=list)
    # OPTIONAL author-declared deliverable contract: the file path(s) this WU is
    # contracted to yield. Not the same thing as `files_changed` (the runtime
    # claim in the RESULT block) or `produces_driver_helper` (driver symbols,
    # lint-only): `produces` names files and IS machine-enforced by
    # FEAT-2026-0022/T02's presence gate (each path must exist and be non-empty
    # at completion).
    produces: list[str] = field(default_factory=list)
    # OPTIONAL additional verification gate sets that verify() unions onto the
    # set chosen by WU type. Names index into verification.yml exactly like the
    # type sets do (e.g. `extra_gates: [live-verify]`). A name missing from
    # verification.yml is a CONFIGURATION ERROR, never a silent pass. See
    # issue #62.
    extra_gates: list[str] = field(default_factory=list)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@dataclass
class GateNode:
    """One gate of the PLAN.md graph together with its GATE file status."""

    number: int
    file: Path
    # Status as read from the GATE file's frontmatter.
    status: str
    # Work-unit references for this gate, shaped [{id, file, depends_on}].
    refs: list[dict] = field(default_factory=list)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# --------------------------------------------------------------------------- #
|
|
177
|
+
# Frontmatter helpers #
|
|
178
|
+
# --------------------------------------------------------------------------- #
|
|
179
|
+
|
|
180
|
+
# A frontmatter fence: a line holding only `---` plus optional trailing whitespace.
FM = re.compile(r"^---\s*$")


def read_frontmatter(path: Path) -> tuple[dict, str]:
    """Split a file into its YAML frontmatter and the text that follows it.

    Args:
        path: file to read.

    Returns:
        ``(frontmatter_dict, body_text)``. When the first line is not a ``---``
        fence (or the file is empty) the result is ``({}, <entire file text>)``.
        Otherwise the lines between the opening fence and the next fence are
        handed to ``_miniyaml.parse`` (a falsy parse result becomes ``{}``) and
        the body is every line after the closing fence, re-joined with ``\\n``.

    Note:
        If the closing fence is missing, every line after the opening fence is
        treated as frontmatter and the body comes back empty.
    """
    # Read exactly once: the no-frontmatter branch returns this same text, so a
    # second read would only add I/O and could observe different content.
    text = path.read_text()
    lines = text.splitlines()
    if not lines or not FM.match(lines[0]):
        return {}, text
    # Walk forward to the closing fence (or the end of the file).
    j = 1
    while j < len(lines) and not FM.match(lines[j]):
        j += 1
    fm = _miniyaml.parse("\n".join(lines[1:j])) or {}
    body = "\n".join(lines[j + 1:])
    return fm, body
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def write_frontmatter_field(path: Path, key: str, value) -> None:
    """Replace (or insert) one top-level key in a file's YAML frontmatter.

    The body is left untouched. This is the whole reason the exploded layout is
    nicer than one shared file: status writes are clean single-file edits, not
    regex surgery.

    Args:
        path: file whose frontmatter is edited in place.
        key: frontmatter key. Only a line that starts with ``<key>:`` at column
            0 is considered a match; the first such line is replaced.
        value: new value, rendered with ordinary f-string formatting.

    Raises:
        ValueError: if the file does not start with a ``---`` fence, or if the
            opening fence is never closed.
    """
    lines = path.read_text().splitlines()
    if not lines or not FM.match(lines[0]):
        raise ValueError(f"{path} has no frontmatter")
    closing = next((i for i in range(1, len(lines)) if FM.match(lines[i])), None)
    if closing is None:
        # Without a closing fence every remaining line would be treated as
        # frontmatter and written back inside the fence, silently folding the
        # body into the metadata block. Refuse instead of corrupting the file.
        raise ValueError(f"{path} has unterminated frontmatter (no closing '---')")
    block = lines[1:closing]
    pat = re.compile(rf"^{re.escape(key)}:")
    for idx, line in enumerate(block):
        if pat.match(line):
            block[idx] = f"{key}: {value}"
            break
    else:
        # Key not present yet: append it as the last frontmatter line.
        block.append(f"{key}: {value}")
    new = ["---", *block, "---", *lines[closing + 1:]]
    path.write_text("\n".join(new) + "\n")
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
# --------------------------------------------------------------------------- #
|
|
219
|
+
# Plan / gate / WU loading #
|
|
220
|
+
# --------------------------------------------------------------------------- #
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def find_feature(arg: str | None) -> Path:
    """Resolve the feature directory the driver should work on.

    With ``arg`` given: a value starting with ``.`` is used as a path as-is,
    anything else is looked up under ``FEATURES_DIR``; the directory must
    contain a ``PLAN.md``.

    Without ``arg``: every directory under ``FEATURES_DIR`` that has a
    ``PLAN.md`` is inspected and the single one whose plan status is ``active``
    is returned.

    Exits the process (``sys.exit`` with a message) when the requested feature
    has no ``PLAN.md``, when no feature is active, or when several are.
    """
    if arg:
        feature_dir = Path(arg) if arg.startswith(".") else FEATURES_DIR / arg
        if (feature_dir / "PLAN.md").exists():
            return feature_dir
        sys.exit(f"No PLAN.md under {feature_dir}")

    actives = []
    done_pending_wrap = []
    for candidate in sorted(FEATURES_DIR.glob("*/")):
        plan = candidate / "PLAN.md"
        if not plan.exists():
            continue
        fm, _ = read_frontmatter(plan)
        plan_status = fm.get("status")
        if plan_status == "active":
            actives.append(candidate)
        elif plan_status == "done" and (candidate / "RETROSPECTIVE.md").is_file():
            # Surface done features that may still need wrapping. Conservative
            # heuristic: a RETROSPECTIVE.md exists, i.e. the close ceremony ran.
            # The operator decides via /wrap-feature whether push + PR are
            # still pending.
            done_pending_wrap.append(candidate)

    if len(actives) == 1:
        return actives[0]
    if actives:
        sys.exit(f"Multiple active features; pass --feature. Found: "
                 f"{[d.name for d in actives]}")

    # Nothing active: explain the available next steps.
    parts = ["No active feature. Set a feature's PLAN.md status to 'active'.\n"]
    if done_pending_wrap:
        names = ", ".join(d.name for d in done_pending_wrap[-3:])
        parts.append(
            f" - /wrap-feature finalize a recently-closed feature "
            f"(push branch + open PR + merge advisory).\n"
            f" Candidates: {names}\n"
        )
    parts.append(
        " - /pick-feature choose a 'planned' feature from the roadmap and activate it\n"
        " - /draft-feature scaffold a new feature (gates + gate-1 work units)\n"
        " - /arm-gate if a feature halted at a gate boundary awaiting review"
    )
    sys.exit("".join(parts))
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def load_graph(feature_dir: Path) -> tuple[dict, list[GateNode]]:
    """Parse PLAN.md: feature frontmatter + the `gates` graph block.

    The graph is the first fenced ```yaml / ```yml block in the PLAN.md body.

    Args:
        feature_dir: directory that holds ``PLAN.md`` and the gate files.

    Returns:
        ``(feature_frontmatter, gates)``, one ``GateNode`` per entry of the
        graph's ``gates`` list, in file order. A gate whose markdown file does
        not exist gets status ``"open"``.

    Raises:
        KeyError: if a gate entry lacks ``file`` or ``gate``.

    Exits the process (``sys.exit``) when PLAN.md contains no yaml block.
    """
    fm, body = read_frontmatter(feature_dir / "PLAN.md")
    m = re.search(r"```ya?ml\s*\n(.*?)\n```", body, re.DOTALL)
    if not m:
        sys.exit("PLAN.md has no ```yaml graph block.")
    graph = _miniyaml.parse(m.group(1)) or {}
    gates: list[GateNode] = []
    # `or []` covers a `gates:` key that is present but empty (parsed as None),
    # the same way `work_units` is guarded below.
    for g in graph.get("gates") or []:
        gate_file = feature_dir / g["file"]
        # A gate listed in the plan may not have its markdown authored yet.
        gfm, _ = read_frontmatter(gate_file) if gate_file.exists() else ({}, "")
        gates.append(
            GateNode(
                number=g["gate"],
                file=gate_file,
                status=gfm.get("status", "open"),
                refs=g.get("work_units", []) or [],
            )
        )
    return fm, gates
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _normalize_str_list_field(
    path: Path, name: str, raw, *, coerce_items: bool = False
) -> list[str]:
    """Normalize an optional string-or-list frontmatter value into a list.

    ``None`` becomes ``[]``, a bare string becomes a one-element list, and a
    list is returned as-is (or with every item passed through ``str`` when
    ``coerce_items`` is true). Any other type raises ``ValueError`` naming the
    file and the field.
    """
    if raw is None:
        return []
    if isinstance(raw, str):
        return [raw]
    if isinstance(raw, list):
        return [str(item) for item in raw] if coerce_items else raw
    raise ValueError(
        f"{path}: `{name}` must be a string or list of strings, "
        f"got {type(raw).__name__!r}"
    )


def load_wu(feature_dir: Path, ref: dict) -> WorkUnit:
    """Build a ``WorkUnit`` from a PLAN.md reference and its WU file.

    Args:
        feature_dir: directory the reference's ``file`` is relative to.
        ref: graph entry with ``id`` and ``file`` and, optionally,
            ``depends_on``.

    Returns:
        The populated ``WorkUnit``. ``type`` defaults to ``"implementation"``,
        ``status`` to ``"pending"`` and ``attempts`` to ``0``. Missing
        ``effort``/``model`` fall back to ``EFFORT_BY_TYPE``/``MODEL_BY_TYPE``
        for the WU type. The title is the first markdown ``# `` heading of the
        body, or the WU id when there is none. ``verdict`` is read only for
        ``close`` / ``close-intermediate`` WUs.

    Raises:
        ValueError: for an ``effort`` outside ``VALID_EFFORT``, for
            ``unsandboxed: true`` without a rationale, or for a
            ``produces_driver_helper`` / ``produces`` / ``extra_gates`` value
            that is neither a string nor a list.
    """
    path = feature_dir / ref["file"]
    fm, body = read_frontmatter(path)
    title_m = re.search(r"^#\s+(.*)$", body, re.MULTILINE)
    wu_type = fm.get("type", "implementation")

    effort = fm.get("effort")
    if effort is None:
        effort = EFFORT_BY_TYPE.get(wu_type, "medium")
    elif effort not in VALID_EFFORT:
        raise ValueError(
            f"{path}: invalid effort '{effort}' — must be one of "
            f"{sorted(VALID_EFFORT)}"
        )

    wu_model = fm.get("model")
    if wu_model is None:
        wu_model = MODEL_BY_TYPE.get(wu_type, "claude-sonnet-4-6")

    # NOTE(review): bool() assumes the YAML parser yields a real boolean here; a
    # string value such as "false" would be truthy — confirm against _miniyaml.
    unsandboxed = bool(fm.get("unsandboxed", False))
    unsandboxed_rationale = str(fm.get("unsandboxed_rationale", "") or "").strip()
    if unsandboxed and not unsandboxed_rationale:
        raise ValueError(
            f"{path}: `unsandboxed: true` requires a non-empty "
            f"`unsandboxed_rationale` in the same frontmatter. Sandbox-escape "
            f"is auditable; the rationale is the audit signal."
        )

    verdict: str | None = None
    if wu_type in {"close", "close-intermediate"}:
        verdict = fm.get("verdict") or None

    # Validated in this order so the first offending field is the one reported.
    produces_driver_helper = _normalize_str_list_field(
        path, "produces_driver_helper", fm.get("produces_driver_helper")
    )
    produces = _normalize_str_list_field(path, "produces", fm.get("produces"))
    # Only extra_gates stringifies its items; the other two lists pass through.
    extra_gates = _normalize_str_list_field(
        path, "extra_gates", fm.get("extra_gates"), coerce_items=True
    )

    return WorkUnit(
        wu_id=ref["id"],
        file=path,
        depends_on=list(ref.get("depends_on", []) or []),
        type=wu_type,
        model=wu_model,
        effort=effort,
        status=fm.get("status", "pending"),
        # `or 0` tolerates an `attempts:` key that is present but empty (None).
        attempts=int(fm.get("attempts", 0) or 0),
        title=title_m.group(1).strip() if title_m else ref["id"],
        body=body.strip(),
        unsandboxed=unsandboxed,
        unsandboxed_rationale=unsandboxed_rationale,
        verdict=verdict,
        produces_driver_helper=produces_driver_helper,
        produces=produces,
        extra_gates=extra_gates,
    )
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
# --------------------------------------------------------------------------- #
|
|
371
|
+
# State backend seam #
|
|
372
|
+
# --------------------------------------------------------------------------- #
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
class Backend:
    """Single seam for every status mutation.

    The orchestrator subclasses this to write GitHub issue labels and registry
    frontmatter instead of these files — nothing above this line changes.
    """

    def set_wu(self, wu: WorkUnit, key: str, value) -> None:
        """Write ``key: value`` into the WU file, then mirror it on ``wu``."""
        write_frontmatter_field(wu.file, key, value)
        # Disk first, memory second, so the object never gets ahead of the file.
        setattr(wu, key, value)

    def set_gate(self, gate: GateNode, status: str) -> None:
        """Persist a gate's status, creating a stub gate file when none exists."""
        gate_path = gate.file
        if gate_path.is_file():
            write_frontmatter_field(gate_path, "status", status)
        else:
            # PLAN.md may reference a gate whose markdown was never authored
            # (e.g. plan-next drafted an empty follow-up gate that the human
            # never filled in). Materialize it here: otherwise the first
            # set_gate on a missing file crashes write_frontmatter_field with
            # FileNotFoundError and the whole feature halts unrecoverably.
            gate_path.parent.mkdir(parents=True, exist_ok=True)
            stub = "".join(
                [
                    f"---\ngate: {gate.number}\nstatus: {status}\n---\n\n",
                    f"# Gate {gate.number}\n\n",
                    "_Stub created by loop.set_gate because PLAN.md referenced ",
                    "this gate but no markdown file existed. Body intentionally ",
                    "minimal; edit if you want a real Definition of Done._\n",
                ]
            )
            gate_path.write_text(stub)
        gate.status = status

    def on_feature_start(self, feature_id: str, feat_fm: dict) -> None:
        """Hook: invoked once per run(), before any dispatch, even on no-op polls."""

    def on_gate_passed(self, feature_id: str, gate_number: int) -> None:
        """Hook: invoked once a gate's WUs are all done and it flips to awaiting_review."""

    def on_feature_complete(self, feature_id: str) -> None:
        """Hook: invoked when every gate has passed and the feature is fully complete."""
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def make_backend(feat_fm: dict) -> Backend:
    """Pick the state backend for a feature.

    A ``GitHubBackend`` is returned when the frontmatter's ``source_issue_url``
    has the exact form ``https://github.com/<owner>/<repo>/issues/<number>``;
    in every other case (key missing, empty, or any other URL shape) the plain
    file-based ``Backend`` is used.
    """
    source_url = feat_fm.get("source_issue_url", "") or ""
    issue = re.match(r"^https://github\.com/([^/]+/[^/]+)/issues/(\d+)$", source_url)
    if issue is None:
        return Backend()
    # Imported here so gh_backend is only loaded for GitHub-sourced features.
    from .gh_backend import GitHubBackend

    repo, number = issue.groups()
    return GitHubBackend(repo=repo, issue_number=int(number))
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
# --------------------------------------------------------------------------- #
|
|
426
|
+
# Event log (per feature) #
|
|
427
|
+
# --------------------------------------------------------------------------- #
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def build_event(event_type: str, correlation_id: str, payload: dict) -> dict:
    """Assemble one event record without touching the disk.

    Events are buffered in memory over a WU's lifecycle and flushed at outcome
    time, so that a `git reset --hard` between attempts cannot silently discard
    events that had already been appended.

    Returns:
        A dict with a timezone-aware UTC ISO-8601 ``timestamp``, the given
        ``correlation_id`` and ``event_type``, ``source`` fixed to ``"driver"``,
        ``source_version`` set to ``DRIVER_VERSION``, and the ``payload`` object
        itself (not a copy).
    """
    stamped_at = dt.datetime.now(tz=dt.timezone.utc).isoformat()
    record = {"timestamp": stamped_at}
    record["correlation_id"] = correlation_id
    record["event_type"] = event_type
    record["source"] = "driver"
    record["source_version"] = DRIVER_VERSION
    record["payload"] = payload
    return record
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def flush_events(events_path: Path, events: list) -> None:
    """Append buffered events to the JSONL log, one JSON document per line.

    An empty batch is a no-op: the file is neither created nor opened.
    """
    if events:
        with events_path.open("a") as log:
            log.writelines(f"{json.dumps(record)}\n" for record in events)
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
# --------------------------------------------------------------------------- #
|
|
454
|
+
# Attempt-outcome event helpers (FEAT-2026-0016/T01) #
|
|
455
|
+
# --------------------------------------------------------------------------- #
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def parse_gate_failure_signature(stdout: str) -> tuple[str, str]:
    """Derive ``(failure_class, failure_signature)`` from gate runner stdout.

    The first line of the form ``### <gate>: FAIL`` selects the class: the gate
    name itself for ``tests``, ``lint``, ``security`` and ``coverage``, and
    ``other`` for any other gate name. The signature is then looked for in (at
    most) the 50 lines that follow the marker, using a class-specific pattern;
    failing that, the first non-blank line (trimmed to 100 characters) is used,
    and ``"unknown"`` when there is none.

    Returns ``('other', 'no_gate_marker')`` when stdout holds no marker at all.
    Both returned strings are always non-empty.
    """
    marker = re.search(r"^### (\w+): FAIL", stdout, re.MULTILINE)
    if marker is None:
        return "other", "no_gate_marker"

    # One extraction pattern per recognised failure class.
    signature_patterns: dict[str, re.Pattern[str]] = {
        "tests": re.compile(r"^FAIL: (test_\S+)", re.MULTILINE),
        "lint": re.compile(r"\b([A-Z]\d{3,4})\b"),
        "security": re.compile(r"Issue: \[(B\d+)"),
        "coverage": re.compile(r"^([^\s]+\.py)\s+\d+\s+\d+", re.MULTILINE),
    }
    gate_name = marker.group(1)
    failure_class = gate_name if gate_name in signature_patterns else "other"

    # The window starts right after the marker text, so its first entry is
    # whatever trailed the marker on the same line.
    tail_lines = stdout[marker.end():].splitlines()[:50]

    extractor = signature_patterns.get(failure_class)
    if extractor is not None:
        hit = extractor.search("\n".join(tail_lines))
        if hit is not None:
            return failure_class, hit.group(1) or "unknown"

    first_text = next(
        (text for text in (ln.strip() for ln in tail_lines) if text), None
    )
    if first_text is None:
        return failure_class, "unknown"
    return failure_class, first_text[:100]
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def detect_spinning_signature_repeat(
    current: tuple[str | None, str | None],
    prior: tuple[str | None, str | None] | None,
) -> bool:
    """Report whether the same (failure_class, failure_signature) came back.

    The answer is ``False`` when:
      * ``prior`` is ``None`` (first failure — nothing to compare against);
      * either element of ``current`` is ``None``;
      * ``current`` or ``prior`` equals the ``('other', 'no_gate_marker')``
        sentinel, so parser-opaque failures never trigger a false-positive
        halt (AC4).
    Otherwise it is ``True`` exactly when ``current == prior``.
    """
    opaque = ("other", "no_gate_marker")
    if prior is None or current[0] is None or current[1] is None:
        return False
    if opaque in (current, prior):
        return False
    return current == prior
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def extract_failure_excerpt(stdout: str, max_chars: int = 500) -> str:
    """Return the tail of the failure-relevant lines from gate stdout.

    Relevant lines contain FAIL, Error, Exception, or Traceback (matched
    case-insensitively). When no line qualifies, all of stdout is used instead.

    Args:
        stdout: captured gate output.
        max_chars: size limit for the excerpt. Despite the name, the limit is
            applied to the UTF-8 *encoded* text, so it is a byte budget; text
            with multi-byte characters yields fewer than ``max_chars``
            characters.

    Returns:
        The selected text unchanged when it fits the budget, otherwise its last
        ``max_chars`` bytes decoded back to ``str``. A multi-byte character cut
        in half at the start of the window is dropped rather than raising. A
        budget of zero or less yields ``""``.
    """
    if max_chars <= 0:
        # Guard before slicing: `encoded[-0:]` is the *whole* buffer, so a zero
        # budget would otherwise return everything instead of nothing.
        return ""
    keyword = re.compile(r"FAIL|Error|Exception|Traceback", re.IGNORECASE)
    relevant = [ln for ln in stdout.splitlines() if keyword.search(ln)]
    text = "\n".join(relevant) if relevant else stdout
    encoded = text.encode("utf-8")
    if len(encoded) <= max_chars:
        return text
    return encoded[-max_chars:].decode("utf-8", errors="ignore")
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def emit_attempt_outcome(
    wu: WorkUnit,
    attempt: int,
    outcome: str,
    usage: dict,
    *,
    failure_class: str | None = None,
    failure_signature: str | None = None,
    failure_excerpt: str | None = None,
    files_touched: list[str] | None = None,
    agent_status: str | None = None,
    agent_blocked_reason: str | None = None,
    extras: dict | None = None,
) -> dict:
    """Build a standardized ``attempt_outcome`` event dict (v1 payload shape).

    Note: T01's own events lack the standardized payload; bootstrap gap.

    Nothing is written here. The caller appends the returned dict to wu_events
    and flush_events runs at the existing flush point, which preserves the
    'one flush per outcome-cycle' invariant.

    Args:
        wu: work unit the attempt belongs to; supplies ``model``, ``effort``,
            the event's correlation id (``wu_id``) and, when the attribute is
            present, ``re_arm_count`` (0 otherwise).
        attempt: attempt number recorded in the payload.
        outcome: outcome label recorded in the payload.
        usage: usage metrics; absent keys default to ``0.0`` for
            ``duration_seconds`` / ``cost_usd`` and ``0`` for the token counts.
        files_touched: recorded as ``[]`` when ``None``.
        extras: optional additional fields merged into the payload last, so
            they can carry outcome-specific fields (e.g. assertion, summary)
            that are not part of the standard schema — and they override a
            standard key of the same name.

    Returns:
        The event dict produced by ``build_event``.
    """
    usage_defaults = (
        ("duration_seconds", 0.0),
        ("cost_usd", 0.0),
        ("input_tokens", 0),
        ("output_tokens", 0),
        ("cache_read_input_tokens", 0),
        ("cache_creation_input_tokens", 0),
    )
    payload: dict = {"attempt": attempt, "outcome": outcome}
    for metric, fallback in usage_defaults:
        payload[metric] = usage.get(metric, fallback)
    payload["model"] = wu.model
    payload["effort"] = wu.effort
    payload["failure_class"] = failure_class
    payload["failure_signature"] = failure_signature
    payload["failure_excerpt"] = failure_excerpt
    payload["files_touched"] = [] if files_touched is None else files_touched
    payload["agent_status"] = agent_status
    payload["agent_blocked_reason"] = agent_blocked_reason
    payload["re_arm_count"] = getattr(wu, "re_arm_count", 0)
    if extras:
        payload.update(extras)
    return build_event("attempt_outcome", wu.wu_id, payload)
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
# --------------------------------------------------------------------------- #
|
|
586
|
+
# Git #
|
|
587
|
+
# --------------------------------------------------------------------------- #
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def git(*args: str) -> str:
    """Run ``git <args>`` and return its stdout with surrounding whitespace stripped.

    Raises:
        subprocess.CalledProcessError: when git exits with a non-zero status.
    """
    completed = subprocess.run(
        ["git", *args],
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout.strip()
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def git_diff_names(head_before: str, head_after: str) -> list[str]:
    """List the file paths that differ between two refs (git diff --name-only).

    When head_after is 'HEAD', untracked files reported by
    git ls-files --others --exclude-standard are appended as well (per the
    [driver/files_changed-guard] LEARNINGS). Blank entries are dropped.

    Returns an empty list when either git command exits with a non-zero status.
    """

    def _stdout_lines(*git_args: str) -> list[str]:
        # check=True turns a failing git call into CalledProcessError.
        done = subprocess.run(
            ["git", *git_args], capture_output=True, text=True, check=True,
        )
        return done.stdout.strip().splitlines()

    try:
        changed = _stdout_lines("diff", "--name-only", head_before, head_after)
        if head_after == "HEAD":
            untracked = _stdout_lines("ls-files", "--others", "--exclude-standard")
            changed.extend(entry for entry in untracked if entry)
    except subprocess.CalledProcessError:
        return []
    return [entry for entry in changed if entry]
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def require_git_ready() -> None:
    """Abort unless the working directory is a git work tree with a commit.

    The driver squashes per WU on top of HEAD, so the repo needs an initial
    commit. Exits the process (``sys.exit`` with a message) when either check
    fails.
    """

    def _rev_parse_ok(target: str) -> bool:
        probe = subprocess.run(
            ["git", "rev-parse", target], capture_output=True, text=True
        )
        return probe.returncode == 0

    if not _rev_parse_ok("--is-inside-work-tree"):
        sys.exit("Not a git repository. Run `git init` from the repo root first.")
    if not _rev_parse_ok("HEAD"):
        sys.exit(
            "Git repository has no commits yet. The driver squashes per work "
            "unit on top of HEAD; create an initial commit first "
            "(e.g., `git commit --allow-empty -m 'init'`)."
        )
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
class FeatureBranchError(RuntimeError):
    """Signals that the feature branch could not be entered safely.

    The exception message is meant to be actionable and human-readable, and to
    include git's own captured stderr when a checkout fails. It exists so that
    a bare subprocess.CalledProcessError (which swallows stderr) does not
    escape main().
    """
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def _tracked_dirty_paths() -> set[str]:
    """Paths with TRACKED, uncommitted changes (staged or unstaged), repo-relative.

    Untracked files (porcelain `??`) are excluded: they never block a create
    (`checkout -B`) and carry harmlessly, so counting them would spuriously
    flag a leftover events.jsonl as an "unexpected" change. The dirty-tree
    failure in #48 is tracked local modifications ("your local changes would
    be overwritten by checkout"), which is exactly what this set captures.

    The status is read in the NUL-delimited `-z` form. In that form git emits
    path names verbatim (no C-style quoting or octal escapes) and reports a
    rename/copy as the destination path followed by the source path as a
    separate NUL-terminated field, so names containing spaces, quotes,
    non-ASCII characters, or a literal " -> " are returned exactly.

    Raises:
        subprocess.CalledProcessError: if `git status` exits non-zero.
    """
    raw = subprocess.run(
        ["git", "status", "--porcelain", "-z"],
        capture_output=True, text=True, check=True,
    ).stdout
    records = iter(raw.split("\0"))
    paths: set[str] = set()
    for record in records:
        if len(record) < 4:
            continue  # shortest real record is "XY p"; skips the trailing empty chunk
        status, path = record[:2], record[3:]
        if "R" in status or "C" in status:
            # Rename/copy: the next field is the SOURCE path, not a record.
            next(records, None)
        if status == "??":
            continue  # untracked — carries harmlessly, never blocks checkout
        paths.add(path)
    return paths
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def _expected_flip_paths(feature_dir: "Path | None") -> set[str]:
|
|
668
|
+
"""The paths /pick-feature legitimately leaves dirty before the loop runs:
|
|
669
|
+
`.specfuse/roadmap.md` and the active feature's `PLAN.md`.
|
|
670
|
+
"""
|
|
671
|
+
expected = {".specfuse/roadmap.md"}
|
|
672
|
+
if feature_dir is not None:
|
|
673
|
+
try:
|
|
674
|
+
top = subprocess.run(
|
|
675
|
+
["git", "rev-parse", "--show-toplevel"],
|
|
676
|
+
capture_output=True, text=True, check=True,
|
|
677
|
+
).stdout.strip()
|
|
678
|
+
rel = (Path(feature_dir) / "PLAN.md").resolve().relative_to(
|
|
679
|
+
Path(top).resolve()
|
|
680
|
+
)
|
|
681
|
+
expected.add(str(rel))
|
|
682
|
+
except (subprocess.CalledProcessError, ValueError):
|
|
683
|
+
pass # can't resolve PLAN path — fall back to roadmap-only
|
|
684
|
+
return expected
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
def _checked_checkout(checkout_args: list[str], action: str) -> str:
    """Run `git <checkout_args...>` and turn a failure into FeatureBranchError.

    Args:
        checkout_args: arguments placed after `git` (the caller includes the
            `checkout` sub-command itself).
        action: short description of the operation, used as the prefix of the
            error message.

    Returns:
        git's stdout with surrounding whitespace removed.

    Raises:
        FeatureBranchError: on a non-zero exit. The message carries git's
            stderr, falling back to stdout, then to a fixed placeholder.
    """
    outcome = subprocess.run(
        ["git", *checkout_args], capture_output=True, text=True,
    )
    if outcome.returncode == 0:
        return outcome.stdout.strip()
    detail = outcome.stderr.strip() or outcome.stdout.strip() or "(no git output)"
    raise FeatureBranchError(f"{action} failed: {detail}")
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
def ensure_feature_branch(feat_fm: dict, feature_dir: "Path | None" = None) -> None:
    """Ensure HEAD is on the feature's declared branch, creating it if needed.

    The methodology assigns each feature its own branch (PLAN.md frontmatter's
    `branch` field). Without this, per-WU squash commits land on whatever
    branch the user happened to be on, violating per-feature isolation.

    Idempotent: no-op if already on the declared branch. If the branch
    doesn't exist locally, creates it from the current HEAD (`git checkout -B`),
    which carries the expected /pick-feature flips (roadmap.md + PLAN.md) onto
    the new branch.

    Robust to the two real-world states that used to crash with a bare
    CalledProcessError (#48):

    * **Dirty tree.** Tracked changes confined to the expected /pick-feature
      flips are carried onto a freshly created branch. Tracked changes to any
      OTHER path stop the driver with a message naming them (silently moving
      unrelated edits onto a feature branch is worse than failing loudly).
    * **Stale divergent branch.** A pre-existing branch that is not an ancestor
      of HEAD is surfaced rather than silently checked out; resolution policy
      (reuse / recreate / abort) is left to the human.

    The existence and ancestry probes address the branch by its full ref name
    (`refs/heads/<branch>`), so a tag or other ref that merely shares the
    branch's short name is not mistaken for an existing local branch.

    Args:
        feat_fm: feature frontmatter; only its `branch` key is read.
        feature_dir: the active feature's directory, used to work out which
            PLAN.md is allowed to be dirty. Optional.

    Raises:
        FeatureBranchError: on a divergent existing branch, on unexpected
            dirty paths, or on any checkout failure (carrying git's stderr).
    """
    branch = feat_fm.get("branch")
    if not branch:
        return  # not declared — defensive (lint_plan normally requires it)
    current = subprocess.run(
        ["git", "branch", "--show-current"],
        capture_output=True, text=True,
    ).stdout.strip()
    if current == branch:
        return
    # Fully-qualified so that only a LOCAL BRANCH of this name counts.
    branch_ref = f"refs/heads/{branch}"
    exists = subprocess.run(
        ["git", "rev-parse", "--verify", "--quiet", branch_ref],
        capture_output=True, text=True,
    ).returncode == 0
    if exists:
        # Surface a stale branch that diverged from the current base instead of
        # silently reusing it. `merge-base --is-ancestor B HEAD` exits 0 iff B
        # is an ancestor of HEAD (i.e. HEAD already contains B — safe to reuse).
        is_ancestor = subprocess.run(
            ["git", "merge-base", "--is-ancestor", branch_ref, "HEAD"],
            capture_output=True, text=True,
        ).returncode == 0
        if not is_ancestor:
            raise FeatureBranchError(
                f"branch '{branch}' exists and diverges from HEAD (not an "
                f"ancestor). Refusing to silently check out a stale branch; "
                f"resolve manually (rebase, recreate, or delete it) and re-run."
            )
        _checked_checkout(["checkout", branch], f"checkout of existing branch '{branch}'")
        print(f"Switched to feature branch '{branch}' (was on '{current}').")
    else:
        # Create-from-HEAD carries the working tree onto the new branch. Only
        # the expected /pick-feature flips may ride along; anything else stops.
        dirty = _tracked_dirty_paths()
        unexpected = dirty - _expected_flip_paths(feature_dir)
        if unexpected:
            raise FeatureBranchError(
                "working tree has uncommitted changes to unexpected paths: "
                + ", ".join(sorted(unexpected))
                + f". Refusing to carry them onto new branch '{branch}'. "
                "Commit or stash them first, then re-run."
            )
        _checked_checkout(["checkout", "-B", branch], f"create of branch '{branch}'")
        print(f"Created feature branch '{branch}' from '{current}'.")
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def acquire_tree_lock(specfuse_dir: Path):
    """Open .specfuse/.loop.lock and acquire a non-blocking exclusive flock.

    Returns the open file object; caller keeps it alive for the process
    lifetime — the kernel auto-releases on fd close or process exit (SIGKILL
    included), so no stale-lock cleanup is ever needed.

    The lock file's parent directory is created when missing. If `flock`
    fails for any reason the file is closed before the exception propagates,
    so a failed attempt never leaks an open handle.

    Raises:
        BlockingIOError: another process already holds the lock.
        OSError: any other failure reported by `flock` (re-raised unchanged).
    """
    lock_path = specfuse_dir / ".loop.lock"
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    fd = lock_path.open("w")
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
        # BlockingIOError is an OSError subclass; callers still see it as-is.
        fd.close()
        raise
    return fd
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
def write_cost_to_wu(backend, wu: "WorkUnit", cum_usage: dict) -> None:
    """Write cumulative cost/token/duration fields to the WU's frontmatter at
    outcome time.

    `duration_seconds` is written (rounded to 3 places) whenever the key is
    present. The cost/token fields are written only when a positive
    `cost_usd` or a non-zero token count was captured; in that case all three
    of `cost_usd`, `input_tokens` and `output_tokens` are written, with any
    key missing from `cum_usage` recorded as 0 instead of raising KeyError.

    Args:
        backend: object exposing `set_wu(wu, key, value)`.
        wu: the work unit whose frontmatter is updated (passed through to
            `backend.set_wu` untouched).
        cum_usage: accumulated usage figures for the WU.
    """
    if "duration_seconds" in cum_usage:
        backend.set_wu(wu, "duration_seconds",
                       round(cum_usage["duration_seconds"], 3))
    # Read every figure once with a default so the guard below and the writes
    # agree on what "missing" means.
    cost = cum_usage.get("cost_usd", 0)
    input_tokens = cum_usage.get("input_tokens", 0)
    output_tokens = cum_usage.get("output_tokens", 0)
    if cost <= 0 and not input_tokens and not output_tokens:
        return
    backend.set_wu(wu, "cost_usd", round(cost, 6))
    backend.set_wu(wu, "input_tokens", input_tokens)
    backend.set_wu(wu, "output_tokens", output_tokens)
|
|
803
|
+
|
|
804
|
+
|
|
805
|
+
def detect_rearm_dispatch(wu: WorkUnit) -> bool:
    """Tell whether `wu` is a re-arm whose previous cycle cost is still unfolded.

    Both `re_arm_count` and `cost_usd` are read from the frontmatter of
    `wu.file` on disk (per the original note, load_wu does not carry them on
    the WorkUnit object).

    Returns:
        True only when `re_arm_count` is a positive int AND `cost_usd` is a
        number greater than zero. A missing or zero `re_arm_count` (first
        dispatch) or a `cost_usd` that is absent, non-numeric, or not
        positive (already folded) yields False.
    """
    frontmatter, _ = read_frontmatter(wu.file)
    rearms = frontmatter.get("re_arm_count", 0)
    was_rearmed = isinstance(rearms, int) and rearms > 0
    if not was_rearmed:
        return False
    pending_cost = frontmatter.get("cost_usd", 0)
    if not isinstance(pending_cost, (int, float)):
        return False
    return float(pending_cost) > 0
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
def fold_cumulative_on_rearm(wu: WorkUnit, backend: Backend) -> None:
    """Roll the previous dispatch cycle's usage into the `cumulative_*` fields.

    Intended to run once per re-arm, before the new cycle's attempts start.
    The per-cycle fields (`cost_usd`, `duration_seconds`, `input_tokens`,
    `output_tokens`) are read from the frontmatter of `wu.file`, added to
    their `cumulative_*` counterparts, and then reset so the next
    write_cost_to_wu starts from zero.

    A field that is absent (or otherwise falsy) counts as 0, so a WU that
    has never carried cumulative fields folds cleanly on its first re-arm.
    All values are converted before anything is written.
    """
    frontmatter, _ = read_frontmatter(wu.file)

    # (per-cycle field, converter, rounding digits for the cumulative value)
    layout = (
        ("cost_usd", float, 6),
        ("duration_seconds", float, 3),
        ("input_tokens", int, None),
        ("output_tokens", int, None),
    )

    last_cycle = {
        name: convert(frontmatter.get(name) or 0)
        for name, convert, _ in layout
    }
    running = {
        name: convert(frontmatter.get(f"cumulative_{name}") or 0) + last_cycle[name]
        for name, convert, _ in layout
    }

    for name, _, digits in layout:
        total = running[name] if digits is None else round(running[name], digits)
        backend.set_wu(wu, f"cumulative_{name}", total)

    # Zero the per-cycle fields (0.0 for the float ones, 0 for the counters).
    for name, convert, _ in layout:
        backend.set_wu(wu, name, convert(0))
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
def gate_budget_usd(gate_file: Path) -> float | None:
    """Look up the optional cumulative-cost ceiling declared on a GATE file.

    The value comes from the `cost_budget_usd` key of the file's frontmatter.

    Returns:
        The ceiling as a float, or None when the key is absent.

    Raises:
        ValueError: the key is present but holds something other than an
            int/float (booleans are rejected too). The message names the
            gate file so the misconfiguration is easy to locate.
    """
    frontmatter, _ = read_frontmatter(gate_file)
    if "cost_budget_usd" not in frontmatter:
        return None
    declared = frontmatter["cost_budget_usd"]
    is_number = isinstance(declared, (int, float)) and not isinstance(declared, bool)
    if is_number:
        return float(declared)
    raise ValueError(
        f"{gate_file}: cost_budget_usd must be numeric, got {declared!r}"
    )
|
|
875
|
+
|
|
876
|
+
|
|
877
|
+
def gate_spent_usd(plan: dict, gate: dict, feature_dir: Path) -> float:
    """Add up `cost_usd` over the gate's work units whose status is "done".

    Every entry of `gate["work_units"]` that names a `file` existing under
    `feature_dir` has its frontmatter read. The entry contributes its
    `cost_usd` only when `status` equals "done" and the cost is an int or
    float (booleans excluded); anything else contributes nothing.

    Args:
        plan: feature frontmatter; accepted for signature symmetry with the
            other gate-budget helpers and not consulted.
        gate: gate description holding the `work_units` list.
        feature_dir: directory the WU file names are relative to.

    Returns:
        The summed cost as a float (0.0 when nothing qualifies).
    """
    del plan  # signature symmetry — sum is derived from WU files only
    spent = 0.0
    entries = gate.get("work_units") or []
    for entry in entries:
        relative = entry.get("file")
        candidate = feature_dir / relative if relative else None
        if candidate is None or not candidate.exists():
            continue
        frontmatter, _ = read_frontmatter(candidate)
        if frontmatter.get("status") != "done":
            continue
        recorded = frontmatter.get("cost_usd")
        usable = isinstance(recorded, (int, float)) and not isinstance(recorded, bool)
        if usable:
            spent += float(recorded)
    return spent
|
|
905
|
+
|
|
906
|
+
|
|
907
|
+
def _should_halt_for_budget(plan: dict, gate: dict, feature_dir: Path) -> bool:
    """Decide whether the per-gate budget brake fires before the next dispatch.

    The ceiling is read from the gate file at `feature_dir / gate["file"]`.
    With no ceiling declared the answer is False; otherwise it is True once
    the gate's spent total has reached or passed the ceiling.
    """
    ceiling = gate_budget_usd(feature_dir / gate["file"])
    if ceiling is not None:
        return gate_spent_usd(plan, gate, feature_dir) >= ceiling
    return False
|
|
916
|
+
|
|
917
|
+
|
|
918
|
+
class BookkeepingCommitError(RuntimeError):
    """Signals that commit_bookkeeping's `git commit` exited non-zero.

    Counterpart of SquashCommitError (issue #51), covering the driver's
    bookkeeping commits: gate status flips and the events.jsonl audit trail.

    Background: the bookkeeping commit previously ran with `check=True`, so
    a pre-commit hook rejection left run()/main() as a bare
    CalledProcessError — an unhandled traceback with git's stderr swallowed.
    This exception replaces that with a readable error that carries git's
    stderr.

    First seen in FEAT-2026-0024: a leak-scan FINDINGS line quoting
    `git@github.com` had been captured into events.jsonl and tripped the hook
    again on the awaiting_review bookkeeping commit. That address has since
    been allowlisted; the guard stays so that a genuine leak in bookkeeping
    state fails loudly rather than cryptically.
    """
|
|
931
|
+
|
|
932
|
+
|
|
933
|
+
def commit_bookkeeping(paths: list, message: str) -> str | None:
    """Stage specific paths and create a chore(loop) bookkeeping commit.

    Used for state we want durable that is NOT part of a WU's squash commit:
    the WU's `blocked_human` status flip, the events.jsonl append for that
    block, the gate's `awaiting_review` status flip, and (on spinning) the
    per-attempt failure notes flushed out of memory.

    The bug this exists to prevent: writes to the working tree don't survive
    a subsequent `git reset --hard`. Status flips written but not committed
    silently revert. Anything that should persist must be committed.

    No-op if nothing to commit (path missing or no diff). "Nothing to commit"
    is decided from the INDEX (`git diff --cached`), not from the whole
    working tree: unrelated unstaged or untracked files elsewhere in the repo
    must not push an unchanged bookkeeping set into a `git commit` that git
    would reject as empty.

    Args:
        paths: candidate paths; those that do not exist on disk are dropped.
        message: the commit message.

    Returns:
        The new HEAD as reported by `git rev-parse HEAD`, or None when no
        commit was made.

    Raises:
        BookkeepingCommitError: `git commit` exited non-zero; the message
            carries git's stderr and stdout.
    """
    existing = [str(p) for p in paths if Path(p).exists()]
    if not existing:
        return None
    # -f: caller curates the path list (driver-managed bookkeeping state); some
    # paths intentionally live under `.specfuse/**/work/` which the scaffold
    # gitignores. Force-add bypasses the ignore for these known paths only.
    git("add", "-f", *existing)
    # NOTE(review): relies on git() returning the command's stdout (empty
    # string when there is no output), as the other call sites here do.
    if not git("diff", "--cached", "--name-only"):
        return None  # all paths were already in their committed state
    res = subprocess.run(
        ["git", "commit", "-m", message], capture_output=True, text=True,
    )
    if res.returncode != 0:
        raise BookkeepingCommitError(
            f"bookkeeping commit was rejected (exit {res.returncode}) — "
            f"usually a pre-commit hook rejecting the staged bookkeeping state.\n"
            f"--- git stderr ---\n{res.stderr.strip()}\n"
            f"--- git stdout ---\n{res.stdout.strip()}"
        )
    return git("rev-parse", "HEAD")
|
|
967
|
+
|
|
968
|
+
|
|
969
|
+
def reset_preserving_events(head_before: str, events_path: Path) -> None:
    """`git reset --hard <head_before>` without losing events.jsonl content.

    The hard-reset is the methodology's "wipe agent's edits before we write our
    bookkeeping" move. But events.jsonl can carry flushed-but-not-yet-committed
    entries from a PRIOR WU whose flush happened after its squash commit (the
    passed path flushes events AFTER the squash). Those entries sit on disk
    waiting for the NEXT WU's `commit_bookkeeping` to capture them. A bare
    `git reset --hard` between WUs rolls events.jsonl back to its last-
    committed state, silently dropping the prior WU's lifecycle events.

    Surfaced FEAT-2026-0015/T02 (commits 52a176a / 74d1911): T02 ran clean,
    its task_started + task_completed events were flushed post-squash, then
    T03 blocked → bare hard-reset wiped them. Same loss recurred when T02H
    completed clean and T03 was re-armed.

    This helper:
      1. Reads events.jsonl content (if any) into memory.
      2. Performs the hard-reset (drops the agent's working-tree edits).
      3. Writes the preserved events.jsonl back to disk.

    The content is carried as raw bytes, so the round-trip is exact whatever
    the locale's default encoding or newline handling. The parent directory
    is recreated before the write in case the reset removed it.

    Subsequent `flush_events` calls then append to the preserved content;
    `commit_bookkeeping` captures the full history.

    Args:
        head_before: the commit to reset to.
        events_path: location of events.jsonl; nothing is restored when it
            is not a regular file before the reset.
    """
    saved = events_path.read_bytes() if events_path.is_file() else None
    git("reset", "--hard", head_before)
    if saved is not None:
        events_path.parent.mkdir(parents=True, exist_ok=True)
        events_path.write_bytes(saved)
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
class SquashCommitError(RuntimeError):
    """Signals that squash_commit's `git commit` exited non-zero.

    Most often a pre-commit hook (the leak-scan hook, for instance) has
    refused the squash. Because the commit runs with `capture_output`, git's
    stderr/stdout would otherwise be lost; the message includes both so the
    caller can write an actionable failure note rather than crash on a bare
    CalledProcessError. See issue #51.
    """
|
|
1007
|
+
|
|
1008
|
+
|
|
1009
|
+
def squash_commit(wu: WorkUnit, head_before: str) -> str | None:
    """Collapse everything done since `head_before` into one commit for `wu`.

    If HEAD has moved away from `head_before`, a soft reset folds those
    commits back into the index first. When `git status --porcelain` then
    reports nothing, no commit is made. Otherwise every change is staged
    (`git add -A`) and committed with a `feat: <title>` subject and a
    `Feature: <wu_id>` trailer.

    Returns:
        The new HEAD as reported by `git rev-parse HEAD`, or None when there
        was nothing to commit.

    Raises:
        SquashCommitError: `git commit` exited non-zero; the message carries
            git's stderr and stdout.
    """
    head_moved = git("rev-parse", "HEAD") != head_before
    if head_moved:
        git("reset", "--soft", head_before)  # fold away any commits the agent made
    pending = git("status", "--porcelain")
    if not pending:
        return None
    git("add", "-A")
    message = f"feat: {wu.title}\n\nFeature: {wu.wu_id}"
    outcome = subprocess.run(
        ["git", "commit", "-m", message], capture_output=True, text=True,
    )
    if outcome.returncode == 0:
        return git("rev-parse", "HEAD")
    raise SquashCommitError(
        f"git commit for {wu.wu_id} was rejected (exit {outcome.returncode}) — "
        f"usually a pre-commit hook rejecting the squash.\n"
        f"--- git stderr ---\n{outcome.stderr.strip()}\n"
        f"--- git stdout ---\n{outcome.stdout.strip()}"
    )
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
# --------------------------------------------------------------------------- #
|
|
1030
|
+
# Dispatch + verification #
|
|
1031
|
+
# --------------------------------------------------------------------------- #
|
|
1032
|
+
|
|
1033
|
+
# Fixed instructions that open every dispatched prompt. The backslash right
# after the opening quotes and at the end of each physical line joins the
# lines, so the value is one long line of text ending in a single newline.
PROMPT_PREAMBLE = """\
You are executing a single Specfuse work unit. Read .specfuse/rules/ in full before \
acting; they are binding. Do NOT run any git command — the driver owns all commits \
and bookkeeping. Edit files only. End your turn with the RESULT block defined in \
.specfuse/rules/result-contract.md. Verification is run by the driver, not by you; \
report honestly.
"""

# Extra section asking the agent for terse output. Only the heading sits on
# its own line; the sentences after it are joined into one line by the
# trailing backslashes.
CAVEMAN_DIRECTIVE = """\
## Output terseness directive
Drop articles (a/an/the), filler words (just/really/basically/actually/simply), \
pleasantries (sure/certainly/of course/happy to), and hedging. \
Avoid prose narration between tool calls. \
Omit any end-of-turn summary. \
Write code blocks and the fenced RESULT block normally — do not abbreviate them. \
Quote error strings exactly as they appear.
"""

# Effort levels for which dispatch() appends CAVEMAN_DIRECTIVE to the preamble.
_CAVEMAN_EFFORT = frozenset({"low", "medium"})
|
|
1052
|
+
|
|
1053
|
+
|
|
1054
|
+
def truncate_failure_note(note: str, max_lines: int = 200,
                          max_chars: int = 8000) -> str:
    """Shorten an over-long failure note to a head, an elision marker, and a tail.

    A note is returned untouched when it has at most `max_chars` characters
    and fewer than `max_lines` newlines. Otherwise the line budget is split
    between the start and the end of the note (the tail gets the odd line),
    and each side is trimmed further, one line at a time, until it fits in
    half of `max_chars` (every line counted with one extra character for its
    newline).

    The marker reports how many lines and characters were dropped. It is
    plain ASCII and contains no triple backtick, so it cannot interfere with
    RESULT-block parsing.
    """
    within_limits = len(note) <= max_chars and note.count("\n") < max_lines
    if within_limits:
        return note

    def _weight(chunk: list) -> int:
        # Characters a run of lines occupies, newline included per line.
        return sum(len(row) + 1 for row in chunk)

    rows = note.splitlines()
    total = len(rows)
    budget = min(max_lines, total - 1)
    keep_head = budget // 2
    keep_tail = budget - keep_head
    per_side = max_chars // 2

    while keep_head > 0 and _weight(rows[:keep_head]) > per_side:
        keep_head -= 1
    while keep_tail > 0 and _weight(rows[total - keep_tail:]) > per_side:
        keep_tail -= 1

    head = rows[:keep_head]
    tail = rows[total - keep_tail:] if keep_tail > 0 else []
    dropped_lines = total - keep_head - keep_tail
    dropped_chars = len(note) - _weight(head) - _weight(tail)
    marker = f"\n... [{dropped_lines} lines / {dropped_chars} chars elided] ...\n"
    return "\n".join(head) + marker + "\n".join(tail)
|
|
1080
|
+
|
|
1081
|
+
|
|
1082
|
+
def dispatch(wu: WorkUnit, failure_note: str | None,
             cost_tracking: bool = True) -> tuple[str, dict | None]:
    """Run one fresh agent session for `wu` and return its output.

    The prompt is PROMPT_PREAMBLE, then CAVEMAN_DIRECTIVE when `wu.effort`
    is one of _CAVEMAN_EFFORT, then the WU body. A non-empty `failure_note`
    is appended (truncated) under a "Previous attempt failed verification"
    heading. The command line is CLAUDE_CMD with `{model}` and `{effort}`
    substituted from the WU; the prompt is fed on stdin.

    Args:
        wu: the work unit to dispatch.
        failure_note: verification output from a prior attempt, if any.
        cost_tracking: when True (default) `--output-format json` is added
            and the output is parsed for cost / token usage.

    Returns:
        `(result_text, usage)`. With cost tracking off this is the raw
        stdout and None. With it on, it is whatever
        parse_claude_json_output makes of stdout — usage is None when the
        output cannot be parsed as the expected JSON shape, and the text is
        still returned so RESULT-block parsing and verify() can proceed.
    """
    sections = [PROMPT_PREAMBLE]
    if wu.effort in _CAVEMAN_EFFORT:
        sections.append(CAVEMAN_DIRECTIVE)
    sections.append(wu.body)
    prompt = "\n\n".join(sections)
    if failure_note:
        retry_header = (
            "\n\n## Previous attempt failed verification\n"
            "A prior fresh attempt failed the gates below. Diagnose and fix; "
            "do not repeat the same approach.\n\n"
        )
        prompt += retry_header + truncate_failure_note(failure_note)

    cmd = []
    for part in CLAUDE_CMD:
        cmd.append(part.replace("{model}", wu.model).replace("{effort}", wu.effort))
    if wu.unsandboxed:
        # Per-WU sandbox-escape. Audited via the unsandboxed_dispatch event
        # emitted in run()'s attempt loop; rationale lives in WU frontmatter.
        # Index 2 is meant to sit right after `-p` so the flag composes with
        # --model/--effort/--output-format.
        cmd.insert(2, "--dangerously-skip-permissions")
    if cost_tracking:
        cmd.extend(["--output-format", "json"])

    session = subprocess.run(cmd, input=prompt, capture_output=True, text=True)
    raw = session.stdout or ""
    if cost_tracking:
        return parse_claude_json_output(raw)
    return raw, None
|
|
1114
|
+
|
|
1115
|
+
|
|
1116
|
+
def parse_claude_json_output(raw: str) -> tuple[str, dict | None]:
|
|
1117
|
+
"""Parse Claude CLI's `--output-format=json` envelope.
|
|
1118
|
+
|
|
1119
|
+
Tolerant: any shape drift returns (raw, None) so the caller falls back
|
|
1120
|
+
to text-mode RESULT-block parsing. Extracts `total_cost_usd`,
|
|
1121
|
+
`input_tokens`, `output_tokens`, and cache-token counts when present.
|
|
1122
|
+
"""
|
|
1123
|
+
try:
|
|
1124
|
+
data = json.loads(raw)
|
|
1125
|
+
except (json.JSONDecodeError, ValueError):
|
|
1126
|
+
return raw, None
|
|
1127
|
+
if not isinstance(data, dict):
|
|
1128
|
+
return raw, None
|
|
1129
|
+
result_text = data.get("result", "")
|
|
1130
|
+
if not isinstance(result_text, str):
|
|
1131
|
+
result_text = raw
|
|
1132
|
+
usage: dict = {}
|
|
1133
|
+
cost = data.get("total_cost_usd")
|
|
1134
|
+
if isinstance(cost, (int, float)):
|
|
1135
|
+
usage["cost_usd"] = float(cost)
|
|
1136
|
+
u = data.get("usage")
|
|
1137
|
+
if isinstance(u, dict):
|
|
1138
|
+
for key in ("input_tokens", "output_tokens",
|
|
1139
|
+
"cache_read_input_tokens", "cache_creation_input_tokens"):
|
|
1140
|
+
if isinstance(u.get(key), int):
|
|
1141
|
+
usage[key] = u[key]
|
|
1142
|
+
return result_text, (usage if usage else None)
|
|
1143
|
+
|
|
1144
|
+
|
|
1145
|
+
def verify_files_changed(result: dict, head_before: str) -> list[str]:
    """List the claimed `files_changed` paths that show no change since head_before.

    The RESULT-block contract lets the agent name the paths it touched. This
    guard runs before squash_commit and checks each claim:

    * a path that does not exist on disk is reported — it cannot carry a
      diff to commit;
    * a path for which `git diff --quiet <head_before> -- <path>` exits 0 is
      reported, unless `git ls-files --others --exclude-standard` lists it.
      `git diff` only sees tracked content, so a file the agent has just
      created needs this second probe; without it new deliverables would be
      flagged as untouched and the WU would spin to blocked_human.

    An absent, empty, or non-list `files_changed` opts out: the result is an
    empty list (older WUs and the worked example do not always declare it).

    See FEAT-2026-0008 / RETROSPECTIVE for the motivating failure — T04 and
    T08 of FEAT-2026-0007 declared source paths their attempts never touched.
    """
    claimed = result.get("files_changed") or []
    if not (isinstance(claimed, list) and claimed):
        return []

    def _really_changed(path: str) -> bool:
        if not Path(path).exists():
            return False
        tracked_identical = subprocess.run(
            ["git", "diff", "--quiet", head_before, "--", path],
            capture_output=True,
        ).returncode == 0
        if not tracked_identical:
            return True
        # No tracked diff: it still counts if it is a new untracked file.
        listed_untracked = subprocess.run(
            ["git", "ls-files", "--others", "--exclude-standard",
             "--", path],
            capture_output=True, text=True,
        ).stdout.strip()
        return bool(listed_untracked)

    return [path for path in map(str, claimed) if not _really_changed(path)]
|
|
1191
|
+
|
|
1192
|
+
|
|
1193
|
+
# Smoke-import runner (FEAT-2026-0008/T03). The conservative pattern matches
# ONLY a `python3 -c "from X import Y"` line. The agent-authored WU body may
# declare an existence check naming new symbols this WU just minted; the
# driver runs each match after a successful verify() + squash and rolls back
# the squash if any smoke import raises. Free-form `python3 -c` lines are
# NOT executed — running arbitrary agent-authored Python in the driver
# process would be a security regression (see WU escalation trigger 2).
#
# Matched lines are handed to a shell, so X and Y are restricted to plain
# identifiers: X is a dotted module path, Y is `*` or a comma-separated list
# of names with no spaces. A looser "any non-space run" would also admit
# `;`-chained statements, `$(...)` and backtick substitutions, which are
# exactly the free-form code this pattern exists to keep out.
SMOKE_IMPORT_RE = re.compile(
    r'''^\s*python3?\s+-c\s+(["'])from\s+[A-Za-z_][\w.]*\s+import\s+'''
    r'''(?:\*|[A-Za-z_]\w*(?:,[A-Za-z_]\w*)*)\1\s*$'''
)
|
|
1203
|
+
|
|
1204
|
+
|
|
1205
|
+
def extract_smoke_imports(wu_body: str) -> list[str]:
    """Collect the WU-body lines that match the conservative import-smoke pattern.

    The body is scanned line by line against SMOKE_IMPORT_RE. Each hit is
    returned with surrounding whitespace removed, as a complete command
    string suitable for `subprocess.run(shell=True, ...)`, in the order the
    lines appear. Anything that only resembles the pattern —
    `python -c "import X"`, `python -c "print(...)"`, prose — is left out.
    """
    return [
        candidate.strip()
        for candidate in wu_body.splitlines()
        if SMOKE_IMPORT_RE.match(candidate)
    ]
|
|
1218
|
+
|
|
1219
|
+
|
|
1220
|
+
def run_smoke_imports(commands: list[str], cwd: Path) -> tuple[bool, str]:
    """Execute the smoke-import commands one after another inside `cwd`.

    Each command is run through the shell with its output captured. The run
    stops at the first command that exits non-zero; later commands are not
    started, since a single failure already fails the attempt.

    No `env` is passed to the subprocess, so it inherits the driver's
    environment, including PATH. The methodology expects the driver to be
    started from an active venv (`[loop-driver-operation]`) so that
    `python3` resolves to the venv interpreter.

    Returns:
        `(True, "")` when every command exits 0 (or the list is empty);
        `(False, summary)` otherwise, where `summary` names the exit code,
        the failing command and its stderr — short enough for an event
        payload and a retry failure_note.
    """
    for command in commands:
        outcome = subprocess.run(  # nosec B602
            command, shell=True, capture_output=True, text=True, cwd=str(cwd),
        )
        if outcome.returncode == 0:
            continue
        report = (
            f"smoke import failed (exit {outcome.returncode}):\n"
            f" $ {command}\n"
            f"stderr:\n{outcome.stderr.strip()}"
        )
        return False, report
    return True, ""
|
|
1245
|
+
|
|
1246
|
+
|
|
1247
|
+
def is_zero_token_attempt(usage: dict | None) -> bool:
|
|
1248
|
+
"""Did the dispatched session bill zero input tokens?
|
|
1249
|
+
|
|
1250
|
+
Returns True iff `usage` is a dict whose `input_tokens` key is exactly 0.
|
|
1251
|
+
A zero-token attempt means the agent never produced output (often due to a
|
|
1252
|
+
transient CLI / quota / connectivity failure that the SDK reports as a
|
|
1253
|
+
success with empty content); its RESULT block — if present — is
|
|
1254
|
+
hallucinated upstream and must not be trusted.
|
|
1255
|
+
|
|
1256
|
+
Returns False for `usage is None` (cost tracking disabled — preserve prior
|
|
1257
|
+
behavior for users who opt out), for a dict missing `input_tokens`, and
|
|
1258
|
+
for any positive integer. The guard is opt-in via the cost-tracking flag:
|
|
1259
|
+
when the operator runs with cost tracking off, `dispatch()` always returns
|
|
1260
|
+
`usage=None` and this function always returns False.
|
|
1261
|
+
|
|
1262
|
+
See FEAT-2026-0008 / RETROSPECTIVE for the failure mode this exists to
|
|
1263
|
+
catch — a zero-token attempt in FEAT-2026-0007/T08H landed `status: done`
|
|
1264
|
+
despite the agent never running.
|
|
1265
|
+
"""
|
|
1266
|
+
if not isinstance(usage, dict):
|
|
1267
|
+
return False
|
|
1268
|
+
return usage.get("input_tokens") == 0
|
|
1269
|
+
|
|
1270
|
+
|
|
1271
|
+
RESULT_BLOCK_RE = re.compile(r"```result\s*\n(.*?)\n```", re.DOTALL)
|
|
1272
|
+
|
|
1273
|
+
|
|
1274
|
+
def parse_result_block(stdout: str) -> dict | None:
|
|
1275
|
+
"""Return the parsed final ```result``` block from stdout, or None.
|
|
1276
|
+
|
|
1277
|
+
The result-contract rule (`.specfuse/rules/result-contract.md`) requires the
|
|
1278
|
+
agent to end its turn with a single fenced `result` block. Be forgiving:
|
|
1279
|
+
agents may discuss before it, may emit other fenced blocks elsewhere, may
|
|
1280
|
+
produce malformed YAML. Any of those returns None and the caller falls back
|
|
1281
|
+
to verify() as the exit oracle. Crashing the loop on a garbled agent output
|
|
1282
|
+
would defeat the purpose of having a separate oracle in the first place.
|
|
1283
|
+
"""
|
|
1284
|
+
if not stdout:
|
|
1285
|
+
return None
|
|
1286
|
+
matches = list(RESULT_BLOCK_RE.finditer(stdout))
|
|
1287
|
+
if not matches:
|
|
1288
|
+
return None
|
|
1289
|
+
body = matches[-1].group(1) # LAST result block — agents may discuss before it
|
|
1290
|
+
try:
|
|
1291
|
+
parsed = _miniyaml.parse(body)
|
|
1292
|
+
except Exception: # noqa: BLE001 - intentional: see comment below
|
|
1293
|
+
# Broad catch is deliberate AND scoped to this site only. The agent's
|
|
1294
|
+
# stdout is the least-trusted input in the system (free-form LLM text
|
|
1295
|
+
# supposedly ending in a fenced result block); the forgiving contract
|
|
1296
|
+
# here is "anything malformed degrades to verify() decides, never
|
|
1297
|
+
# crashes the driver." A MiniYAMLError covers documented-subset
|
|
1298
|
+
# violations, but the parser is hand-rolled and could in principle
|
|
1299
|
+
# raise IndexError/ValueError/etc. on a sufficiently weird input —
|
|
1300
|
+
# those must also degrade, not crash a real driver run.
|
|
1301
|
+
# Every OTHER _miniyaml.parse site (read_frontmatter, load_graph,
|
|
1302
|
+
# load_verification, and the linter) reads operator-authored config
|
|
1303
|
+
# and intentionally keeps the strict MiniYAMLError-only handling so
|
|
1304
|
+
# malformed config files fail loudly, per verify()'s fail-closed
|
|
1305
|
+
# philosophy. Do not broaden those.
|
|
1306
|
+
return None
|
|
1307
|
+
return parsed if isinstance(parsed, dict) else None
|
|
1308
|
+
|
|
1309
|
+
|
|
1310
|
+
def agent_reported_blocked(stdout: str) -> tuple[bool, str | None]:
    """Tell whether the agent's RESULT block explicitly says `status: blocked`.

    Returns `(True, blocked_reason)` only when a well-formed result block
    carries `status: blocked`; the reason is stringified, or None when the
    block gives none. A missing block, a malformed block, or any other
    status yields `(False, None)`, after which the driver runs verify() as
    usual.
    """
    result = parse_result_block(stdout)
    if result and result.get("status") == "blocked":
        reason = result.get("blocked_reason")
        return True, (None if reason is None else str(reason))
    return False, None
|
|
1322
|
+
|
|
1323
|
+
|
|
1324
|
+
def load_verification() -> dict:
    """Read the gate configuration stored at VERIFICATION_PATH.

    Returns an empty dict when the file does not exist or when its parsed
    content is falsy; parse errors from `_miniyaml.parse` are not caught
    here and propagate to the caller.
    """
    if VERIFICATION_PATH.exists():
        return _miniyaml.parse(VERIFICATION_PATH.read_text()) or {}
    return {}
|
|
1328
|
+
|
|
1329
|
+
|
|
1330
|
+
def verify(wu: WorkUnit, feature_dir: Path,
           cfg: dict | None = None) -> tuple[bool, str]:
    """Run the verification gates for `wu` — the driver is the exit oracle.

    The agent's self-report is advisory; this function executes the gate
    commands itself and returns `(all_passed, evidence)`, where `evidence`
    is a markdown report with one section per gate (PASS/FAIL, the command,
    and the last 15 lines of its combined stdout+stderr).

    An empty or missing gate set for the WU's type is a CONFIGURATION
    failure, not a pass: a misconfigured verification.yml must never let
    work through silently. The message names the configuration cause so a
    human reading the log fixes verification.yml rather than the work unit.
    The same applies to any `extra_gates` name that verification.yml does
    not define.

    `cfg` is injectable for testing; when None the configuration is loaded
    via load_verification().
    """
    config = load_verification() if cfg is None else cfg
    set_name = GATES_FOR_TYPE.get(wu.type, "code")
    base_gates = config.get(set_name) or []
    if not base_gates:
        return False, (
            f"CONFIGURATION ERROR: no '{set_name}' gates configured in "
            f".specfuse/verification.yml for work-unit type '{wu.type}'. "
            "This is not a work-unit failure — fix verification.yml and re-run."
        )

    # Merge author-declared extra_gates sets into the type-selected set. Gates
    # are deduplicated by name so a gate shared between the type default and
    # an extra set does not run twice (issue #62).
    selected = list(base_gates)
    names_taken = {entry["name"] for entry in selected}
    for extra_name in wu.extra_gates:
        if extra_name == set_name:
            continue  # already the type-selected set
        extra_gates = config.get(extra_name)
        if not extra_gates:
            # Same class of error as an empty type set — never a silent pass.
            return False, (
                f"CONFIGURATION ERROR: work unit declares `extra_gates: "
                f"[{extra_name}]` but no '{extra_name}' gates are configured in "
                f".specfuse/verification.yml. This is not a work-unit failure — "
                f"fix verification.yml (or the WU's extra_gates) and re-run."
            )
        for entry in extra_gates:
            if entry["name"] not in names_taken:
                names_taken.add(entry["name"])
                selected.append(entry)

    reports: list[str] = []
    all_passed = True
    for entry in selected:
        command = entry["command"].replace("{feature_dir}", str(feature_dir))
        # shell=True is intentional: gate commands are written by the user in
        # verification.yml and routinely rely on shell features (pipes, &&,
        # globs, redirects — e.g. `dotnet build && dotnet test --no-build`).
        # The input is the project's own config, not untrusted external data.
        proc = subprocess.run(  # nosec B602
            command, shell=True, capture_output=True, text=True,
        )
        gate_passed = proc.returncode == 0
        if not gate_passed:
            all_passed = False
        tail = (proc.stdout + proc.stderr).strip().splitlines()[-15:]
        verdict = "PASS" if gate_passed else "FAIL"
        reports.append(
            f"### {entry['name']}: {verdict}\n"
            f"```\n$ {command}\n" + "\n".join(tail) + "\n```"
        )
    return all_passed, "\n\n".join(reports)
|
|
1389
|
+
|
|
1390
|
+
|
|
1391
|
+
def execute_unit_attempt(
    wu: WorkUnit,
    feature_dir: Path,
    failure_note: str | None,
    *,
    dispatch_fn=None,
    verify_fn=None,
    cost_tracking: bool = True,
    head_before: str | None = None,
) -> tuple[str, object, dict | None]:
    """Run one dispatch → parse → (unless blocked) verify cycle for `wu`.

    Split out of run() so that the parse-and-decide logic can be unit-tested
    without spawning a real agent: a test passes stub callables as
    `dispatch_fn` and `verify_fn`.

    Returns `(outcome, payload, usage)`; `outcome` is one of:
        "zero_token" — usage reports input_tokens=0 (the agent never ran);
            payload is None
        "blocked" — the agent explicitly emitted status: blocked; payload is
            the blocked reason (or None)
        "passed" — verify passed AND the files_changed guard found nothing
            to flag; payload is the verify evidence
        "failed" — verify failed; payload is the verify evidence
        "files_changed_mismatch" — verify passed but the RESULT block's
            files_changed list names paths showing no diff against
            head_before; payload is the list of unchanged paths

    `usage` is the per-attempt cost/token dict from the dispatch when
    `cost_tracking` is True and the agent returned a parseable usage block;
    otherwise None (also when a dispatch_fn stub returns a plain string).

    Backward-compatible dispatch_fn contract: a stub may return either a
    plain `str` (text only, usage=None) or a `(str, dict|None)` tuple. Stubs
    are called as `dispatch_fn(wu, failure_note)`.

    `head_before` is the pre-attempt HEAD SHA that the files_changed guard
    diffs against. None switches the guard off — kept for unit tests that
    exercise this function without a git working tree.
    """
    run_gates = verify if verify_fn is None else verify_fn
    dispatched = (
        dispatch(wu, failure_note, cost_tracking)
        if dispatch_fn is None
        else dispatch_fn(wu, failure_note)
    )
    if isinstance(dispatched, tuple):
        stdout, usage = dispatched
    else:
        stdout, usage = dispatched, None
    agent_text = stdout or ""

    # The zero-token guard comes BEFORE any RESULT-block parsing: the agent
    # produced nothing, so a block in stdout is hallucinated upstream and must
    # not be trusted (FEAT-2026-0008/T01). It is opt-in via cost tracking —
    # when disabled, usage is None and is_zero_token_attempt returns False.
    if is_zero_token_attempt(usage):
        return "zero_token", None, usage

    blocked, reason = agent_reported_blocked(agent_text)
    if blocked:
        return "blocked", reason, usage

    passed, evidence = run_gates(wu, feature_dir)
    if not passed:
        return "failed", evidence, usage

    # files_changed guard (FEAT-2026-0008/T02): the agent's RESULT claim is
    # diffed against head_before BEFORE squash_commit. A non-empty mismatch
    # marks the attempt as a verification failure even though the gates
    # reported PASS — gates cannot see "the diff is empty" when their commands
    # operate on files unrelated to the WU's scope.
    if head_before is not None:
        claimed = parse_result_block(agent_text)
        unchanged = verify_files_changed(claimed, head_before) if claimed else None
        if unchanged:
            return "files_changed_mismatch", unchanged, usage
    return "passed", evidence, usage
|
|
1464
|
+
|
|
1465
|
+
|
|
1466
|
+
# --------------------------------------------------------------------------- #
|
|
1467
|
+
# Roadmap row parser (header-name based) — issue #15 #
|
|
1468
|
+
# --------------------------------------------------------------------------- #
|
|
1469
|
+
|
|
1470
|
+
|
|
1471
|
+
def _parse_roadmap_row(roadmap_text: str, feature_id: str) -> dict | None:
|
|
1472
|
+
"""Find feature_id's row in roadmap.md and return columns mapped by header name.
|
|
1473
|
+
|
|
1474
|
+
Looks up the first markdown table header row containing a 'Status' cell,
|
|
1475
|
+
parses column names by name (not by positional index), then finds the
|
|
1476
|
+
feature_id data row after the header. Tolerates any column count and any
|
|
1477
|
+
ordering, including project-specific columns like 'Priority' or 'Budget'.
|
|
1478
|
+
|
|
1479
|
+
Returns a dict on success:
|
|
1480
|
+
'columns': {col_name: stripped_value, ...}
|
|
1481
|
+
'cell_spans': {col_name: (start, end), ...} absolute offsets into
|
|
1482
|
+
roadmap_text spanning the BETWEEN-PIPES content (suitable
|
|
1483
|
+
for whole-cell rewrites).
|
|
1484
|
+
'row_span': (start, end) absolute offsets of the full row line.
|
|
1485
|
+
|
|
1486
|
+
Returns None if no table header containing 'Status' is found, or if no
|
|
1487
|
+
feature_id row exists after that header.
|
|
1488
|
+
"""
|
|
1489
|
+
# Locate the table header — a line `| col1 | col2 | ... |` whose cells
|
|
1490
|
+
# include the literal 'Status'. The header row appears immediately above
|
|
1491
|
+
# the markdown separator line; we use 'Status' in its cells as the marker.
|
|
1492
|
+
header_re = re.compile(r"^\|([^\n]*)\|\s*$", re.MULTILINE)
|
|
1493
|
+
header_m = None
|
|
1494
|
+
col_names: list[str] = []
|
|
1495
|
+
for m in header_re.finditer(roadmap_text):
|
|
1496
|
+
cells = [c.strip() for c in m.group(1).split("|")]
|
|
1497
|
+
if "Status" in cells:
|
|
1498
|
+
header_m = m
|
|
1499
|
+
col_names = cells
|
|
1500
|
+
break
|
|
1501
|
+
if header_m is None:
|
|
1502
|
+
return None
|
|
1503
|
+
|
|
1504
|
+
# Locate the feature_id data row AFTER the header.
|
|
1505
|
+
row_re = re.compile(
|
|
1506
|
+
r"^\|\s*" + re.escape(feature_id) + r"\s*\|[^\n]*$",
|
|
1507
|
+
re.MULTILINE,
|
|
1508
|
+
)
|
|
1509
|
+
row_m = row_re.search(roadmap_text, pos=header_m.end())
|
|
1510
|
+
if not row_m:
|
|
1511
|
+
return None
|
|
1512
|
+
|
|
1513
|
+
row_text = row_m.group(0)
|
|
1514
|
+
row_start_abs = row_m.start()
|
|
1515
|
+
|
|
1516
|
+
# Pipe positions inside the row identify cell boundaries.
|
|
1517
|
+
pipes = [i for i, ch in enumerate(row_text) if ch == "|"]
|
|
1518
|
+
if len(pipes) < len(col_names) + 1:
|
|
1519
|
+
# Malformed row — fewer cells than the header declares.
|
|
1520
|
+
return None
|
|
1521
|
+
|
|
1522
|
+
columns: dict[str, str] = {}
|
|
1523
|
+
cell_spans: dict[str, tuple[int, int]] = {}
|
|
1524
|
+
for col_idx, col_name in enumerate(col_names):
|
|
1525
|
+
cell_start_rel = pipes[col_idx] + 1
|
|
1526
|
+
cell_end_rel = pipes[col_idx + 1]
|
|
1527
|
+
raw = row_text[cell_start_rel:cell_end_rel]
|
|
1528
|
+
columns[col_name] = raw.strip()
|
|
1529
|
+
cell_spans[col_name] = (
|
|
1530
|
+
row_start_abs + cell_start_rel,
|
|
1531
|
+
row_start_abs + cell_end_rel,
|
|
1532
|
+
)
|
|
1533
|
+
|
|
1534
|
+
return {
|
|
1535
|
+
"columns": columns,
|
|
1536
|
+
"cell_spans": cell_spans,
|
|
1537
|
+
"row_span": (row_start_abs, row_m.end()),
|
|
1538
|
+
}
|
|
1539
|
+
|
|
1540
|
+
|
|
1541
|
+
# --------------------------------------------------------------------------- #
|
|
1542
|
+
# Auto-archive helper #
|
|
1543
|
+
# --------------------------------------------------------------------------- #
|
|
1544
|
+
|
|
1545
|
+
|
|
1546
|
+
def auto_archive_feature(feature_id: str, repo_root: Path) -> str:
    """Re-implement roadmap-archive single-feature algorithm (Steps 1–6) in-driver.

    Moves the feature's inline `## <feature_id>` detail section from
    `.specfuse/roadmap.md` into `.specfuse/roadmap-archive.md` (behind an
    `<a id="...">` anchor placed right after the archive marker) and rewrites
    the row's Detail cell into a back-link to that anchor.

    Returns "archived", "already archived", or "refused: <reason>".
    No git operations; touches only roadmap.md and roadmap-archive.md under
    repo_root.

    Re-running is safe: when the archive already contains this feature's
    anchor (for example after an interrupted earlier run, or when the roadmap
    has no Detail column to carry the back-link), the section is not inserted
    a second time; the roadmap-side steps still run.
    """
    roadmap_path = repo_root / ".specfuse" / "roadmap.md"
    archive_path = repo_root / ".specfuse" / "roadmap-archive.md"

    feat_id_lower = feature_id.lower()
    anchor = f'<a id="{feat_id_lower}"></a>'
    back_link = f'[→ archive](roadmap-archive.md#{feat_id_lower})'
    marker = "<!-- Archived sections appended below -->"

    # Step 1 — read and validate table row (header-name based; issue #15)
    if not roadmap_path.exists():
        return f"refused: {roadmap_path} not found"
    roadmap_text = roadmap_path.read_text()

    parsed = _parse_roadmap_row(roadmap_text, feature_id)
    if parsed is None:
        return f"refused: {feature_id} not found in roadmap"

    status = parsed["columns"].get("Status", "")
    detail = parsed["columns"].get("Detail", "")

    if "roadmap-archive.md#" in detail:
        return "already archived"
    if status not in ("done", "abandoned"):
        return f"refused: status={status}"

    # Step 2 — extract inline section. The `(?!\w)` after the ID keeps the
    # match exact: without it an ID that is a prefix of another one (FEAT-1 vs
    # FEAT-10) would capture the other feature's section. The section runs up
    # to (not including) the next `## ` heading.
    section_re = re.compile(
        r'^(## ' + re.escape(feature_id) + r'(?!\w)[^\n]*(?:\n(?!## )[^\n]*)*\n?)',
        re.MULTILINE,
    )
    section_m = section_re.search(roadmap_text)
    had_inline_section = section_m is not None
    if had_inline_section:
        section_text = section_m.group(1).rstrip('\n') + '\n'
    else:
        # Row-only feature: a roadmap table row exists (status done/abandoned,
        # Detail still '—' — the back-link case already returned at Step 1) but
        # there is no inline `## FEAT-ID` detail section to move. /draft-feature
        # emits a table row without a detail section, so an auto-closed feature
        # drafted that way reaches here. Returning "already archived" without
        # writing the anchor leaves assert_terminal_flips_fired unsatisfiable
        # and halts the driver on archive_anchor_missing (FEAT-2026-0022
        # surfaced this live). Synthesize a minimal stub section so the anchor
        # and back-link still materialize.
        title = parsed["columns"].get("Title", "").strip()
        heading = f"## {feature_id}" + (f" — {title}" if title else "")
        section_text = (
            f"{heading}\n\n"
            "_No inline detail section was recorded for this feature; "
            "stub written at archive time._\n"
        )

    # Step 3 — insert anchor + section into the archive right after the marker.
    # Auto-create the archive file if a project never shipped it (the
    # roadmap-archive skill requires it to pre-exist; the unattended driver
    # must not crash on its absence — see FileNotFoundError on read_text).
    if not archive_path.exists():
        project = ""
        fm = re.match(r'^---\n(.*?)\n---', roadmap_text, re.DOTALL)
        if fm:
            pm = re.search(r'^project:\s*(.+)$', fm.group(1), re.MULTILINE)
            if pm:
                project = pm.group(1).strip()
        header = (
            (f"---\nproject: {project}\n---\n\n" if project else "")
            + "# Archived feature details\n\n"
            "This file holds the detail sections for features whose status has "
            "reached `done` or `abandoned`.\n\n"
            f"{marker}\n"
        )
        archive_path.write_text(header)
    archive_text = archive_path.read_text()
    if marker not in archive_text:
        return "refused: archive marker absent"
    if anchor not in archive_text:
        marker_end = archive_text.index(marker) + len(marker)
        new_archive = (
            archive_text[:marker_end]
            + f"\n{anchor}\n{section_text}"
            + archive_text[marker_end:]
        )
        archive_path.write_text(new_archive)
    # else: the anchor is already present from an earlier run — inserting the
    # section again would only duplicate it.

    # Step 4 — update Detail cell with back-link (skip if column absent; issue #15).
    # The cell offsets are still valid: roadmap_text has not been edited yet.
    if "Detail" in parsed["cell_spans"]:
        detail_start, detail_end = parsed["cell_spans"]["Detail"]
        roadmap_text = (
            roadmap_text[:detail_start] + f" {back_link} " + roadmap_text[detail_end:]
        )

    # Step 5 — remove inline section (re-search since row update shifted
    # offsets). Only when one actually existed to move; a synthesized stub
    # was never in roadmap.md, so there is nothing to strip.
    if had_inline_section:
        section_m2 = section_re.search(roadmap_text)
        if section_m2:
            roadmap_text = roadmap_text[:section_m2.start()] + roadmap_text[section_m2.end():]
    # Collapse runs of blank lines left behind by the removal.
    roadmap_text = re.sub(r'\n{3,}', '\n\n', roadmap_text)
    roadmap_path.write_text(roadmap_text)

    return "archived"
|
|
1648
|
+
|
|
1649
|
+
|
|
1650
|
+
def _legacy_4wu_terminal_close_complete(
|
|
1651
|
+
wu: "WorkUnit",
|
|
1652
|
+
units: "list[WorkUnit]",
|
|
1653
|
+
gate,
|
|
1654
|
+
gates: list,
|
|
1655
|
+
) -> bool:
|
|
1656
|
+
"""Detect legacy 4-WU close sequence completion on a terminal gate (issue #16).
|
|
1657
|
+
|
|
1658
|
+
Pre-FEAT-2026-0015 feature scaffolds use the four-WU closing sequence
|
|
1659
|
+
(`retrospective` → `lessons` → `docs` → `plan-next`). FEAT-2026-0015 wired
|
|
1660
|
+
`fire_terminal_flips` to fire only on `close`-type WUs, leaving the legacy
|
|
1661
|
+
sequence with no terminating-equivalent trigger. This helper recognizes
|
|
1662
|
+
completion of the 4-WU sequence as terminating-equivalent so the driver
|
|
1663
|
+
can fire `fire_terminal_flips` on the gate.
|
|
1664
|
+
|
|
1665
|
+
Returns True iff:
|
|
1666
|
+
- `wu.type == "plan-next"` (the last WU in the sequence)
|
|
1667
|
+
- `gate is gates[-1]` (terminal gate)
|
|
1668
|
+
- The gate's `units` include all four legacy types
|
|
1669
|
+
(`retrospective`, `lessons`, `docs`, `plan-next`) AND each is `done`.
|
|
1670
|
+
"""
|
|
1671
|
+
if wu.type != "plan-next":
|
|
1672
|
+
return False
|
|
1673
|
+
if gate is not gates[-1]:
|
|
1674
|
+
return False
|
|
1675
|
+
required = {"retrospective", "lessons", "docs", "plan-next"}
|
|
1676
|
+
have_done = {u.type for u in units if u.type in required and u.status == DONE}
|
|
1677
|
+
return required.issubset(have_done)
|
|
1678
|
+
|
|
1679
|
+
|
|
1680
|
+
def fire_terminal_flips(wu: WorkUnit, feature_dir: Path, repo_root: Path) -> list[Path]:
    """Flip terminal gate → passed, roadmap row → done, PLAN.md → done, then archive.

    Called for close-type WUs after squash when verdict_permits_terminal_flips
    is True. Non-fatal: each step that cannot be applied is skipped with a
    log message; the function only raises on internal exceptions.

    Returns the Paths actually modified (for the bookkeeping commit add list).
    """
    touched: set[Path] = set()
    feature_id = wu.wu_id.rsplit("/", 1)[0]

    # --- 1. Terminal gate file: awaiting_review -> passed ------------------- #
    _, gates = load_graph(feature_dir)
    if not gates:
        logging.warning("fire_terminal_flips: no gates in PLAN.md for %s", wu.wu_id)
    else:
        last_gate = gates[-1]
        gate_file = last_gate.file
        if not gate_file.exists():
            logging.warning(
                "fire_terminal_flips: terminal gate file absent: %s — skipping gate flip",
                gate_file,
            )
        else:
            gate_status = last_gate.status
            if gate_status == "awaiting_review":
                write_frontmatter_field(gate_file, "status", "passed")
                touched.add(gate_file)
            elif gate_status == "passed":
                logging.info(
                    "fire_terminal_flips: %s already passed — skipping gate flip",
                    gate_file.name,
                )
            else:
                logging.warning(
                    "fire_terminal_flips: %s status is %r (not awaiting_review or passed)"
                    " — skipping gate flip",
                    gate_file.name,
                    gate_status,
                )

    # --- 2. Roadmap row: active -> done ------------------------------------- #
    roadmap_path = repo_root / ".specfuse" / "roadmap.md"
    row = None
    if roadmap_path.exists():
        # Header-name based parsing — tolerates projects with extra columns
        # (e.g. Priority). See issue #15.
        roadmap_text = roadmap_path.read_text()
        row = _parse_roadmap_row(roadmap_text, feature_id)
        if row is None:
            logging.warning(
                "fire_terminal_flips: %s not found in roadmap.md — skipping row flip",
                feature_id,
            )
    else:
        logging.warning(
            "fire_terminal_flips: roadmap.md absent at %s — skipping row flip",
            roadmap_path,
        )
    if row is not None:
        row_status = row["columns"].get("Status", "")
        cell_start, cell_end = row["cell_spans"]["Status"]
        if row_status == "active":
            # Rewrite only the word inside the Status cell so the cell's
            # surrounding padding is left untouched.
            flipped_cell = roadmap_text[cell_start:cell_end].replace("active", "done", 1)
            roadmap_path.write_text(
                roadmap_text[:cell_start] + flipped_cell + roadmap_text[cell_end:]
            )
            touched.add(roadmap_path)
        elif row_status == "done":
            logging.info(
                "fire_terminal_flips: roadmap row for %s already done — skipping",
                feature_id,
            )
        else:
            logging.warning(
                "fire_terminal_flips: roadmap row for %s has status %r"
                " (not active or done) — skipping row flip",
                feature_id,
                row_status,
            )

    # --- 3. PLAN.md status -> done (FEAT-2026-0023/T01, closes #49) --------- #
    # The terminal PLAN flip lives in this single driver-side owner so that
    # BOTH the dispatched-close path (loop.run's close branch) and the
    # auto-close path (_fire_and_verify_terminal_flips) get it; previously
    # only the dispatched path's *agent* flipped PLAN.md, so the agent-less
    # auto-close path left it `active`. The flip is idempotent (no-op when
    # already `done`) and gated on verdict_permits_terminal_flips so a
    # hedged/non-met close does NOT flip PLAN to done.
    # The verdict is re-read from disk rather than taken from wu.verdict, to
    # mirror assert_terminal_flips_fired: the auto-close path writes
    # verdict=met to the WU file via mark_close_wu_auto_closed but leaves the
    # in-memory wu.verdict None, so disk is authoritative for both paths.
    # It is read only when the WU file exists. The legacy 4-WU close sequence
    # arrives here with a plan-next WU that has no verdict field (and whose
    # file may be a synthetic stub in tests); a missing file or a non-met
    # verdict just skips the PLAN flip, leaving legacy behavior unchanged
    # (those features flip PLAN via the plan-next agent, as before).
    disk_verdict = None
    if wu.file.is_file():
        wu_fm, _ = read_frontmatter(wu.file)
        disk_verdict = wu_fm.get("verdict") or None
    plan_path = feature_dir / "PLAN.md"
    if not verdict_permits_terminal_flips(disk_verdict):
        logging.info(
            "fire_terminal_flips: verdict %r does not permit terminal flips"
            " — skipping PLAN.md flip for %s",
            disk_verdict,
            wu.wu_id,
        )
    elif not plan_path.exists():
        logging.warning(
            "fire_terminal_flips: PLAN.md absent at %s — skipping PLAN flip",
            plan_path,
        )
    else:
        plan_fm, _ = read_frontmatter(plan_path)
        if plan_fm.get("status", "") == "done":
            logging.info(
                "fire_terminal_flips: PLAN.md for %s already done — skipping",
                feature_id,
            )
        else:
            write_frontmatter_field(plan_path, "status", "done")
            touched.add(plan_path)

    # --- 4. Move the feature's detail section into the archive -------------- #
    outcome = auto_archive_feature(feature_id, repo_root)
    if outcome == "archived":
        touched.add(roadmap_path)
        touched.add(repo_root / ".specfuse" / "roadmap-archive.md")
    elif outcome == "already archived":
        logging.info(
            "fire_terminal_flips: %s already archived — skipping auto-archive",
            feature_id,
        )
    else:
        logging.warning(
            "fire_terminal_flips: auto_archive_feature: %s — run /roadmap-archive manually",
            outcome,
        )

    return list(touched)
|
|
1822
|
+
|
|
1823
|
+
|
|
1824
|
+
# --------------------------------------------------------------------------- #
|
|
1825
|
+
# Terminal auto-close helpers (FEAT-2026-0018/T04) #
|
|
1826
|
+
# --------------------------------------------------------------------------- #
|
|
1827
|
+
|
|
1828
|
+
|
|
1829
|
+
def _already_auto_closed(wu_file: Path) -> bool:
|
|
1830
|
+
"""Return True iff the WU's on-disk frontmatter already shows it has been
|
|
1831
|
+
auto-closed (status=done AND auto_close truthy).
|
|
1832
|
+
|
|
1833
|
+
Idempotency guard for both maybe_auto_close_intermediate and
|
|
1834
|
+
maybe_auto_close_terminal — prevents the duplicate `auto_close_decision`
|
|
1835
|
+
event and duplicate bookkeeping commit observed in issue #23 when the
|
|
1836
|
+
dispatch loop re-enters with a stale in-memory wu.status.
|
|
1837
|
+
"""
|
|
1838
|
+
if not wu_file.is_file():
|
|
1839
|
+
return False
|
|
1840
|
+
fm, _ = read_frontmatter(wu_file)
|
|
1841
|
+
if fm.get("status") != DONE:
|
|
1842
|
+
return False
|
|
1843
|
+
auto = fm.get("auto_close")
|
|
1844
|
+
return auto in (True, "true", "True")
|
|
1845
|
+
|
|
1846
|
+
|
|
1847
|
+
def write_stub_retrospective_terminal(
    feature_dir: Path,
    gate_number: int,
    decision: AutoCloseDecision,
) -> None:
    """Write (or append) the auto-close stub section to RETROSPECTIVE.md.

    Satisfies both assert_retrospective_exists (non-empty file) and
    assert_retrospective_gate_section (^#{1,3} Gate N heading).

    The section records `decision.feature_id`, `decision.predicate_version`
    and the `gate_total_cost` / `gate_budget` entries of `decision.metrics`.
    A budget or cost of None is rendered as `<unset>`; a missing
    `gate_total_cost` key is rendered as `$0.00`.

    When RETROSPECTIVE.md already exists the section is appended after a
    blank line; otherwise the file is created with the section as its only
    content.
    """
    retro = feature_dir / "RETROSPECTIVE.md"
    metrics = decision.metrics
    budget = metrics.get("gate_budget")
    budget_str = f"${budget:.2f}" if budget is not None else "<unset>"
    # A key that is present but None must not reach the `:.2f` format below
    # (TypeError); treat it like an unset budget.
    total_cost = metrics.get("gate_total_cost", 0.0)
    cost_str = f"${total_cost:.2f}" if total_cost is not None else "<unset>"
    section = (
        f"## Gate {gate_number} — auto-closed (predicate=v1)\n\n"
        "On-plan close; full retrospective ceremony skipped per\n"
        "`evaluate_auto_close`.\n\n"
        f"- feature_id: {decision.feature_id}\n"
        f"- predicate_version: {decision.predicate_version}\n"
        f"- gate_total_cost: {cost_str}\n"
        f"- gate_budget: {budget_str}\n"
        "- reasons: [] (auto=True)\n"
    )
    if retro.exists():
        with retro.open("a") as fh:
            fh.write("\n" + section)
    else:
        retro.write_text(section)
|
|
1877
|
+
|
|
1878
|
+
|
|
1879
|
+
def mark_close_wu_auto_closed(
    wu: "WorkUnit | None",
    decision: AutoCloseDecision,
) -> None:
    """Stamp the close WU's frontmatter for the auto-close path.

    Writes, in order: status=done, verdict=met, auto_close=true and
    auto_close_reasons=[] into ``wu.file``. Does nothing when ``wu`` is None
    (a gate without a close WU). ``decision`` is part of the signature but is
    not read here.
    """
    if wu is None:
        return
    frontmatter_updates = (
        ("status", "done"),
        ("verdict", "met"),
        ("auto_close", "true"),
        ("auto_close_reasons", "[]"),
    )
    for field_name, field_value in frontmatter_updates:
        write_frontmatter_field(wu.file, field_name, field_value)
|
|
1895
|
+
|
|
1896
|
+
|
|
1897
|
+
def resolve_auto_close_override(
    args: "argparse.Namespace",
    feature_dir: Path,
) -> tuple[bool, str]:
    """Report whether auto-close is overridden, and by what.

    Returns ``(True, "force_full_close_cli_flag")`` when ``args`` carries a
    truthy ``force_full_close``; otherwise
    ``(True, "auto_close_disabled_per_plan")`` when PLAN.md exists and its
    frontmatter sets ``auto_close_disabled`` to True/"true"/"True";
    otherwise ``(False, "")``. The CLI flag is checked first.
    """
    cli_forced = getattr(args, "force_full_close", None)
    if cli_forced:
        return True, "force_full_close_cli_flag"

    plan_file = feature_dir / "PLAN.md"
    if not plan_file.is_file():
        return False, ""

    frontmatter, _body = read_frontmatter(plan_file)
    disabled = frontmatter.get("auto_close_disabled") in (True, "true", "True")
    if disabled:
        return True, "auto_close_disabled_per_plan"
    return False, ""
|
|
1910
|
+
|
|
1911
|
+
|
|
1912
|
+
def maybe_auto_close_terminal(
    feature_dir: Path,
    feature_id: str,
    gate: "GateNode",
    gates: "list[GateNode]",
    events_path: Path,
    close_wu_for_terminal: "WorkUnit | None",
    repo_root: Path = REPO_ROOT,
) -> tuple[bool, AutoCloseDecision]:
    """Run the auto-close predicate for the terminal gate and act on it.

    Returns ``(True, decision)`` when the predicate fired: the stub
    retrospective section was written, the close WU (if any) was marked, and
    one ``auto_close_decision`` event was flushed. Returns
    ``(False, decision)`` when the predicate refused, so the caller can fall
    through to the regular close-WU dispatch.

    Idempotent with respect to the close WU: when its file already reads as
    auto-closed, nothing is written or emitted and a synthetic
    ``auto=False`` decision with reason ``already_auto_closed`` is returned
    (issue #23).
    """
    close_wu = close_wu_for_terminal
    if close_wu is not None and _already_auto_closed(close_wu.file):
        already_done = AutoCloseDecision(
            auto=False,
            reasons=["already_auto_closed"],
            metrics={},
            gate_id=gate.number,
            feature_id=feature_id,
            predicate_version="v1",
        )
        return False, already_done

    decision = evaluate_auto_close(feature_dir, gate.number)
    if not decision.auto:
        return False, decision

    write_stub_retrospective_terminal(feature_dir, gate.number, decision)
    mark_close_wu_auto_closed(close_wu, decision)

    gate_metrics = decision.metrics
    event_payload = {
        "gate": gate.number,
        "auto": True,
        "reasons": decision.reasons,
        "predicate_version": decision.predicate_version,
        "metrics": {
            "gate_total_cost": gate_metrics.get("gate_total_cost", 0.0),
            "gate_budget": gate_metrics.get("gate_budget"),
            "blocked_human_events": gate_metrics.get("blocked_human_events", []),
            "replan_events": gate_metrics.get("replan_events", []),
        },
    }
    event = build_event("auto_close_decision", feature_id, event_payload)
    flush_events(events_path, [event])
    return True, decision
|
|
1961
|
+
|
|
1962
|
+
|
|
1963
|
+
# --------------------------------------------------------------------------- #
|
|
1964
|
+
# Intermediate auto-close helpers (FEAT-2026-0018/T05) #
|
|
1965
|
+
# --------------------------------------------------------------------------- #
|
|
1966
|
+
|
|
1967
|
+
|
|
1968
|
+
def append_stub_retrospective_intermediate(
    feature_dir: Path,
    gate_number: int,
    decision: AutoCloseDecision,
) -> None:
    """Append a Gate N auto-close stub to RETROSPECTIVE.md, creating the file if absent.

    Idempotent: returns without writing when the file already has a
    ``## Gate N ... auto-closed`` heading (re-arm guard, AC5). The heading
    written here is the kind ``assert_retrospective_gate_section`` searches
    for.

    Args:
        feature_dir: Feature directory that holds RETROSPECTIVE.md.
        gate_number: Gate being closed; the body text names gate N+1 as the
            gate to draft next.
        decision: Auto-close decision; ``metrics``, ``feature_id`` and
            ``predicate_version`` are read from it.
    """
    retro = feature_dir / "RETROSPECTIVE.md"
    retro_present = retro.exists()
    # Decode as UTF-8 to match what is written below. errors="replace" keeps
    # the guard from crashing on a file that was saved in another encoding;
    # the heading pattern itself is pure ASCII.
    if retro_present and re.search(
        rf"^##\s+Gate\s+{gate_number}\b.*auto-closed",
        retro.read_text(encoding="utf-8", errors="replace"),
        re.MULTILINE,
    ):
        return
    metrics = decision.metrics
    budget = metrics.get("gate_budget")
    budget_str = f"${budget:.2f}" if budget is not None else "<unset>"
    total_cost = metrics.get("gate_total_cost", 0.0)
    section = (
        f"## Gate {gate_number} — auto-closed (predicate=v1)\n\n"
        f"On-plan intermediate close; full close-intermediate ceremony\n"
        f"skipped per `evaluate_auto_close`. `plan-next` WU dispatched\n"
        f"to draft gate {gate_number + 1}.\n\n"
        f"- feature_id: {decision.feature_id}\n"
        f"- predicate_version: {decision.predicate_version}\n"
        f"- gate_total_cost: ${total_cost:.2f}\n"
        f"- gate_budget: {budget_str}\n"
        f"- reasons: [] (auto=True)\n"
    )
    # The heading contains a non-ASCII em dash, so pin UTF-8 rather than
    # inheriting the platform's locale encoding.
    if retro_present:
        with retro.open("a", encoding="utf-8") as fh:
            # Leading newline separates the stub from the existing content.
            fh.write("\n" + section)
    else:
        retro.write_text(section, encoding="utf-8")
|
|
2005
|
+
|
|
2006
|
+
|
|
2007
|
+
def maybe_auto_close_intermediate(
    feature_dir: Path,
    feature_id: str,
    gate: "GateNode",
    gates: "list[GateNode]",
    events_path: Path,
    repo_root: Path,
    close_intermediate_wu: "WorkUnit | None",
    plan_next_wu: "WorkUnit | None",
) -> tuple[bool, AutoCloseDecision]:
    """Run the auto-close predicate for an intermediate (non-terminal) gate.

    Returns ``(True, decision)`` when the predicate fired: the stub
    retrospective section was appended, the close-intermediate WU (if any)
    was marked status=done / auto_close=true / auto_close_reasons=[], and one
    ``auto_close_decision`` event was flushed. Returns ``(False, decision)``
    when the predicate refused, so the caller can fall through to the regular
    close-intermediate dispatch.

    This helper does not dispatch ``plan_next_wu``; the caller does that
    afterwards (AC4). No ``verdict`` is written, because close-intermediate
    has no terminal verdict.

    Idempotent: when the close-intermediate WU file already reads as
    auto-closed, nothing is written and NO event is emitted; a synthetic
    ``auto=False`` decision with reason ``already_auto_closed`` is returned.
    This prevents the duplicate event and bookkeeping commit seen in #23 when
    the dispatch loop re-entered with a stale in-memory ``wu.status``.
    """
    close_wu = close_intermediate_wu
    if close_wu is not None and _already_auto_closed(close_wu.file):
        already_done = AutoCloseDecision(
            auto=False,
            reasons=["already_auto_closed"],
            metrics={},
            gate_id=gate.number,
            feature_id=feature_id,
            predicate_version="v1",
        )
        return False, already_done

    decision = evaluate_auto_close(feature_dir, gate.number)
    if not decision.auto:
        return False, decision

    append_stub_retrospective_intermediate(feature_dir, gate.number, decision)

    if close_wu is not None:
        frontmatter_updates = (
            ("status", "done"),
            ("auto_close", "true"),
            ("auto_close_reasons", "[]"),
        )
        for field_name, field_value in frontmatter_updates:
            write_frontmatter_field(close_wu.file, field_name, field_value)

    event_payload = {
        "gate": gate.number,
        "gate_type": "intermediate",
        "auto": True,
        "reasons": decision.reasons,
        "plan_next_dispatched": True,
        "predicate_version": decision.predicate_version,
    }
    event = build_event("auto_close_decision", feature_id, event_payload)
    flush_events(events_path, [event])
    return True, decision
|
|
2061
|
+
|
|
2062
|
+
|
|
2063
|
+
def _fire_and_verify_terminal_flips(
    close_wu: "WorkUnit",
    feature_dir: Path,
    events_path: Path,
    feature_id: str,
) -> int:
    """Fire the terminal state flips, then run the post-pass invariant guard.

    Returns 0 when the invariants hold. Returns 1 when the guard fails, after
    flushing a ``human_escalation`` event, committing events.jsonl and
    printing a diagnostic. Shared by the auto-close path and the normal
    close-WU path so the fire+verify sequence exists once (FEAT-2026-0018/T04).
    """
    flipped = fire_terminal_flips(close_wu, feature_dir, REPO_ROOT)
    if flipped:
        flips_message = (
            f"chore(loop): {close_wu.wu_id} terminal flips\n\nFeature: {feature_id}"
        )
        commit_bookkeeping(flipped, flips_message)

    # Read HEAD only after the optional flips commit so the guard checks the
    # post-flip state.
    head_after = git("rev-parse", "HEAD")
    passed, failure = verify_post_pass_invariants(
        close_wu, feature_dir, REPO_ROOT, head_after,
    )
    if passed:
        return 0

    escalation = build_event(
        "human_escalation", close_wu.wu_id, {
            "reason": "post_pass_invariant_failed",
            # The assertion name is the text before the first ':'.
            "assertion": failure.split(":", 1)[0].strip(),
            "summary": failure,
        })
    flush_events(events_path, [escalation])
    failure_message = (
        f"chore(loop): {close_wu.wu_id} "
        f"post_pass_invariant_failed\n\nFeature: {feature_id}"
    )
    commit_bookkeeping([events_path], failure_message)
    print(f"\n POST-PASS INVARIANT FAILED — {failure}")
    print(
        "Close WU passed with verdict=met but a terminal flip did "
        "not materialize. This is the FEAT-2026-0015/T06 "
        "wiring-race regression surface. Inspect events.jsonl "
        "and the fire_terminal_flips wiring."
    )
    return 1
|
|
2105
|
+
|
|
2106
|
+
|
|
2107
|
+
# --------------------------------------------------------------------------- #
|
|
2108
|
+
# Closing-ceremony deliverable guards (FEAT-2026-0015/T07) #
|
|
2109
|
+
# --------------------------------------------------------------------------- #
|
|
2110
|
+
|
|
2111
|
+
|
|
2112
|
+
def _gate_number_from_wu_id(wu_id: str) -> int | None:
|
|
2113
|
+
"""Parse gate number from a closing WU ID like FEAT-2026-0015/G1-PLAN."""
|
|
2114
|
+
segment = wu_id.rsplit("/", 1)[-1]
|
|
2115
|
+
m = re.match(r"G(\d+)-", segment)
|
|
2116
|
+
return int(m.group(1)) if m else None
|
|
2117
|
+
|
|
2118
|
+
|
|
2119
|
+
def assert_retrospective_exists(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-a) Require a RETROSPECTIVE.md with non-whitespace content in the feature dir.

    Only ``feature_dir`` is consulted; the other parameters are part of the
    common closing-assertion signature.
    """
    retro_path = feature_dir / "RETROSPECTIVE.md"
    has_content = retro_path.exists() and bool(retro_path.read_text().strip())
    if has_content:
        return True, ""
    failure = (
        "assert_retrospective_exists: RETROSPECTIVE.md absent or empty in feature dir"
    )
    return False, failure
|
|
2130
|
+
|
|
2131
|
+
|
|
2132
|
+
def assert_learnings_appended_or_noop(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-b) LEARNINGS.md has ≥1 added line in this squash, or RETRO says 'nothing generalizes'.

    Args:
        wu: Work unit being checked (not read here).
        feature_dir: Feature directory holding RETROSPECTIVE.md.
        repo_root: Repository root; git runs here so the relative pathspec
            resolves against the repository, not the process cwd.
        head_before: Revision the squash is diffed against (``head_before..HEAD``).

    Returns:
        ``(True, "")`` when the diff adds at least one line to
        ``.specfuse/LEARNINGS.md`` or RETROSPECTIVE.md contains
        "nothing generalizes" (case-insensitive); otherwise ``(False, reason)``.
    """
    # Git pathspecs are relative to the working directory, so run from
    # repo_root; from a subdirectory the pathspec would silently match nothing.
    proc = subprocess.run(
        ["git", "diff", head_before, "HEAD", "--", ".specfuse/LEARNINGS.md"],
        capture_output=True, text=True, cwd=repo_root,
    )
    # "+" marks an added line; "+++" is the file header of the unified diff.
    added = any(
        ln.startswith("+") and not ln.startswith("+++")
        for ln in proc.stdout.splitlines()
    )
    if added:
        return True, ""
    retro = feature_dir / "RETROSPECTIVE.md"
    if retro.exists() and "nothing generalizes" in retro.read_text().lower():
        return True, ""
    return (
        False,
        "assert_learnings_appended_or_noop: no LEARNINGS.md additions in squash "
        "and no 'nothing generalizes' note in RETROSPECTIVE.md",
    )
|
|
2154
|
+
|
|
2155
|
+
|
|
2156
|
+
def assert_doc_or_roadmap_diff(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-c) A documentation deliverable appears in the squash diff.

    Accepts: docs/*, .specfuse/roadmap.md, .specfuse/LEARNINGS.md, or any
    file named RETROSPECTIVE.md (under a feature dir). The roadmap.md case
    survives only for close-intermediate WUs that legitimately edit it;
    terminal close WUs do NOT touch roadmap.md (FEAT-2026-0015/T06
    consolidated that driver-side) — they deliver RETROSPECTIVE.md and
    LEARNINGS.md instead.

    Args:
        wu: Work unit being checked; ``type`` and ``body`` are read for the
            close-intermediate opt-out.
        feature_dir: Feature directory (not read here).
        repo_root: Repository root; git runs here so the diff is taken in
            that repository regardless of the process cwd.
        head_before: Revision the squash is diffed against (``head_before..HEAD``).

    Returns:
        ``(True, "")`` when an accepted path is in the diff, or when a
        close-intermediate WU body mentions neither ``docs/`` nor
        ``roadmap.md``; otherwise ``(False, reason)``.
    """
    proc = subprocess.run(
        ["git", "diff", "--name-only", head_before, "HEAD"],
        capture_output=True, text=True, cwd=repo_root,
    )
    for path in proc.stdout.splitlines():
        if path == ".specfuse/roadmap.md" or path.startswith("docs/"):
            return True, ""
        if path == ".specfuse/LEARNINGS.md":
            return True, ""
        if path.endswith("/RETROSPECTIVE.md") or path == "RETROSPECTIVE.md":
            return True, ""
    # For close-intermediate: skip when the WU spec declares no doc surface.
    if wu.type == "close-intermediate":
        if "docs/" not in wu.body and "roadmap.md" not in wu.body:
            return True, ""
    return (
        False,
        "assert_doc_or_roadmap_diff: no docs/, .specfuse/roadmap.md, "
        ".specfuse/LEARNINGS.md, or RETROSPECTIVE.md file in squash diff",
    )
|
|
2188
|
+
|
|
2189
|
+
|
|
2190
|
+
def assert_verdict_well_formed(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-d) Require a ``verdict`` frontmatter value that is in VERDICT_VALUES.

    The verdict is re-read from ``wu.file`` on disk rather than taken from
    ``wu.verdict``: the agent writes ``verdict:`` DURING dispatch, whereas
    ``wu.verdict`` was loaded BEFORE dispatch and is therefore stale. Without
    the re-read the assertion would never see the agent's write and would
    spin to MAX_ATTEMPTS, rolling back artifacts each time (issue #12). On
    success ``wu.verdict`` is refreshed in memory so later checks see the
    on-disk value.
    """
    frontmatter, _body = read_frontmatter(wu.file)
    verdict = frontmatter.get("verdict")
    well_formed = verdict is not None and verdict in VERDICT_VALUES
    if well_formed:
        wu.verdict = verdict
        return True, ""
    failure = (
        f"assert_verdict_well_formed: verdict {verdict!r} absent or not in "
        f"VERDICT_VALUES ({sorted(VERDICT_VALUES)})"
    )
    return False, failure
|
|
2213
|
+
|
|
2214
|
+
|
|
2215
|
+
def assert_cost_analysis_section_when_met(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-e) When verdict is 'met', RETROSPECTIVE.md needs a 'Cost analysis' heading.

    The verdict is re-read from ``wu.file`` on disk (the in-memory
    ``wu.verdict`` may predate the agent's write), which also keeps this
    check independent of whether close-d ran first. Any verdict other than
    "met" passes. The heading match is case-insensitive and accepts two or
    more leading ``#``.
    """
    frontmatter, _body = read_frontmatter(wu.file)
    if frontmatter.get("verdict") != "met":
        return True, ""

    retro = feature_dir / "RETROSPECTIVE.md"
    heading = re.compile(r"^##+ Cost analysis", re.MULTILINE | re.IGNORECASE)
    if retro.exists() and heading.search(retro.read_text()):
        return True, ""

    failure = (
        "assert_cost_analysis_section_when_met: verdict=met but '## Cost analysis' "
        "section absent from RETROSPECTIVE.md"
    )
    return False, failure
|
|
2238
|
+
|
|
2239
|
+
|
|
2240
|
+
# Returned verbatim by summarize_attempt_failure_classes when no non-passing
# attempt matches. Callers compare against it with `==`, so the text
# (including the trailing newline) must stay byte-stable.
_NO_FAILURES_SENTINEL = "### Failure-class breakdown\n\n(no non-passing attempts in scope)\n"
|
|
2241
|
+
|
|
2242
|
+
|
|
2243
|
+
def summarize_attempt_failure_classes(
|
|
2244
|
+
feature_dir: Path,
|
|
2245
|
+
gate_n: int | None = None,
|
|
2246
|
+
) -> str:
|
|
2247
|
+
"""Render a '### Failure-class breakdown' markdown table from events.jsonl.
|
|
2248
|
+
|
|
2249
|
+
Reads attempt_outcome events whose outcome != 'passed'. When gate_n is
|
|
2250
|
+
provided, restricts to events whose correlation_id belongs to that gate
|
|
2251
|
+
(resolved via _gate_number_from_wu_id). Returns _NO_FAILURES_SENTINEL when
|
|
2252
|
+
no non-passing attempts match the filter.
|
|
2253
|
+
|
|
2254
|
+
Pure function — reads events.jsonl; no writes, no side effects.
|
|
2255
|
+
Malformed JSONL lines are skipped (legacy-event tolerance, AC5).
|
|
2256
|
+
"""
|
|
2257
|
+
events_path = feature_dir / "events.jsonl"
|
|
2258
|
+
if not events_path.exists():
|
|
2259
|
+
return _NO_FAILURES_SENTINEL
|
|
2260
|
+
|
|
2261
|
+
non_passing: list[dict] = []
|
|
2262
|
+
for raw in events_path.read_text(encoding="utf-8").splitlines():
|
|
2263
|
+
raw = raw.strip()
|
|
2264
|
+
if not raw:
|
|
2265
|
+
continue
|
|
2266
|
+
try:
|
|
2267
|
+
evt = json.loads(raw)
|
|
2268
|
+
except json.JSONDecodeError:
|
|
2269
|
+
continue
|
|
2270
|
+
if evt.get("event_type") != "attempt_outcome":
|
|
2271
|
+
continue
|
|
2272
|
+
payload = evt.get("payload") or {}
|
|
2273
|
+
if payload.get("outcome") == "passed":
|
|
2274
|
+
continue
|
|
2275
|
+
if gate_n is not None:
|
|
2276
|
+
cid = evt.get("correlation_id", "")
|
|
2277
|
+
if _gate_number_from_wu_id(cid) != gate_n:
|
|
2278
|
+
continue
|
|
2279
|
+
non_passing.append(payload)
|
|
2280
|
+
|
|
2281
|
+
if not non_passing:
|
|
2282
|
+
return _NO_FAILURES_SENTINEL
|
|
2283
|
+
|
|
2284
|
+
# Group by failure_class; collect signatures for dominant-sig resolution.
|
|
2285
|
+
class_counts: dict[str, int] = {}
|
|
2286
|
+
class_signatures: dict[str, list[str]] = {}
|
|
2287
|
+
for p in non_passing:
|
|
2288
|
+
fc = str(p.get("failure_class") or "null")
|
|
2289
|
+
sig = str(p.get("failure_signature") or "")
|
|
2290
|
+
class_counts[fc] = class_counts.get(fc, 0) + 1
|
|
2291
|
+
class_signatures.setdefault(fc, []).append(sig)
|
|
2292
|
+
|
|
2293
|
+
def _dominant(sigs: list[str]) -> str:
|
|
2294
|
+
freq: dict[str, int] = {}
|
|
2295
|
+
for s in sigs:
|
|
2296
|
+
freq[s] = freq.get(s, 0) + 1
|
|
2297
|
+
return max(freq, key=lambda k: (freq[k], -sigs.index(k)))
|
|
2298
|
+
|
|
2299
|
+
# Sort: count descending, class ascending for ties.
|
|
2300
|
+
rows = sorted(
|
|
2301
|
+
class_counts.items(),
|
|
2302
|
+
key=lambda item: (-item[1], item[0]),
|
|
2303
|
+
)
|
|
2304
|
+
|
|
2305
|
+
lines = [
|
|
2306
|
+
"### Failure-class breakdown",
|
|
2307
|
+
"",
|
|
2308
|
+
"| failure_class | non-passed attempts | dominant signature |",
|
|
2309
|
+
"|---------------|---------------------|--------------------|",
|
|
2310
|
+
]
|
|
2311
|
+
total = 0
|
|
2312
|
+
for fc, count in rows:
|
|
2313
|
+
dom = _dominant(class_signatures[fc])
|
|
2314
|
+
lines.append(f"| {fc} | {count} | {dom} |")
|
|
2315
|
+
total += count
|
|
2316
|
+
lines.append(f"| **total** | **{total}** | — |")
|
|
2317
|
+
lines.append("")
|
|
2318
|
+
return "\n".join(lines)
|
|
2319
|
+
|
|
2320
|
+
|
|
2321
|
+
def assert_failure_class_breakdown_when_failures_present(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-f / close-intermediate-d) RETROSPECTIVE.md has '### Failure-class breakdown'
    when non-passing attempt_outcome events exist for the gate.

    Returns (True, "") when:
    - RETROSPECTIVE.md is absent (assert_retrospective_exists fires first for 'close';
      assert_retrospective_gate_section fires first for 'close-intermediate').
    - No non-passing attempts exist in events.jsonl for the gate.
    - The heading is present.

    Returns (False, reason) when non-passing attempts exist but the heading is absent.
    The reason includes the number of non-passing attempts, counted while
    skipping malformed lines (invalid JSON, non-object lines or payloads, and
    non-string correlation ids when a gate filter applies).
    """
    retro = feature_dir / "RETROSPECTIVE.md"
    if not retro.exists():
        return True, ""

    gate_n = _gate_number_from_wu_id(wu.wu_id)
    summary = summarize_attempt_failure_classes(feature_dir, gate_n)

    if summary == _NO_FAILURES_SENTINEL:
        return True, ""

    if re.search(r"^#{3} Failure-class breakdown\b", retro.read_text(), re.MULTILINE):
        return True, ""

    # Count non-passing attempts for the error message.
    events_path = feature_dir / "events.jsonl"
    count = 0
    if events_path.exists():
        for raw in events_path.read_text(encoding="utf-8").splitlines():
            raw = raw.strip()
            if not raw:
                continue
            try:
                evt = json.loads(raw)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object (e.g. a list) is not an event.
            if not isinstance(evt, dict) or evt.get("event_type") != "attempt_outcome":
                continue
            payload = evt.get("payload") or {}
            if not isinstance(payload, dict) or payload.get("outcome") == "passed":
                continue
            if gate_n is not None:
                cid = evt.get("correlation_id")
                # A null/non-string id cannot belong to any gate.
                if not isinstance(cid, str) or _gate_number_from_wu_id(cid) != gate_n:
                    continue
            count += 1

    gate_label = f"gate {gate_n}" if gate_n is not None else "all gates"
    return (
        False,
        f"assert_failure_class_breakdown_when_failures_present: {count} "
        f"non-passing attempt(s) in {gate_label} but '### Failure-class breakdown' "
        f"subsection absent from RETROSPECTIVE.md",
    )
|
|
2378
|
+
|
|
2379
|
+
|
|
2380
|
+
def assert_retrospective_gate_section(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(close-intermediate-a) RETROSPECTIVE.md has a 'Gate N' heading for this WU's gate.

    N is parsed from ``wu.wu_id``. A heading with one to three leading ``#``
    followed by ``Gate N`` at a line start satisfies the check. Fails when the
    gate number cannot be parsed, the file is absent, or no heading matches.
    """
    gate_n = _gate_number_from_wu_id(wu.wu_id)
    if gate_n is None:
        return False, "assert_retrospective_gate_section: cannot parse gate number from wu_id"

    retro = feature_dir / "RETROSPECTIVE.md"
    if not retro.exists():
        return False, "assert_retrospective_gate_section: RETROSPECTIVE.md absent in feature dir"

    heading_found = re.search(
        rf"^#{{1,3}} Gate {gate_n}\b", retro.read_text(), re.MULTILINE,
    )
    if heading_found:
        return True, ""

    failure = (
        f"assert_retrospective_gate_section: RETROSPECTIVE.md has no "
        f"'## Gate {gate_n}' or '### Gate {gate_n}' section"
    )
    return False, failure
|
|
2403
|
+
|
|
2404
|
+
|
|
2405
|
+
def assert_gate_review_exists(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(plan-next-a) GATE-(N+1)-REVIEW.md exists with content, unless there is no gate N+1.

    N is parsed from ``wu.wu_id``. When the graph loaded from the feature dir
    has no gate numbered N+1, the feature is treated as terminal and no
    review file is expected.
    """
    gate_n = _gate_number_from_wu_id(wu.wu_id)
    if gate_n is None:
        return False, "assert_gate_review_exists: cannot parse gate number from wu_id"

    following = gate_n + 1
    _, gates = load_graph(feature_dir)
    has_following_gate = any(g.number == following for g in gates)
    if not has_following_gate:
        # No next gate defined: terminal, so no review is expected.
        return True, ""

    review_name = f"GATE-{following:02d}-REVIEW.md"
    review = feature_dir / review_name
    if review.exists() and review.read_text().strip():
        return True, ""
    return False, f"assert_gate_review_exists: {review_name} absent or empty"
|
|
2427
|
+
|
|
2428
|
+
|
|
2429
|
+
def assert_next_gate_drafted_or_terminal(
    wu: WorkUnit, feature_dir: Path, repo_root: Path, head_before: str,
) -> tuple[bool, str]:
    """(plan-next-b) The next gate has drafted WUs, or the feature is terminal.

    Passes, checked in this order, when:
    - PLAN.md frontmatter has ``status == "done"``;
    - the feature's row in ``.specfuse/roadmap.md`` has "done" in the second
      cell after the feature id;
    - the graph has no gate N+1 (N parsed from ``wu.wu_id``);
    - gate N+1 exists and its ``refs`` is non-empty.
    Fails when the gate number cannot be parsed or gate N+1 has no refs.
    """
    plan_frontmatter, _body = read_frontmatter(feature_dir / "PLAN.md")
    if plan_frontmatter.get("status") == "done":
        return True, ""

    feature_id = wu.wu_id.rsplit("/", 1)[0]
    roadmap = repo_root / ".specfuse" / "roadmap.md"
    if roadmap.exists():
        # Table row: | <feature id> | <cell 1> | <cell 2> | ...
        row_pattern = r"^\|\s*" + re.escape(feature_id) + r"\s*\|([^|]*)\|([^|]*)\|"
        row = re.search(row_pattern, roadmap.read_text(), re.MULTILINE)
        if row is not None and row.group(2).strip() == "done":
            return True, ""

    gate_n = _gate_number_from_wu_id(wu.wu_id)
    if gate_n is None:
        return (
            False,
            "assert_next_gate_drafted_or_terminal: cannot parse gate number from wu_id",
        )

    following = gate_n + 1
    _, gates = load_graph(feature_dir)
    candidates = [g for g in gates if g.number == following]
    # No gate N+1 in PLAN.md → terminal; otherwise it must reference ≥1 WU.
    if not candidates or candidates[0].refs:
        return True, ""

    failure = (
        f"assert_next_gate_drafted_or_terminal: gate {following} has no drafted "
        f"work_units in PLAN.md and neither PLAN.md nor roadmap marks done"
    )
    return False, failure
|
|
2465
|
+
|
|
2466
|
+
|
|
2467
|
+
# Closing-deliverable assertions keyed by WU type. assert_closing_deliverables
# runs the list for wu.type in order and stops at the first failure, so the
# order here decides which failure is reported. Every entry is called as
# fn(wu, feature_dir, repo_root, head_before) and returns (ok, reason).
CLOSING_ASSERTIONS_BY_TYPE: dict[str, list] = {
    "close": [
        assert_retrospective_exists,
        assert_learnings_appended_or_noop,
        assert_doc_or_roadmap_diff,
        assert_verdict_well_formed,
        assert_cost_analysis_section_when_met,
        assert_failure_class_breakdown_when_failures_present,
    ],
    "close-intermediate": [
        assert_retrospective_gate_section,
        assert_learnings_appended_or_noop,
        assert_doc_or_roadmap_diff,
        assert_failure_class_breakdown_when_failures_present,
    ],
    "plan-next": [
        assert_gate_review_exists,
        assert_next_gate_drafted_or_terminal,
    ],
}
|
|
2487
|
+
|
|
2488
|
+
|
|
2489
|
+
def assert_closing_deliverables(
    wu: WorkUnit,
    feature_dir: Path,
    repo_root: Path,
    head_before: str,
) -> tuple[bool, str]:
    """Run the closing-deliverable assertions registered for ``wu.type`` (FEAT-2026-0015/T07).

    Returns ``(True, "")`` when the type has no registered assertions (e.g.
    implementation WUs, which other guards handle) or when every assertion
    passes. Otherwise returns ``(False, reason)`` from the first assertion
    that fails; later ones are not run.

    There is deliberately no "diff is empty" bypass: a close-type WU whose
    squash holds only the driver's own WU-file bookkeeping is a hollow pass
    and MUST fail one of the typed assertions. An earlier bypass added for
    test-fixture convenience also let real hollow-pass close ceremonies
    through (FEAT-2026-0017/G1-CLOSE attempt-3 surface).
    """
    checks = CLOSING_ASSERTIONS_BY_TYPE.get(wu.type) or ()
    for check in checks:
        passed, why = check(wu, feature_dir, repo_root, head_before)
        if not passed:
            return False, why
    return True, ""
|
|
2516
|
+
|
|
2517
|
+
|
|
2518
|
+
def assert_implementation_touched_files(
    wu: WorkUnit,
    touched: list[str],
) -> tuple[bool, str]:
    """Empty-files escalation for implementation WUs (FEAT-2026-0022/T03).

    Non-``implementation`` WU types always pass here. For an
    ``implementation`` WU, ``touched`` is filtered by basename to drop the
    WU's own file and any ``events.jsonl``; the check passes only if
    something remains. An implementation WU whose diff names nothing else
    produced no deliverable and cannot be ``done``.

    The check is independent of ``produces:`` and of what the agent claimed.
    ``touched`` is expected to include the WU's own status flip; stripping it
    here is what lets the guard fire on a bookkeeping-only diff.
    """
    if wu.type != "implementation":
        return True, ""

    bookkeeping_names = {wu.file.name, "events.jsonl"}
    remaining = [
        entry for entry in touched
        if Path(entry).name not in bookkeeping_names
    ]
    if remaining:
        return True, ""

    failure = (
        f"implementation WU {wu.wu_id} produced no deliverable files: the "
        f"squash diff names only its own WU file and/or events.jsonl"
    )
    return False, failure
|
|
2553
|
+
|
|
2554
|
+
|
|
2555
|
+
def assert_declared_deliverables(wu: WorkUnit) -> tuple[bool, str]:
    """Check that every ``produces:`` path exists and has content.

    Deliverable-presence gate (FEAT-2026-0022/T02). Each declared path is
    tested with ``test -s`` semantics: ``Path(p).exists()`` and
    ``Path(p).stat().st_size > 0``. A zero-length file counts as missing,
    because an empty deliverable is a hollow deliverable. This catches the
    partial-bundle hollow pass (FEAT-2026-0020/T12: SECURITY.md present,
    bundled CODE_OF_CONDUCT.md absent).

    The check is file-level only; symbol-level checks are out of scope
    (PLAN Scope OUT).

    Returns:
        ``(True, "")`` when ``wu.produces`` is empty (the opt-out: an
        undeclared ``produces:`` means no gate, exactly as
        ``verify_files_changed``'s absence opt-out, loop.py:994) or when every
        declared path exists and is non-empty. Otherwise ``(False, summary)``
        for the FIRST offending path, saying whether it was absent or empty.
    """
    declared = wu.produces
    if declared:
        for entry in declared:
            label = str(entry)
            target = Path(label)
            if target.exists():
                if target.stat().st_size != 0:
                    continue  # present and non-empty: this one is fine
                return False, f"declared deliverable empty: {label}"
            return False, f"declared deliverable absent: {label}"
    return True, ""
|
|
2582
|
+
|
|
2583
|
+
|
|
2584
|
+
# --------------------------------------------------------------------------- #
|
|
2585
|
+
# Post-pass driver-state invariants (FEAT-2026-0017/T01) #
|
|
2586
|
+
# --------------------------------------------------------------------------- #
|
|
2587
|
+
|
|
2588
|
+
|
|
2589
|
+
def assert_terminal_flips_fired(
    wu: WorkUnit,
    feature_dir: Path,
    repo_root: Path,
    head_before: str,
) -> tuple[bool, str]:
    """Confirm the terminal state-flips exist after a ``verdict: met`` close.

    Post-pass invariant for close WUs. The WU frontmatter is re-read from
    disk; unless its verdict is exactly ``"met"`` there is nothing to check
    and the result is ``(True, "")``. Otherwise the checks run in this order
    and the first failure is returned as ``(False, reason)``:

    - the terminal (last) gate file exists and carries ``status: passed``
    - the roadmap row for the feature has Status column ``done``
    - the roadmap archive contains the anchor ``<a id="<feat_lc>"></a>``

    ``head_before`` is accepted only so the signature matches the other
    assertion functions; the check is pure file state and never reads it.
    """
    wu_frontmatter, _ = read_frontmatter(wu.file)
    if (wu_frontmatter.get("verdict") or None) != "met":
        return True, ""

    # The feature id is the WU id with its final "/<task>" segment removed.
    feature_id = wu.wu_id.rsplit("/", 1)[0]

    # --- terminal gate ----------------------------------------------------
    _, gates = load_graph(feature_dir)
    if not gates:
        return False, "terminal_gate_not_passed: PLAN.md has no gates"
    last_gate_file = gates[-1].file
    if not last_gate_file.exists():
        return False, f"terminal_gate_not_passed: {last_gate_file.name} absent"
    last_gate_fm, _ = read_frontmatter(last_gate_file)
    status_on_disk = last_gate_fm.get("status", "")
    if status_on_disk != "passed":
        return (
            False,
            f"terminal_gate_not_passed: {last_gate_file.name} "
            f"status={status_on_disk!r}",
        )

    # --- roadmap row ------------------------------------------------------
    specfuse_dir = repo_root / ".specfuse"
    roadmap = specfuse_dir / "roadmap.md"
    if not roadmap.exists():
        return False, f"roadmap_row_not_done: roadmap.md absent at {roadmap}"
    # A table row whose first cell is the feature id; the second captured
    # cell (third column) is the one compared against "done".
    row_pattern = r"^\|\s*" + re.escape(feature_id) + r"\s*\|([^|]*)\|([^|]*)\|"
    row = re.search(row_pattern, roadmap.read_text(), flags=re.MULTILINE)
    if row is None:
        return False, f"roadmap_row_not_done: row for {feature_id} not found"
    status_cell = row.group(2).strip()
    if status_cell != "done":
        return False, f"roadmap_row_not_done: status={status_cell!r}"

    # --- archive anchor ---------------------------------------------------
    archive = specfuse_dir / "roadmap-archive.md"
    feat_lc = feature_id.lower()
    if not archive.exists():
        return (
            False,
            f"archive_anchor_missing: {feat_lc} (roadmap-archive.md absent)",
        )
    if f'<a id="{feat_lc}"></a>' not in archive.read_text():
        return False, f"archive_anchor_missing: {feat_lc}"
    return True, ""
|
|
2663
|
+
|
|
2664
|
+
|
|
2665
|
+
# Registry of post-pass invariant checks, keyed by WU ``type``.
# ``verify_post_pass_invariants`` looks a WU's type up here and calls each
# listed function as ``fn(wu, feature_dir, repo_root, head_before)``, expecting
# an ``(ok, reason)`` tuple back. A WU type with no entry has no invariants.
POST_PASS_INVARIANTS_BY_TYPE: dict[str, list] = {
    "close": [assert_terminal_flips_fired],
}
|
|
2668
|
+
|
|
2669
|
+
|
|
2670
|
+
def verify_post_pass_invariants(
    wu: WorkUnit,
    feature_dir: Path,
    repo_root: Path,
    head_before: str,
) -> tuple[bool, str]:
    """Run the post-pass invariants registered for this WU's type.

    Type-keyed dispatch over ``POST_PASS_INVARIANTS_BY_TYPE``
    (FEAT-2026-0017/T01). Every registered check receives
    ``(wu, feature_dir, repo_root, head_before)``.

    Returns:
        ``(True, "")`` when the WU type has no invariants or all of them
        pass; ``(False, reason)`` from the first one that fails. Later
        checks are not run after a failure.

    How this differs from ``assert_closing_deliverables``: that guard fires
    immediately after squash and inspects the WU's own ceremony deliverables
    (retrospective, learnings, etc.). This one fires after the gate-boundary
    ``fire_terminal_flips`` invocation and checks that the driver-side state
    transitions actually materialized, independent of the agent's RESULT.

    Background: it defends against the FEAT-2026-0015/T06 wiring-race
    surface, where a close WU passed cleanly with ``verdict: met`` but
    ``fire_terminal_flips`` was never invoked because the in-memory
    ``wu.verdict`` snapshot (loaded BEFORE dispatch by ``load_wu``) shadowed
    the agent's just-written frontmatter value. The re-read fix landed in
    PR #11 (commit 7f403bf); this guard is the canary against re-introducing
    that or any equivalent close-path race.
    """
    for check in POST_PASS_INVARIANTS_BY_TYPE.get(wu.type) or ():
        passed, why = check(wu, feature_dir, repo_root, head_before)
        if passed:
            continue
        return False, why
    return True, ""
|
|
2702
|
+
|
|
2703
|
+
|
|
2704
|
+
# --------------------------------------------------------------------------- #
|
|
2705
|
+
# The loop #
|
|
2706
|
+
# --------------------------------------------------------------------------- #
|
|
2707
|
+
|
|
2708
|
+
|
|
2709
|
+
def ready(units: list[WorkUnit], done_ids: set[str]) -> list[WorkUnit]:
    """Return the work units that can be dispatched right now.

    A unit qualifies when its ``status`` is in ``DISPATCHABLE`` and every id
    in its ``depends_on`` is already in ``done_ids``. Input order is kept.
    """
    frontier = []
    for unit in units:
        if unit.status not in DISPATCHABLE:
            continue
        if any(dep not in done_ids for dep in unit.depends_on):
            continue  # at least one dependency is not done yet
        frontier.append(unit)
    return frontier
|
|
2712
|
+
|
|
2713
|
+
|
|
2714
|
+
def run(
|
|
2715
|
+
feature_arg: str | None,
|
|
2716
|
+
dry_run: bool,
|
|
2717
|
+
force_full_close: str | None = None,
|
|
2718
|
+
) -> int:
|
|
2719
|
+
# Fail-fast on a malformed verification.yml BEFORE we touch any WU state.
|
|
2720
|
+
# The per-gate `verify()` call lazy-loads the same file; if it's malformed,
|
|
2721
|
+
# the crash lands mid-WU with `status: in_progress` already on disk,
|
|
2722
|
+
# corrupting the recovery surface (see specfuse/loop#35). Validating once
|
|
2723
|
+
# here collapses that into "bad config, no work started."
|
|
2724
|
+
try:
|
|
2725
|
+
load_verification()
|
|
2726
|
+
except _miniyaml.MiniYAMLError as exc:
|
|
2727
|
+
print(
|
|
2728
|
+
f"loop.py: .specfuse/verification.yml is malformed — {exc}",
|
|
2729
|
+
file=sys.stderr,
|
|
2730
|
+
)
|
|
2731
|
+
print(
|
|
2732
|
+
"Fix the file and re-run. No WUs were touched.",
|
|
2733
|
+
file=sys.stderr,
|
|
2734
|
+
)
|
|
2735
|
+
return 1
|
|
2736
|
+
feature_dir = find_feature(feature_arg)
|
|
2737
|
+
feat_fm, gates = load_graph(feature_dir)
|
|
2738
|
+
feature_id = feat_fm.get("feature_id", feature_dir.name)
|
|
2739
|
+
if force_full_close is not None and force_full_close != feature_id:
|
|
2740
|
+
sys.exit(
|
|
2741
|
+
f"loop.py: --force-full-close {force_full_close} does not match "
|
|
2742
|
+
f"feature being processed {feature_id}"
|
|
2743
|
+
)
|
|
2744
|
+
_override_ns = argparse.Namespace(force_full_close=force_full_close)
|
|
2745
|
+
_override_active, _override_reason = resolve_auto_close_override(_override_ns, feature_dir)
|
|
2746
|
+
events_path = feature_dir / "events.jsonl"
|
|
2747
|
+
work_dir = feature_dir / "work"
|
|
2748
|
+
backend = make_backend(feat_fm)
|
|
2749
|
+
backend.on_feature_start(feature_id, feat_fm)
|
|
2750
|
+
|
|
2751
|
+
gate = next((g for g in gates if g.status != "passed"), None)
|
|
2752
|
+
if gate is None:
|
|
2753
|
+
print(f"{feature_id}: all gates passed — feature complete.")
|
|
2754
|
+
backend.on_feature_complete(feature_id)
|
|
2755
|
+
write_frontmatter_field(feature_dir / "PLAN.md", "status", "complete")
|
|
2756
|
+
return 0
|
|
2757
|
+
|
|
2758
|
+
lock_fd = None
|
|
2759
|
+
if not dry_run:
|
|
2760
|
+
# dry-run performs no mutation; inspecting while a real run is active must stay allowed.
|
|
2761
|
+
try:
|
|
2762
|
+
lock_fd = acquire_tree_lock(SPECFUSE_DIR)
|
|
2763
|
+
except BlockingIOError:
|
|
2764
|
+
print(
|
|
2765
|
+
"another loop driver is already running in this working tree "
|
|
2766
|
+
"(.specfuse/.loop.lock held)",
|
|
2767
|
+
file=sys.stderr,
|
|
2768
|
+
)
|
|
2769
|
+
return 1
|
|
2770
|
+
require_git_ready()
|
|
2771
|
+
ensure_feature_branch(feat_fm, feature_dir)
|
|
2772
|
+
|
|
2773
|
+
try:
|
|
2774
|
+
|
|
2775
|
+
# Per-project cost-tracking toggle (top-level key in verification.yml,
|
|
2776
|
+
# default True). When True, the driver records cumulative cost / tokens
|
|
2777
|
+
# on each WU's frontmatter at outcome time and a per-attempt breakdown
|
|
2778
|
+
# in events.jsonl; when False the driver passes plain text mode to
|
|
2779
|
+
# `claude -p` and writes no cost fields.
|
|
2780
|
+
cfg = load_verification()
|
|
2781
|
+
cost_tracking = cfg.get("cost_tracking", True) is not False
|
|
2782
|
+
|
|
2783
|
+
units = [load_wu(feature_dir, ref) for ref in gate.refs]
|
|
2784
|
+
print(f"== {feature_id} — Gate {gate.number} [{gate.status}] "
|
|
2785
|
+
f"({len(units)} work units) ==")
|
|
2786
|
+
|
|
2787
|
+
# Arm check: a gate plan-next drafted starts with draft WUs. Don't execute drafts.
|
|
2788
|
+
drafts = [u for u in units if u.status == "draft"]
|
|
2789
|
+
if drafts and not dry_run:
|
|
2790
|
+
review = feature_dir / f"GATE-{gate.number:02d}-REVIEW.md"
|
|
2791
|
+
print(f"\nGate {gate.number} is drafted but not armed. {len(drafts)} work "
|
|
2792
|
+
f"unit(s) are in `draft`.")
|
|
2793
|
+
if review.exists():
|
|
2794
|
+
print(f"Read {review} for the planner's findings, review the draft WU "
|
|
2795
|
+
f"files, then flip the ones you accept to `status: pending` and "
|
|
2796
|
+
f"re-run.")
|
|
2797
|
+
return 2
|
|
2798
|
+
|
|
2799
|
+
# Done-set must include WUs from every PREVIOUS gate that are already done —
|
|
2800
|
+
# cross-gate `depends_on` references are valid (e.g. gate 2's implementation
|
|
2801
|
+
# WU may depend on gate 1's). Without this, the ready() filter sees the
|
|
2802
|
+
# cross-gate dep as unmet and silently no-ops the gate (then set_gate
|
|
2803
|
+
# awaiting_review fires on an empty run, leaving real WUs un-dispatched).
|
|
2804
|
+
done_ids: set[str] = set()
|
|
2805
|
+
for g in gates:
|
|
2806
|
+
if g.number > gate.number:
|
|
2807
|
+
continue
|
|
2808
|
+
for ref in g.refs:
|
|
2809
|
+
wu_path = feature_dir / ref["file"]
|
|
2810
|
+
if not wu_path.is_file():
|
|
2811
|
+
continue
|
|
2812
|
+
wfm, _ = read_frontmatter(wu_path)
|
|
2813
|
+
if wfm.get("status") == DONE:
|
|
2814
|
+
done_ids.add(ref["id"])
|
|
2815
|
+
blocked = False
|
|
2816
|
+
close_wu_for_terminal: WorkUnit | None = None
|
|
2817
|
+
_terminal_auto_closed_wu: WorkUnit | None = None # FEAT-2026-0018/T11H
|
|
2818
|
+
|
|
2819
|
+
while True:
|
|
2820
|
+
pending = ready(units, done_ids)
|
|
2821
|
+
if not pending:
|
|
2822
|
+
break
|
|
2823
|
+
for wu in pending: # sequential v1; the frontier is independent -> fan-out later
|
|
2824
|
+
# Per-gate cost budget brake — halt-between-WUs.
|
|
2825
|
+
# Mirrors MAX_ATTEMPTS' shape (a brake, not an estimator). Fires
|
|
2826
|
+
# before the next WU's set_wu(in_progress) so an in-progress WU
|
|
2827
|
+
# always runs to a terminal outcome (squash contract intact).
|
|
2828
|
+
# Skipped when the gate is already awaiting_review: the closing
|
|
2829
|
+
# sequence already flipped the gate; the reviewer will observe the
|
|
2830
|
+
# overshoot via the spent vs budget numbers in the next review.
|
|
2831
|
+
if not dry_run and gate.status != "awaiting_review":
|
|
2832
|
+
gate_dict = {"file": gate.file.name, "work_units": gate.refs}
|
|
2833
|
+
if _should_halt_for_budget(feat_fm, gate_dict, feature_dir):
|
|
2834
|
+
budget = gate_budget_usd(gate.file)
|
|
2835
|
+
spent = gate_spent_usd(feat_fm, gate_dict, feature_dir)
|
|
2836
|
+
backend.set_gate(gate, "awaiting_review")
|
|
2837
|
+
flush_events(events_path, [build_event(
|
|
2838
|
+
"human_escalation", feature_id, {
|
|
2839
|
+
"reason": "gate_budget_exceeded",
|
|
2840
|
+
"budget_usd": budget,
|
|
2841
|
+
"spent_usd": round(spent, 6),
|
|
2842
|
+
"next_wu_id": wu.wu_id,
|
|
2843
|
+
})])
|
|
2844
|
+
commit_bookkeeping(
|
|
2845
|
+
[gate.file, events_path],
|
|
2846
|
+
f"chore(loop): gate {gate.number} budget exceeded "
|
|
2847
|
+
f"— awaiting_review\n\nFeature: {feature_id}",
|
|
2848
|
+
)
|
|
2849
|
+
print(f"\nGate {gate.number} budget exceeded: spent "
|
|
2850
|
+
f"${spent:.4f} >= budget ${budget:.4f}. "
|
|
2851
|
+
f"Halted before {wu.wu_id}.")
|
|
2852
|
+
return 1
|
|
2853
|
+
|
|
2854
|
+
print(f"\n[{time.strftime('%H:%M:%S')}] -- {wu.wu_id} "
|
|
2855
|
+
f"[{wu.type}] model={wu.model} effort={wu.effort}")
|
|
2856
|
+
if dry_run:
|
|
2857
|
+
print(" (dry run — would dispatch)")
|
|
2858
|
+
wu.status = DONE
|
|
2859
|
+
done_ids.add(wu.wu_id)
|
|
2860
|
+
continue
|
|
2861
|
+
|
|
2862
|
+
# FEAT-2026-0018/T05 — intermediate auto-close branch
|
|
2863
|
+
if wu.type == "close-intermediate" and not _override_active:
|
|
2864
|
+
_plan_next_wu = next(
|
|
2865
|
+
(w for w in units if w.type == "plan-next"),
|
|
2866
|
+
None,
|
|
2867
|
+
)
|
|
2868
|
+
_auto_closed, _ = maybe_auto_close_intermediate(
|
|
2869
|
+
feature_dir, feature_id, gate, gates,
|
|
2870
|
+
events_path, REPO_ROOT, wu, _plan_next_wu,
|
|
2871
|
+
)
|
|
2872
|
+
if _auto_closed:
|
|
2873
|
+
commit_bookkeeping(
|
|
2874
|
+
[feature_dir / "RETROSPECTIVE.md",
|
|
2875
|
+
wu.file, events_path],
|
|
2876
|
+
f"chore(loop): {wu.wu_id} auto-closed "
|
|
2877
|
+
f"(predicate=v1)\n\nFeature: {feature_id}",
|
|
2878
|
+
)
|
|
2879
|
+
# Mirror the on-disk status flip into the in-memory
|
|
2880
|
+
# WorkUnit so ready()'s u.status in DISPATCHABLE filter
|
|
2881
|
+
# excludes it on the next while-loop pass — without
|
|
2882
|
+
# this, the same WU re-appears in pending, the helper
|
|
2883
|
+
# is called a second time, and (absent its idempotency
|
|
2884
|
+
# guard) a duplicate auto_close_decision event +
|
|
2885
|
+
# duplicate bookkeeping commit are produced (issue #23).
|
|
2886
|
+
wu.status = DONE
|
|
2887
|
+
done_ids.add(wu.wu_id)
|
|
2888
|
+
continue
|
|
2889
|
+
elif wu.type == "close-intermediate" and _override_active:
|
|
2890
|
+
flush_events(events_path, [build_event(
|
|
2891
|
+
"auto_close_decision", wu.wu_id, {
|
|
2892
|
+
"gate": gate.number,
|
|
2893
|
+
"gate_type": "intermediate",
|
|
2894
|
+
"auto": False,
|
|
2895
|
+
"reasons": [_override_reason],
|
|
2896
|
+
"predicate_version": "v1",
|
|
2897
|
+
"override": True,
|
|
2898
|
+
}
|
|
2899
|
+
)])
|
|
2900
|
+
|
|
2901
|
+
# FEAT-2026-0018/T11H — terminal auto-close branch (relocated from post-loop)
|
|
2902
|
+
# Guard wu.verdict is None: only attempt auto-close for WUs that have
|
|
2903
|
+
# not yet been dispatched (no verdict written). WUs with a pre-existing
|
|
2904
|
+
# verdict (e.g. met_locally from a prior attempt) fall through to
|
|
2905
|
+
# normal dispatch so their verdict semantics are honoured.
|
|
2906
|
+
if (wu.type == "close" and gate is gates[-1]
|
|
2907
|
+
and not _override_active and wu.verdict is None):
|
|
2908
|
+
_auto_closed, _decision = maybe_auto_close_terminal(
|
|
2909
|
+
feature_dir, feature_id, gate, gates,
|
|
2910
|
+
events_path, wu, repo_root=REPO_ROOT,
|
|
2911
|
+
)
|
|
2912
|
+
if _auto_closed:
|
|
2913
|
+
commit_bookkeeping(
|
|
2914
|
+
[feature_dir / "RETROSPECTIVE.md",
|
|
2915
|
+
wu.file, events_path],
|
|
2916
|
+
f"chore(loop): {wu.wu_id} auto-closed "
|
|
2917
|
+
f"(predicate=v1)\n\nFeature: {feature_id}",
|
|
2918
|
+
)
|
|
2919
|
+
# Terminal flips fire in post-loop after set_gate(awaiting_review)
|
|
2920
|
+
_terminal_auto_closed_wu = wu
|
|
2921
|
+
# See intermediate branch above: mirror disk → memory
|
|
2922
|
+
# so ready() filters this WU on the next pass (issue #23).
|
|
2923
|
+
wu.status = DONE
|
|
2924
|
+
done_ids.add(wu.wu_id)
|
|
2925
|
+
continue
|
|
2926
|
+
elif (wu.type == "close" and gate is gates[-1]
|
|
2927
|
+
and _override_active and wu.verdict is None):
|
|
2928
|
+
flush_events(events_path, [build_event(
|
|
2929
|
+
"auto_close_decision", wu.wu_id, {
|
|
2930
|
+
"gate": gate.number,
|
|
2931
|
+
"auto": False,
|
|
2932
|
+
"reasons": [_override_reason],
|
|
2933
|
+
"predicate_version": "v1",
|
|
2934
|
+
"override": True,
|
|
2935
|
+
}
|
|
2936
|
+
)])
|
|
2937
|
+
# Fall through to existing close-WU dispatch path
|
|
2938
|
+
|
|
2939
|
+
head_before = git("rev-parse", "HEAD")
|
|
2940
|
+
_is_rearm = detect_rearm_dispatch(wu)
|
|
2941
|
+
if _is_rearm:
|
|
2942
|
+
fold_cumulative_on_rearm(wu, backend)
|
|
2943
|
+
backend.set_wu(wu, "status", "in_progress")
|
|
2944
|
+
# Events and per-attempt notes are buffered in memory during the
|
|
2945
|
+
# WU's lifecycle and flushed at outcome time. This prevents the
|
|
2946
|
+
# `git reset --hard` between failed attempts from silently
|
|
2947
|
+
# wiping appended events / status flips — anything that should
|
|
2948
|
+
# be durable is either committed in the squash (PASS) or in a
|
|
2949
|
+
# bookkeeping commit (BLOCKED/SPINNING).
|
|
2950
|
+
_wu_fm_rearm, _ = read_frontmatter(wu.file)
|
|
2951
|
+
re_arm_count = int(_wu_fm_rearm.get("re_arm_count") or 0)
|
|
2952
|
+
wu_events = [build_event("task_started", wu.wu_id,
|
|
2953
|
+
{"type": wu.type, "model": wu.model,
|
|
2954
|
+
"re_arm_count": re_arm_count})]
|
|
2955
|
+
if _is_rearm:
|
|
2956
|
+
_rearm_history = _wu_fm_rearm.get("re_arm_history") or []
|
|
2957
|
+
_rearm_reason = ""
|
|
2958
|
+
if isinstance(_rearm_history, list) and _rearm_history:
|
|
2959
|
+
_last_entry = _rearm_history[-1]
|
|
2960
|
+
if isinstance(_last_entry, dict):
|
|
2961
|
+
_rearm_reason = str(_last_entry.get("reason", ""))
|
|
2962
|
+
wu_events.append(build_event("re_arm_dispatched", wu.wu_id, {
|
|
2963
|
+
"re_arm_count": re_arm_count,
|
|
2964
|
+
"reason": _rearm_reason,
|
|
2965
|
+
}))
|
|
2966
|
+
if wu.unsandboxed:
|
|
2967
|
+
# Audit signal: WU opted out of the claude -p sandbox.
|
|
2968
|
+
# Event logged before first attempt so the trail exists
|
|
2969
|
+
# even if the attempt crashes. Rationale carried verbatim.
|
|
2970
|
+
wu_events.append(build_event("unsandboxed_dispatch", wu.wu_id, {
|
|
2971
|
+
"rationale": wu.unsandboxed_rationale,
|
|
2972
|
+
}))
|
|
2973
|
+
print(f" ⚠ UNSANDBOXED dispatch — rationale: "
|
|
2974
|
+
f"{wu.unsandboxed_rationale}")
|
|
2975
|
+
attempt_notes: list[tuple[int, str]] = []
|
|
2976
|
+
attempt_outcomes: list[str] = []
|
|
2977
|
+
# Cost accumulators: per-attempt list goes to events.jsonl,
|
|
2978
|
+
# cumulative sum to WU frontmatter at outcome time.
|
|
2979
|
+
attempts_usage: list[dict] = []
|
|
2980
|
+
cum_usage = {"cost_usd": 0.0, "input_tokens": 0, "output_tokens": 0,
|
|
2981
|
+
"duration_seconds": 0.0}
|
|
2982
|
+
|
|
2983
|
+
failure_note = None
|
|
2984
|
+
prior_failure_signature: tuple[str | None, str | None] | None = None
|
|
2985
|
+
for attempt in range(1, MAX_ATTEMPTS + 1):
|
|
2986
|
+
backend.set_wu(wu, "attempts", attempt)
|
|
2987
|
+
print(f" [{time.strftime('%H:%M:%S')}] attempt "
|
|
2988
|
+
f"{attempt}/{MAX_ATTEMPTS} model={wu.model} "
|
|
2989
|
+
f"effort={wu.effort} — fresh session")
|
|
2990
|
+
if attempt > 1 and failure_note:
|
|
2991
|
+
reason = failure_note.strip().splitlines()[0][:200]
|
|
2992
|
+
print(f" retry reason: {reason}")
|
|
2993
|
+
t0 = time.monotonic()
|
|
2994
|
+
outcome, payload, usage = execute_unit_attempt(
|
|
2995
|
+
wu, feature_dir, failure_note, cost_tracking=cost_tracking,
|
|
2996
|
+
head_before=head_before,
|
|
2997
|
+
)
|
|
2998
|
+
duration = round(time.monotonic() - t0, 3)
|
|
2999
|
+
attempt_record: dict = {"attempt": attempt,
|
|
3000
|
+
"duration_seconds": duration}
|
|
3001
|
+
if usage:
|
|
3002
|
+
attempt_record.update(usage)
|
|
3003
|
+
cum_usage["cost_usd"] += float(usage.get("cost_usd", 0.0))
|
|
3004
|
+
cum_usage["input_tokens"] += int(usage.get("input_tokens", 0))
|
|
3005
|
+
cum_usage["output_tokens"] += int(usage.get("output_tokens", 0))
|
|
3006
|
+
attempts_usage.append(attempt_record)
|
|
3007
|
+
cum_usage["duration_seconds"] = round(
|
|
3008
|
+
cum_usage["duration_seconds"] + duration, 3)
|
|
3009
|
+
attempt_outcomes.append(outcome)
|
|
3010
|
+
|
|
3011
|
+
if outcome == "zero_token":
|
|
3012
|
+
# Agent never produced output (input_tokens=0). Skip
|
|
3013
|
+
# RESULT parsing, buffer an event, reset the tree, and
|
|
3014
|
+
# treat as a failed attempt for the purposes of the
|
|
3015
|
+
# attempt loop — counter already incremented at top.
|
|
3016
|
+
wu_events.append(emit_attempt_outcome(
|
|
3017
|
+
wu, attempt, "zero_token_skip",
|
|
3018
|
+
attempts_usage[-1],
|
|
3019
|
+
))
|
|
3020
|
+
reset_preserving_events(head_before, events_path)
|
|
3021
|
+
print(f" ZERO-TOKEN attempt {attempt}/{MAX_ATTEMPTS} "
|
|
3022
|
+
f"— no agent output, skipping")
|
|
3023
|
+
continue
|
|
3024
|
+
|
|
3025
|
+
if outcome == "blocked":
|
|
3026
|
+
# Reset agent work first; THEN write our bookkeeping; THEN
|
|
3027
|
+
# commit it. Doing the flip before the reset would let the
|
|
3028
|
+
# reset wipe the flip — the silent-state-loss bug.
|
|
3029
|
+
# Use reset_preserving_events to keep prior WU's
|
|
3030
|
+
# flushed-but-uncommitted events.jsonl entries.
|
|
3031
|
+
reset_preserving_events(head_before, events_path)
|
|
3032
|
+
backend.set_wu(wu, "status", "blocked_human")
|
|
3033
|
+
write_cost_to_wu(backend, wu, cum_usage)
|
|
3034
|
+
wu_events.append(emit_attempt_outcome(
|
|
3035
|
+
wu, attempt, "blocked",
|
|
3036
|
+
attempts_usage[-1],
|
|
3037
|
+
files_touched=git_diff_names(head_before, "HEAD"),
|
|
3038
|
+
agent_status="blocked",
|
|
3039
|
+
agent_blocked_reason=payload,
|
|
3040
|
+
))
|
|
3041
|
+
wu_events.append(build_event("human_escalation", wu.wu_id, {
|
|
3042
|
+
"reason": "agent_reported_blocked",
|
|
3043
|
+
"blocked_reason": payload,
|
|
3044
|
+
"attempts": attempt,
|
|
3045
|
+
"attempts_usage": attempts_usage,
|
|
3046
|
+
}))
|
|
3047
|
+
flush_events(events_path, wu_events)
|
|
3048
|
+
commit_bookkeeping(
|
|
3049
|
+
[wu.file, events_path],
|
|
3050
|
+
f"chore(loop): {wu.wu_id} blocked_human "
|
|
3051
|
+
f"(agent-reported)\n\nFeature: {wu.wu_id}",
|
|
3052
|
+
)
|
|
3053
|
+
print(f" BLOCKED by agent — "
|
|
3054
|
+
f"{payload or '(no reason given)'}")
|
|
3055
|
+
blocked = True
|
|
3056
|
+
break
|
|
3057
|
+
|
|
3058
|
+
if outcome == "passed":
|
|
3059
|
+
# Flip status to DONE BEFORE the squash so the flip is
|
|
3060
|
+
# included in the commit content — survives the next WU's
|
|
3061
|
+
# reset.
|
|
3062
|
+
backend.set_wu(wu, "status", DONE)
|
|
3063
|
+
write_cost_to_wu(backend, wu, cum_usage)
|
|
3064
|
+
try:
|
|
3065
|
+
sha = squash_commit(wu, head_before)
|
|
3066
|
+
except SquashCommitError as exc:
|
|
3067
|
+
# The squash commit was rejected (typically a
|
|
3068
|
+
# pre-commit hook). Treat as a failed attempt rather
|
|
3069
|
+
# than crashing the driver (issue #51): reset the
|
|
3070
|
+
# tree — which also discards the premature DONE flip
|
|
3071
|
+
# written just above — record the failure with git's
|
|
3072
|
+
# stderr, and retry within budget (MAX_ATTEMPTS
|
|
3073
|
+
# exhaustion escalates to blocked_human).
|
|
3074
|
+
reset_preserving_events(head_before, events_path)
|
|
3075
|
+
summary = str(exc)
|
|
3076
|
+
wu_events.append(emit_attempt_outcome(
|
|
3077
|
+
wu, attempt, "squash_commit_failed",
|
|
3078
|
+
attempts_usage[-1],
|
|
3079
|
+
extras={"summary": summary},
|
|
3080
|
+
))
|
|
3081
|
+
attempt_notes.append((attempt, summary))
|
|
3082
|
+
failure_note = summary
|
|
3083
|
+
print(f" SQUASH COMMIT REJECTED attempt "
|
|
3084
|
+
f"{attempt}/{MAX_ATTEMPTS}")
|
|
3085
|
+
continue
|
|
3086
|
+
# Smoke-import runner (FEAT-2026-0008/T03): after a
|
|
3087
|
+
# successful verify() AND squash, run each
|
|
3088
|
+
# `python3 -c "from X import Y"` line declared in the WU
|
|
3089
|
+
# body. A non-zero exit fails the attempt — the squash
|
|
3090
|
+
# is rolled back via `git reset --hard head_before` so
|
|
3091
|
+
# the verify-passing-but-smoke-failing tree does not
|
|
3092
|
+
# remain in history. Rollback FIRST (before event log
|
|
3093
|
+
# and before the next attempt iterates) per WU
|
|
3094
|
+
# escalation trigger 3.
|
|
3095
|
+
smoke_cmds = extract_smoke_imports(wu.body)
|
|
3096
|
+
if smoke_cmds:
|
|
3097
|
+
smoke_ok, smoke_summary = run_smoke_imports(
|
|
3098
|
+
smoke_cmds, Path("."),
|
|
3099
|
+
)
|
|
3100
|
+
if not smoke_ok:
|
|
3101
|
+
reset_preserving_events(head_before, events_path)
|
|
3102
|
+
wu_events.append(emit_attempt_outcome(
|
|
3103
|
+
wu, attempt, "smoke_import_failed",
|
|
3104
|
+
attempts_usage[-1],
|
|
3105
|
+
extras={"summary": smoke_summary},
|
|
3106
|
+
))
|
|
3107
|
+
attempt_notes.append((attempt, smoke_summary))
|
|
3108
|
+
failure_note = smoke_summary
|
|
3109
|
+
print(f" SMOKE FAIL attempt "
|
|
3110
|
+
f"{attempt}/{MAX_ATTEMPTS}")
|
|
3111
|
+
continue
|
|
3112
|
+
# Closing deliverable guard (FEAT-2026-0015/T07):
|
|
3113
|
+
# fires after smoke, before terminal-flip bookkeeping.
|
|
3114
|
+
closing_ok, closing_summary = assert_closing_deliverables(
|
|
3115
|
+
wu, feature_dir, REPO_ROOT, head_before,
|
|
3116
|
+
)
|
|
3117
|
+
if not closing_ok:
|
|
3118
|
+
reset_preserving_events(head_before, events_path)
|
|
3119
|
+
wu_events.append(emit_attempt_outcome(
|
|
3120
|
+
wu, attempt, "closing_deliverable_missing",
|
|
3121
|
+
attempts_usage[-1],
|
|
3122
|
+
extras={
|
|
3123
|
+
"assertion": closing_summary.split(":", 1)[0].strip(),
|
|
3124
|
+
"summary": closing_summary,
|
|
3125
|
+
},
|
|
3126
|
+
))
|
|
3127
|
+
attempt_notes.append((attempt, closing_summary))
|
|
3128
|
+
failure_note = closing_summary
|
|
3129
|
+
print(
|
|
3130
|
+
f" CLOSING DELIVERABLE MISSING attempt "
|
|
3131
|
+
f"{attempt}/{MAX_ATTEMPTS} — {closing_summary}"
|
|
3132
|
+
)
|
|
3133
|
+
continue
|
|
3134
|
+
# Deliverable-presence gate (FEAT-2026-0022/T02):
|
|
3135
|
+
# fires after smoke and closing-deliverable guards,
|
|
3136
|
+
# before the empty-files catch-all so the named-path
|
|
3137
|
+
# diagnostic wins. Every path the WU declared in
|
|
3138
|
+
# `produces:` must exist on disk and be non-empty; an
|
|
3139
|
+
# absent or zero-length declared deliverable refuses the
|
|
3140
|
+
# pass (the partial-bundle hollow pass,
|
|
3141
|
+
# FEAT-2026-0020/T12). Opt-out: a WU with empty
|
|
3142
|
+
# `produces:` never fires this — existing behavior for
|
|
3143
|
+
# every current WU is unchanged.
|
|
3144
|
+
deliv_ok, deliv_summary = assert_declared_deliverables(wu)
|
|
3145
|
+
if not deliv_ok:
|
|
3146
|
+
reset_preserving_events(head_before, events_path)
|
|
3147
|
+
missing = deliv_summary.split(": ", 1)[-1]
|
|
3148
|
+
wu_events.append(emit_attempt_outcome(
|
|
3149
|
+
wu, attempt, "deliverable_missing",
|
|
3150
|
+
attempts_usage[-1],
|
|
3151
|
+
extras={"summary": deliv_summary,
|
|
3152
|
+
"missing": missing},
|
|
3153
|
+
))
|
|
3154
|
+
attempt_notes.append((attempt, deliv_summary))
|
|
3155
|
+
failure_note = deliv_summary
|
|
3156
|
+
print(
|
|
3157
|
+
f" DELIVERABLE MISSING attempt "
|
|
3158
|
+
f"{attempt}/{MAX_ATTEMPTS} — {deliv_summary}"
|
|
3159
|
+
)
|
|
3160
|
+
continue
|
|
3161
|
+
# Empty-files escalation (FEAT-2026-0022/T03): compute
|
|
3162
|
+
# the post-squash touched-paths list ONCE here and reuse
|
|
3163
|
+
# it for the passed event below. An implementation WU
|
|
3164
|
+
# whose squash names only its own WU file + events.jsonl
|
|
3165
|
+
# produced no deliverable — refuse the pass, MAX_ATTEMPTS
|
|
3166
|
+
# exhaustion escalates via existing machinery.
|
|
3167
|
+
touched = git_diff_names(head_before, sha) if sha else []
|
|
3168
|
+
impl_ok, impl_summary = assert_implementation_touched_files(
|
|
3169
|
+
wu, touched,
|
|
3170
|
+
)
|
|
3171
|
+
if not impl_ok:
|
|
3172
|
+
reset_preserving_events(head_before, events_path)
|
|
3173
|
+
wu_events.append(emit_attempt_outcome(
|
|
3174
|
+
wu, attempt, "no_deliverable_files",
|
|
3175
|
+
attempts_usage[-1],
|
|
3176
|
+
extras={"summary": impl_summary},
|
|
3177
|
+
))
|
|
3178
|
+
attempt_notes.append((attempt, impl_summary))
|
|
3179
|
+
failure_note = impl_summary
|
|
3180
|
+
print(
|
|
3181
|
+
f" NO DELIVERABLE FILES attempt "
|
|
3182
|
+
f"{attempt}/{MAX_ATTEMPTS}"
|
|
3183
|
+
)
|
|
3184
|
+
continue
|
|
3185
|
+
if wu.type == "close":
|
|
3186
|
+
# Re-read frontmatter post-squash: the agent writes
|
|
3187
|
+
# `verdict:` to the WU file DURING dispatch, but
|
|
3188
|
+
# `wu.verdict` was populated by `load_wu` BEFORE
|
|
3189
|
+
# dispatch. Without this re-read, the agent's
|
|
3190
|
+
# verdict write is invisible to the close-path
|
|
3191
|
+
# check and `fire_terminal_flips` never fires.
|
|
3192
|
+
# Surfaced FEAT-2026-0015/G2-CLOSE: verdict: met
|
|
3193
|
+
# written by agent; in-memory wu.verdict stayed
|
|
3194
|
+
# None; terminal flips skipped silently.
|
|
3195
|
+
wu_fm_post, _ = read_frontmatter(wu.file)
|
|
3196
|
+
wu.verdict = wu_fm_post.get("verdict") or None
|
|
3197
|
+
if verdict_permits_terminal_flips(wu.verdict):
|
|
3198
|
+
close_wu_for_terminal = wu
|
|
3199
|
+
else:
|
|
3200
|
+
plan_path = feature_dir / "PLAN.md"
|
|
3201
|
+
plan_fm_recheck, _ = read_frontmatter(plan_path)
|
|
3202
|
+
if plan_fm_recheck.get("status") == "done":
|
|
3203
|
+
write_frontmatter_field(plan_path, "status", "active")
|
|
3204
|
+
commit_bookkeeping(
|
|
3205
|
+
[plan_path],
|
|
3206
|
+
f"chore(loop): {wu.wu_id} revert PLAN.md done"
|
|
3207
|
+
f" (hedged verdict)\n\nFeature: {wu.wu_id}",
|
|
3208
|
+
)
|
|
3209
|
+
elif _legacy_4wu_terminal_close_complete(
|
|
3210
|
+
wu, units, gate, gates,
|
|
3211
|
+
):
|
|
3212
|
+
# Legacy 4-WU close sequence completed on terminal gate
|
|
3213
|
+
# (issue #16). The pre-FEAT-2026-0015 shape
|
|
3214
|
+
# (retrospective + lessons + docs + plan-next) has no
|
|
3215
|
+
# close-type WU and no verdict field. Treat the
|
|
3216
|
+
# plan-next pass as terminating-equivalent so the
|
|
3217
|
+
# post-loop block fires fire_terminal_flips.
|
|
3218
|
+
close_wu_for_terminal = wu
|
|
3219
|
+
# FEAT-2026-0018/T07 — plan-next-draft lint hook (warn-only v1)
|
|
3220
|
+
if wu.type == "plan-next":
|
|
3221
|
+
try:
|
|
3222
|
+
from lint_plan import lint_plan_next_draft
|
|
3223
|
+
_warns = lint_plan_next_draft(feature_dir, gate.number)
|
|
3224
|
+
except Exception as _exc:
|
|
3225
|
+
_warns = [f"lint_plan_next_draft raised: {_exc}"]
|
|
3226
|
+
for _w in _warns:
|
|
3227
|
+
print(f" WARN (plan-next-draft lint): {_w}")
|
|
3228
|
+
if _warns:
|
|
3229
|
+
wu_events.append(build_event(
|
|
3230
|
+
"plan_next_draft_lint", wu.wu_id,
|
|
3231
|
+
{"gate": gate.number, "warns": list(_warns),
|
|
3232
|
+
"blocking": False},
|
|
3233
|
+
))
|
|
3234
|
+
wu_events.append(emit_attempt_outcome(
|
|
3235
|
+
wu, attempt, "passed",
|
|
3236
|
+
attempts_usage[-1],
|
|
3237
|
+
files_touched=touched,
|
|
3238
|
+
agent_status="complete",
|
|
3239
|
+
agent_blocked_reason=None,
|
|
3240
|
+
))
|
|
3241
|
+
wu_events.append(build_event("task_completed", wu.wu_id, {
|
|
3242
|
+
"attempts": attempt,
|
|
3243
|
+
"attempts_usage": attempts_usage,
|
|
3244
|
+
}))
|
|
3245
|
+
flush_events(events_path, wu_events)
|
|
3246
|
+
done_ids.add(wu.wu_id)
|
|
3247
|
+
print(f" PASS — committed {sha}")
|
|
3248
|
+
break
|
|
3249
|
+
|
|
3250
|
+
if outcome == "files_changed_mismatch":
|
|
3251
|
+
# RESULT declared files_changed paths that show no diff
|
|
3252
|
+
# against head_before. Treat as a failed attempt: skip
|
|
3253
|
+
# squash, reset the tree, record evidence, retry within
|
|
3254
|
+
# budget. payload is the list of unchanged paths.
|
|
3255
|
+
note = (
|
|
3256
|
+
"RESULT block declared `files_changed` paths that "
|
|
3257
|
+
"show NO diff against HEAD before this attempt:\n"
|
|
3258
|
+
+ "\n".join(f" - {p}" for p in payload)
|
|
3259
|
+
+ "\nEither actually modify them, or correct the "
|
|
3260
|
+
"files_changed list to match what you really edited."
|
|
3261
|
+
)
|
|
3262
|
+
wu_events.append(emit_attempt_outcome(
|
|
3263
|
+
wu, attempt, "files_changed_mismatch",
|
|
3264
|
+
attempts_usage[-1],
|
|
3265
|
+
extras={"unchanged_paths": list(payload)},
|
|
3266
|
+
))
|
|
3267
|
+
attempt_notes.append((attempt, note))
|
|
3268
|
+
failure_note = note
|
|
3269
|
+
reset_preserving_events(head_before, events_path)
|
|
3270
|
+
print(f" FILES_CHANGED MISMATCH attempt "
|
|
3271
|
+
f"{attempt}/{MAX_ATTEMPTS} — {len(payload)} path(s) "
|
|
3272
|
+
f"unchanged")
|
|
3273
|
+
continue
|
|
3274
|
+
|
|
3275
|
+
# outcome == "failed": evidence in payload, retry within budget.
|
|
3276
|
+
# Per-attempt notes are buffered (not written to disk) so they
|
|
3277
|
+
# ride with the spinning-escalation commit if we exhaust
|
|
3278
|
+
# attempts; on eventual PASS they're discarded as scratch.
|
|
3279
|
+
attempt_notes.append((attempt, payload))
|
|
3280
|
+
failure_note = payload
|
|
3281
|
+
_fc, _fs = parse_gate_failure_signature(payload)
|
|
3282
|
+
_ex = extract_failure_excerpt(payload)
|
|
3283
|
+
wu_events.append(emit_attempt_outcome(
|
|
3284
|
+
wu, attempt, "failed",
|
|
3285
|
+
attempts_usage[-1],
|
|
3286
|
+
failure_class=_fc,
|
|
3287
|
+
failure_signature=_fs,
|
|
3288
|
+
failure_excerpt=_ex,
|
|
3289
|
+
files_touched=git_diff_names(head_before, "HEAD"),
|
|
3290
|
+
agent_status="complete",
|
|
3291
|
+
agent_blocked_reason=None,
|
|
3292
|
+
))
|
|
3293
|
+
# T04: halt early when same (class, signature) repeats.
|
|
3294
|
+
if detect_spinning_signature_repeat((_fc, _fs), prior_failure_signature):
|
|
3295
|
+
wu_events.append(build_event("human_escalation", wu.wu_id, {
|
|
3296
|
+
"reason": "spinning_signature_repeat",
|
|
3297
|
+
"failure_class": _fc,
|
|
3298
|
+
"failure_signature": _fs,
|
|
3299
|
+
"attempts": attempt,
|
|
3300
|
+
"attempts_usage": attempts_usage,
|
|
3301
|
+
}))
|
|
3302
|
+
reset_preserving_events(head_before, events_path)
|
|
3303
|
+
backend.set_wu(wu, "status", "blocked_human")
|
|
3304
|
+
write_cost_to_wu(backend, wu, cum_usage)
|
|
3305
|
+
flush_events(events_path, wu_events)
|
|
3306
|
+
commit_bookkeeping(
|
|
3307
|
+
[wu.file, events_path],
|
|
3308
|
+
f"chore(loop): {wu.wu_id} blocked_human "
|
|
3309
|
+
f"(spinning_signature_repeat, attempt {attempt})"
|
|
3310
|
+
f"\n\nFeature: {wu.wu_id}",
|
|
3311
|
+
)
|
|
3312
|
+
print(f" BLOCKED — spinning signature repeat at "
|
|
3313
|
+
f"attempt {attempt}/{MAX_ATTEMPTS}")
|
|
3314
|
+
blocked = True
|
|
3315
|
+
break
|
|
3316
|
+
if (_fc, _fs) != ("other", "no_gate_marker"):
|
|
3317
|
+
prior_failure_signature = (_fc, _fs)
|
|
3318
|
+
flush_events(events_path, wu_events)
|
|
3319
|
+
wu_events.clear()
|
|
3320
|
+
reset_preserving_events(head_before, events_path)
|
|
3321
|
+
print(f" FAIL attempt {attempt}/{MAX_ATTEMPTS}")
|
|
3322
|
+
else:
|
|
3323
|
+
# for-else: ran out of attempts without break = spinning.
|
|
3324
|
+
# The reset has already happened in the failed/zero_token
|
|
3325
|
+
# branch above. Flush attempt notes to disk for human
|
|
3326
|
+
# inspection, mark the WU blocked_human, then commit it.
|
|
3327
|
+
#
|
|
3328
|
+
# Distinguish two spinning shapes in the event payload:
|
|
3329
|
+
# all_attempts_zero_token — every attempt billed 0 input
|
|
3330
|
+
# tokens (CLI/quota/connectivity issue, not a real
|
|
3331
|
+
# verification failure); no per-attempt notes to write.
|
|
3332
|
+
# spinning_detected — at least one attempt produced
|
|
3333
|
+
# output that failed verify(); per-attempt evidence is
|
|
3334
|
+
# buffered in attempt_notes.
|
|
3335
|
+
all_zero = bool(attempt_outcomes) and all(
|
|
3336
|
+
o == "zero_token" for o in attempt_outcomes)
|
|
3337
|
+
reason = ("all_attempts_zero_token" if all_zero
|
|
3338
|
+
else "spinning_detected")
|
|
3339
|
+
wu_key = wu.wu_id.replace("/", "_")
|
|
3340
|
+
note_paths = []
|
|
3341
|
+
for atmpt, evidence in attempt_notes:
|
|
3342
|
+
p = work_dir / wu_key / f"attempt-{atmpt}.md"
|
|
3343
|
+
p.parent.mkdir(parents=True, exist_ok=True)
|
|
3344
|
+
p.write_text(evidence)
|
|
3345
|
+
note_paths.append(p)
|
|
3346
|
+
backend.set_wu(wu, "status", "blocked_human")
|
|
3347
|
+
write_cost_to_wu(backend, wu, cum_usage)
|
|
3348
|
+
wu_events.append(build_event("human_escalation", wu.wu_id, {
|
|
3349
|
+
"reason": reason,
|
|
3350
|
+
"attempts": MAX_ATTEMPTS,
|
|
3351
|
+
"attempts_usage": attempts_usage,
|
|
3352
|
+
}))
|
|
3353
|
+
flush_events(events_path, wu_events)
|
|
3354
|
+
commit_bookkeeping(
|
|
3355
|
+
[wu.file, events_path, *note_paths],
|
|
3356
|
+
f"chore(loop): {wu.wu_id} blocked_human "
|
|
3357
|
+
f"({reason}, {MAX_ATTEMPTS} attempts)"
|
|
3358
|
+
f"\n\nFeature: {wu.wu_id}",
|
|
3359
|
+
)
|
|
3360
|
+
print(f" BLOCKED after {MAX_ATTEMPTS} attempts — "
|
|
3361
|
+
f"escalated ({reason})")
|
|
3362
|
+
blocked = True
|
|
3363
|
+
|
|
3364
|
+
if blocked:
|
|
3365
|
+
print("\nGate halted: work unit(s) need human attention.")
|
|
3366
|
+
return 1
|
|
3367
|
+
if dry_run:
|
|
3368
|
+
print(f"\n(dry run) Gate {gate.number} would complete and await review.")
|
|
3369
|
+
return 0
|
|
3370
|
+
|
|
3371
|
+
backend.set_gate(gate, "awaiting_review")
|
|
3372
|
+
# on_gate_passed fires here: WUs all done, gate now awaiting human review
|
|
3373
|
+
backend.on_gate_passed(feature_id, gate.number)
|
|
3374
|
+
flush_events(events_path,
|
|
3375
|
+
[build_event("gate_reached", feature_id, {"gate": gate.number})])
|
|
3376
|
+
commit_bookkeeping(
|
|
3377
|
+
[gate.file, events_path],
|
|
3378
|
+
f"chore(loop): gate {gate.number} awaiting_review\n\nFeature: {feature_id}",
|
|
3379
|
+
)
|
|
3380
|
+
is_terminal_gate = gate is gates[-1]
|
|
3381
|
+
# FEAT-2026-0018/T11H: in-loop auto-close sets _terminal_auto_closed_wu;
|
|
3382
|
+
# fire terminal flips here after gate is at awaiting_review.
|
|
3383
|
+
if _terminal_auto_closed_wu is not None:
|
|
3384
|
+
rc = _fire_and_verify_terminal_flips(
|
|
3385
|
+
_terminal_auto_closed_wu, feature_dir, events_path, feature_id,
|
|
3386
|
+
)
|
|
3387
|
+
if rc:
|
|
3388
|
+
return rc
|
|
3389
|
+
elif close_wu_for_terminal is not None:
|
|
3390
|
+
# Post-pass driver-state invariant guard (FEAT-2026-0017/T01):
|
|
3391
|
+
# fires AFTER fire_terminal_flips so the side-effect checks (gate
|
|
3392
|
+
# `passed`, roadmap row `done`, archive anchor) observe the flips.
|
|
3393
|
+
rc = _fire_and_verify_terminal_flips(
|
|
3394
|
+
close_wu_for_terminal, feature_dir, events_path, feature_id,
|
|
3395
|
+
)
|
|
3396
|
+
if rc:
|
|
3397
|
+
return rc
|
|
3398
|
+
used_combined_close = any(
|
|
3399
|
+
(feature_dir / ref["file"]).is_file()
|
|
3400
|
+
and read_frontmatter(feature_dir / ref["file"])[0].get("type") == "close"
|
|
3401
|
+
for ref in gate.refs
|
|
3402
|
+
)
|
|
3403
|
+
# Re-read PLAN.md status after the close ceremony: a `close` or
|
|
3404
|
+
# `plan-next` WU may have flipped it to `done` (single-gate combined
|
|
3405
|
+
# close always does; multi-gate terminal plan-next does on the
|
|
3406
|
+
# terminal gate). The branching below honors what the close ceremony
|
|
3407
|
+
# actually decided rather than guessing from gate shape alone.
|
|
3408
|
+
plan_fm_after, _ = read_frontmatter(feature_dir / "PLAN.md")
|
|
3409
|
+
feature_done = plan_fm_after.get("status") == "done"
|
|
3410
|
+
review = feature_dir / f"GATE-{gate.number:02d}-REVIEW.md"
|
|
3411
|
+
if feature_done:
|
|
3412
|
+
ceremony = ("combined close ceremony"
|
|
3413
|
+
if used_combined_close
|
|
3414
|
+
else "retro, lessons, docs, plan-next")
|
|
3415
|
+
print(f"\nGate {gate.number} complete ({ceremony}); "
|
|
3416
|
+
f"PLAN.md status: done.")
|
|
3417
|
+
print(
|
|
3418
|
+
"Terminal — feature ready to wrap. Next step:\n"
|
|
3419
|
+
" - /wrap-feature push branch + "
|
|
3420
|
+
"open PR + merge advisory (single-confirm per step).\n"
|
|
3421
|
+
" - Or manually: read RETROSPECTIVE.md, "
|
|
3422
|
+
"git push, gh pr create."
|
|
3423
|
+
)
|
|
3424
|
+
elif is_terminal_gate:
|
|
3425
|
+
print(f"\nGate {gate.number} complete (retro, lessons, docs, "
|
|
3426
|
+
f"plan-next); terminal gate but PLAN.md not yet `done`.")
|
|
3427
|
+
print(
|
|
3428
|
+
"Inconsistency: terminal gate closed without close ceremony "
|
|
3429
|
+
"flipping PLAN.md to `done`. Inspect RETROSPECTIVE.md / "
|
|
3430
|
+
"events.jsonl. Likely fix: manually flip PLAN.md `status: "
|
|
3431
|
+
"active -> done`, then `/wrap-feature`."
|
|
3432
|
+
)
|
|
3433
|
+
else:
|
|
3434
|
+
print(f"\nGate {gate.number} complete (retro, lessons, docs, "
|
|
3435
|
+
f"plan-next).")
|
|
3436
|
+
print(
|
|
3437
|
+
f"Next gate drafted. Next step:\n"
|
|
3438
|
+
f" - /arm-gate walk drafts, accept/revise/reject, "
|
|
3439
|
+
f"flip accepted WUs to `pending`,\n"
|
|
3440
|
+
f" mark this gate `passed`. "
|
|
3441
|
+
f"Reads {review.name} for planner findings.\n"
|
|
3442
|
+
f" - Resume python3 .specfuse/scripts/loop.py"
|
|
3443
|
+
)
|
|
3444
|
+
return 0
|
|
3445
|
+
finally:
|
|
3446
|
+
if lock_fd is not None:
|
|
3447
|
+
lock_fd.close()
|
|
3448
|
+
|
|
3449
|
+
|
|
3450
|
+
def _parse_version(s: str) -> tuple[int, ...]:
|
|
3451
|
+
"""Lenient dotted-int parse for version compare. Non-numeric leading junk in a
|
|
3452
|
+
component (e.g. a `-rc1` suffix) is dropped; missing components count as 0. No
|
|
3453
|
+
third-party `packaging` dependency — the driver stays stdlib-only."""
|
|
3454
|
+
parts: list[int] = []
|
|
3455
|
+
for tok in str(s).strip().split("."):
|
|
3456
|
+
m = re.match(r"\d+", tok)
|
|
3457
|
+
parts.append(int(m.group()) if m else 0)
|
|
3458
|
+
return tuple(parts) or (0,)
|
|
3459
|
+
|
|
3460
|
+
|
|
3461
|
+
def check_scaffold_version(scaffold_path: Path | None = None,
                           driver_min: str = MIN_SCAFFOLD_VERSION) -> str:
    """Fail loud (SystemExit) if the consumer's scaffold version is unusable.

    The scaffold declares its own version in `.specfuse/VERSION`; the driver
    requires it to be >= `driver_min`. Only the first line of the file is
    considered.

    Args:
        scaffold_path: Version file to read; falls back to
            SCAFFOLD_VERSION_PATH when not given. Injectable for testing.
        driver_min: Minimum scaffold version this driver accepts.

    Returns:
        The scaffold version string (first line, stripped) on success.

    Raises:
        SystemExit: if the version file is missing, empty, or declares a
            version older than `driver_min`.
    """
    path = scaffold_path or SCAFFOLD_VERSION_PATH
    if not path.exists():
        sys.exit(
            f"ERROR: {path} is missing — this scaffold predates driver version "
            f"checking. Run `specfuse upgrade` (or ./init.sh --upgrade <repo>) to "
            f"stamp it. Driver {DRIVER_VERSION} requires scaffold >= {driver_min}."
        )
    raw = path.read_text().strip()
    if not raw:
        sys.exit(f"ERROR: {path} is empty. Run `specfuse upgrade` to restamp it.")
    raw = raw.splitlines()[0].strip()
    have = _parse_version(raw)
    need = _parse_version(driver_min)
    # Tuple ordering treats a strict prefix as smaller, so an unpadded compare
    # would reject scaffold "0.2" against a required "0.2.0". Zero-pad both
    # sides to the same length so missing components count as 0.
    width = max(len(have), len(need))
    have += (0,) * (width - len(have))
    need += (0,) * (width - len(need))
    if have < need:
        sys.exit(
            f"ERROR: scaffold version {raw} is older than this driver requires "
            f"(driver {DRIVER_VERSION} needs scaffold >= {driver_min}). Run "
            f"`specfuse upgrade` (or ./init.sh --upgrade <repo>) to update the scaffold."
        )
    return raw
|
|
3485
|
+
|
|
3486
|
+
|
|
3487
|
+
def main() -> int:
    """Command-line entry point for the loop driver.

    Parses the CLI flags, exits early when the features directory is absent,
    checks the scaffold version, and then hands control to `run`, whose result
    is returned to the caller.
    """
    parser = argparse.ArgumentParser(
        description="Specfuse loop driver (single-repo).",
    )
    parser.add_argument(
        "--feature",
        help=(
            "Feature dir name under .specfuse/features/ "
            "(optional if exactly one feature is active)."
        ),
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Walk the current gate without dispatching or writing.",
    )
    parser.add_argument(
        "--force-full-close",
        metavar="FEATURE_ID",
        help=(
            "Bypass predicate consultation and run the existing close "
            "path for the named feature. Must match the feature being processed."
        ),
    )
    opts = parser.parse_args()

    # The driver resolves everything relative to the repo root; bail out
    # before touching anything if the features directory is not there.
    if not FEATURES_DIR.exists():
        sys.exit(f"No {FEATURES_DIR}. Run from your repo root.")

    check_scaffold_version()
    return run(
        opts.feature,
        opts.dry_run,
        force_full_close=opts.force_full_close,
    )
|
|
3501
|
+
|
|
3502
|
+
|
|
3503
|
+
if __name__ == "__main__":
    # Script entry: surface the driver's return code as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
|