stellars-claude-code-plugins 0.8.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2444 @@
1
+ #!/usr/bin/env python3
2
+ """YAML-driven declarative build iteration orchestrator.
3
+
4
+ All content loaded from YAML resources (phases, agents, workflow types,
5
+ guardian checklist, display strings). The engine is content-agnostic -
6
+ each plugin provides its own YAML resource files.
7
+
8
+ 10-command CLI with 2 calls per phase (start + end).
9
+ Stateful phases, agent review, automated testing, independent gatekeeper.
10
+
11
+ State: <artifacts_dir>/state.yaml
12
+ Audit: <artifacts_dir>/log.yaml
13
+ Failures: <artifacts_dir>/failures.yaml
14
+ Hypotheses: <artifacts_dir>/hypotheses.yaml
15
+ """
16
+
17
+ import argparse
18
+ import collections
19
+ from datetime import datetime, timezone
20
+ import os
21
+ from pathlib import Path
22
+ import re
23
+ import subprocess
24
+ import sys
25
+
26
+ import yaml
27
+
28
+ from stellars_claude_code_plugins.engine.model import (
29
+ load_model,
30
+ validate_model,
31
+ _resolve_key,
32
+ _KNOWN_VARS as _KNOWN_TEMPLATE_VARS,
33
+ )
34
+ from stellars_claude_code_plugins.engine.fsm import (
35
+ resolve_phase_key,
36
+ build_phase_lifecycle_fsm,
37
+ State as FSMState,
38
+ Event as FSMEvent,
39
+ )
40
+
41
+ # ── Module-level state (set by _initialize) ────────────────────────
42
+
43
# Directory the process was started from; artifact paths are built relative to it.
PROJECT_ROOT = Path.cwd()

# Placeholders: assigned by _initialize(). The *_FILE paths may later be
# re-pointed by _init_artifacts_dir(). Until then they are None / empty.
_MODEL = None
DEFAULT_ARTIFACTS_DIR = None
STATE_FILE = None
LOG_FILE = None
FAILURES_FILE = None
HYPOTHESES_FILE = None
CONTEXT_FILE = None
CMD = None
_SEP_CHAR = None
_SEP_WIDTH = None
_HDR_CHAR = None
_HDR_WIDTH = None
_PHASE_FSM = None
_FSM_STATE_VALUES = None
# Registries below are mutated in place (clear + update) by _initialize().
ITERATION_TYPES = {}
PHASE_AGENTS = {}
_PHASE_START = {}
_PHASE_END = {}

_AUTO_ACTION_REGISTRY = {}

# Set to True as the last step of _initialize().
_initialized = False
67
+
68
+
69
def _initialize(resources_dir: Path) -> None:
    """Load the YAML model and populate every module-level registry.

    Model loading is deferred to this call so the module can be imported
    without a resources directory. Meant to run before any command handler.

    Args:
        resources_dir: directory handed to ``load_model``.
    """
    global _MODEL, DEFAULT_ARTIFACTS_DIR, STATE_FILE, LOG_FILE, FAILURES_FILE
    global HYPOTHESES_FILE, CONTEXT_FILE, CMD, _SEP_CHAR, _SEP_WIDTH
    global _HDR_CHAR, _HDR_WIDTH, _PHASE_FSM, _FSM_STATE_VALUES
    global ITERATION_TYPES, PHASE_AGENTS, _PHASE_START, _PHASE_END
    global _AUTO_ACTION_REGISTRY, _initialized

    _MODEL = load_model(resources_dir)
    app = _MODEL.app

    # Artifact locations all hang off the configured artifacts directory.
    DEFAULT_ARTIFACTS_DIR = PROJECT_ROOT / app.artifacts_dir
    STATE_FILE = DEFAULT_ARTIFACTS_DIR / "state.yaml"
    LOG_FILE = DEFAULT_ARTIFACTS_DIR / "log.yaml"
    FAILURES_FILE = DEFAULT_ARTIFACTS_DIR / "failures.yaml"
    HYPOTHESES_FILE = DEFAULT_ARTIFACTS_DIR / "hypotheses.yaml"
    CONTEXT_FILE = DEFAULT_ARTIFACTS_DIR / "context.yaml"

    # Display settings used by _msg() / _cli().
    CMD = app.cmd or "python orchestrate.py"
    display = app.display
    _SEP_CHAR = display.separator
    _SEP_WIDTH = display.separator_width
    _HDR_CHAR = display.header_char
    _HDR_WIDTH = display.header_width

    _PHASE_FSM = build_phase_lifecycle_fsm()
    _FSM_STATE_VALUES = {member.value for member in FSMState}

    # Workflow types, flattened to plain dicts.
    workflow_table = {}
    for wf_name, wf in _MODEL.workflow_types.items():
        workflow_table[wf_name] = {
            "description": wf.description,
            "phases": wf.phase_names,
            "required": wf.required,
            "skippable": wf.skippable,
        }
    ITERATION_TYPES.clear()
    ITERATION_TYPES.update(workflow_table)

    # Flat agent-name lists per phase key.
    agent_names = {}
    for phase_key, agent_defs in _MODEL.agents.items():
        agent_names[phase_key] = [agent.name for agent in agent_defs]
    PHASE_AGENTS.clear()
    PHASE_AGENTS.update(agent_names)

    # One start renderer and one end renderer per phase in the model.
    _PHASE_START.clear()
    _PHASE_END.clear()
    for phase_name in _MODEL.phases:
        _PHASE_START[phase_name] = _make_phase_callable(phase_name, "start")
        _PHASE_END[phase_name] = _make_phase_callable(phase_name, "end")

    # Names usable in a phase's auto_actions.on_complete list.
    handlers = {
        "hypothesis_autowrite": _action_hypothesis_autowrite,
        "hypothesis_gc": _action_hypothesis_gc,
        "plan_save": _action_plan_save,
        "iteration_summary": _action_iteration_summary,
        "iteration_advance": _action_iteration_advance,
    }
    _AUTO_ACTION_REGISTRY.clear()
    _AUTO_ACTION_REGISTRY.update(handlers)

    _initialized = True
136
+
137
+
138
+ # ── FSM helpers ─────────────────────────────────────────────────────
139
+
140
+
141
def _fire_fsm(event: FSMEvent, state: dict) -> FSMState:
    """Fire ``event`` on the phase FSM and mirror the result into ``state``.

    The FSM is first positioned at the status stored in
    ``state["phase_status"]`` (PENDING when the key is missing or holds an
    unknown value), then the event is fired and the resulting status is
    written back to the same key.

    Returns:
        The FSM state reached after firing the event.
    """
    persisted = state.get("phase_status", "pending")
    if persisted in _FSM_STATE_VALUES:
        _PHASE_FSM.current_state = FSMState(persisted)
    else:
        _PHASE_FSM.current_state = FSMState.PENDING
    reached = _PHASE_FSM.fire(event)
    state["phase_status"] = reached.value
    return reached
152
+
153
+
154
+ # ── Display helpers ─────────────────────────────────────────────────
155
+
156
+
157
def _msg(key: str, **kwargs) -> str:
    """Render the message template registered under ``key``.

    Templates come from ``_MODEL.app.messages``; an unknown key falls back
    to the key itself as the template. ``cmd``, ``separator_line`` and
    ``header_line`` are always available, and ``kwargs`` may override them.
    Placeholders with no value render as empty strings rather than raising.
    """
    template = _MODEL.app.messages.get(key, key)
    values = collections.defaultdict(str)
    values["cmd"] = CMD
    values["separator_line"] = _SEP_CHAR * _SEP_WIDTH
    values["header_line"] = _HDR_CHAR * _HDR_WIDTH
    values.update(kwargs)
    return template.format_map(values)
169
+
170
+
171
def _cli(section: str, key: str) -> str:
    """Return a CLI help string taken from ``_MODEL.app.cli``.

    ``section`` selects the source: ``"description"`` and ``"epilog"`` are
    top-level strings, ``"commands"`` looks ``key`` up in the commands
    table, and anything else looks it up in the args table. A missing key
    yields the key itself. ``{cmd}`` is substituted in the epilog and in any
    looked-up string containing a brace.
    """
    cli = _MODEL.app.cli
    if section == "description":
        return cli.description

    substitutions = collections.defaultdict(str, {"cmd": CMD})
    if section == "epilog":
        return cli.epilog.format_map(substitutions)

    table = cli.commands if section == "commands" else cli.args
    text = table.get(key, key)
    if "{" not in text:
        return text
    return text.format_map(substitutions)
186
+
187
+
188
+ # ── Exposed data structures ────────────────────────────────────────
189
+
190
+
191
def _guardian_checklist() -> str:
    """Return the checklist of the first agent named ``guardian`` that has one.

    Scans every agent list in ``_MODEL.agents`` in iteration order and
    returns an empty string when no guardian carries a checklist.
    """
    checklists = (
        agent.checklist
        for group in _MODEL.agents.values()
        for agent in group
        if agent.name == "guardian" and agent.checklist
    )
    return next(checklists, "")
204
+
205
+
206
def _current_workflow_type() -> str:
    """Return the ``type`` stored in the persisted state, or ``'full'``."""
    state = _load_state()
    if not state:
        return "full"
    return state.get("type", "full")
210
+
211
+
212
def _resolve_phase(phase: str) -> str:
    """Map ``phase`` to its key in ``_MODEL.phases`` for the current workflow."""
    workflow = _current_workflow_type()
    return resolve_phase_key(workflow, phase, _MODEL.phases)
215
+
216
+
217
def _resolve_agents(phase: str) -> str:
    """Map ``phase`` to its key in ``_MODEL.agents`` for the current workflow."""
    workflow = _current_workflow_type()
    return resolve_phase_key(workflow, phase, _MODEL.agents)
220
+
221
+
222
def _resolve_gate(phase: str, gate_type: str) -> str:
    """Build the gate key ``<resolved phase>::<gate_type>`` for ``phase``.

    Gate keys in ``_MODEL.gates`` end in ``::<gate type>``. The part before
    that suffix is collected for every gate of the requested type, and
    ``_resolve_key`` picks the matching entry for the current workflow.
    """
    candidates = set()
    for gate_key in _MODEL.gates:
        prefix, separator, suffix = gate_key.rpartition("::")
        if separator and suffix == gate_type:
            candidates.add(prefix)
    resolved = _resolve_key(_current_workflow_type(), phase, candidates)
    return f"{resolved}::{gate_type}"
235
+
236
+
237
def _build_agent_instructions(phase: str, ctx: dict | None = None) -> str:
    """Render the ``### Agent N: NAME`` instruction text for a phase.

    Each agent contributes a heading, its prompt (with its checklist
    appended after a blank line when it has one) and a separating blank
    line. When ``ctx`` is non-empty and the prompt contains a brace, the
    prompt is formatted with ``ctx``; unknown placeholders become empty.

    Returns:
        The joined text without trailing whitespace, or ``""`` when the
        resolved phase has no agents.
    """
    agents = _MODEL.agents.get(_resolve_agents(phase), [])
    if not agents:
        return ""

    sections = []
    for agent in agents:
        body = agent.prompt
        extra = agent.checklist or ""
        if extra:
            body = body.rstrip() + "\n\n" + extra
        if ctx and "{" in body:
            body = body.format_map(collections.defaultdict(str, ctx))
        sections.extend(
            [
                f"### Agent {agent.number}: {agent.display_name}",
                body.rstrip(),
                "",
            ]
        )
    return "\n".join(sections).rstrip()
266
+
267
+
268
+ # ── Build context for template rendering ────────────────────────────
269
+
270
+
271
def _build_context(state: dict | None = None, phase: str = "", event: str = "") -> dict:
    """Assemble the template variables used to render a phase template.

    Derived values: a digest of the five most recent failures, the
    iteration plan excerpt (first 300 characters), the hypothesis
    catalogue, benchmark notes, an iteration banner, agent instructions
    and the two spawn-instruction variants.

    Args:
        state: iteration state; ``None`` is treated as an empty dict.
        phase: phase used for agent lookup; falls back to
            ``state["current_phase"]`` when empty.
        event: accepted for signature symmetry; not read in this function.

    Returns:
        Dict of template variables.
    """
    s = state or {}

    # Prior failures: headline with the total, then the last five entries.
    failures = _load_yaml_list(FAILURES_FILE)
    if failures:
        recent = [
            f" - [{item.get('mode', '?')}] "
            f"(iter {item.get('iteration', '?')}) "
            f"{item.get('description', '?')}\n"
            for item in failures[-5:]
        ]
        prior_context = f"\n**Prior failures** ({len(failures)} total):\n" + "".join(recent)
    else:
        prior_context = ""

    # Plan excerpt, only shown once past iteration 0.
    iteration_plan = s.get("iteration_plan", "")
    iteration = s.get("iteration", 1)
    if iteration_plan and iteration > 0:
        plan_context = (
            f"\n**Iteration plan** (from planning iteration 0):\n{iteration_plan[:300]}\n"
        )
    else:
        plan_context = ""

    # Hypothesis catalogue, skipped while it is still the empty placeholder.
    catalogue = _hypothesis_catalogue_summary()
    if catalogue and catalogue != "(no hypotheses yet)":
        prior_hyp = f"\n**Hypothesis catalogue** (rate, review, evolve this list):\n{catalogue}\n"
    else:
        prior_hyp = ""

    # Benchmark notes: wording depends on whether a previous score exists.
    benchmark_cmd = s.get("benchmark_cmd", "")
    benchmark_info = ""
    if benchmark_cmd:
        scores = s.get("benchmark_scores", [])
        if scores:
            last = scores[-1]["score"]
            benchmark_info = (
                f"\n**Benchmark**: `{benchmark_cmd}` (last score: {last})\n"
                "The benchmark runs automatically after tests pass. Score is tracked across\n"
                "iterations - lower is better. The trend is shown in the output."
            )
        else:
            benchmark_info = (
                f"\n**Benchmark**: `{benchmark_cmd}` (no prior score - first run)\n"
                "The benchmark runs automatically after tests pass. It must output a numeric\n"
                "value. This score will be tracked across iterations - lower is better."
            )

    # Banner: dependency workflows take precedence over the iteration banner.
    total_iters = s.get("total_iterations", 1)
    wf_def = _MODEL.workflow_types.get(s.get("type", "full"))
    if wf_def and wf_def.dependency:
        banner = _msg("dependency_banner", description=wf_def.description)
        iteration_purpose = "\n" + banner + "\n"
    elif iteration > 0 and iteration_plan:
        banner = _msg("iteration_n_banner", iteration=iteration, total=total_iters)
        iteration_purpose = "\n" + banner + "\n"
    else:
        iteration_purpose = ""

    ctx = {
        "CMD": CMD,
        "objective": s.get("objective", "not set"),
        "iteration": iteration,
        "iteration_purpose": iteration_purpose,
        "total": total_iters,
        "remaining": total_iters - iteration,
        "prior_context": prior_context,
        "plan_context": plan_context,
        "prior_hyp": prior_hyp,
        "checklist": _guardian_checklist(),
        "benchmark_info": benchmark_info,
    }

    # Agent instructions are rendered with the context built so far.
    agent_phase_key = _resolve_agents(phase or s.get("current_phase", ""))
    ctx["agents_instructions"] = _build_agent_instructions(agent_phase_key, ctx)

    # Spawn instructions: spelled-out count for 1-6 agents, digits otherwise.
    number_words = {1: "ONE", 2: "TWO", 3: "THREE", 4: "FOUR", 5: "FIVE", 6: "SIX"}
    agent_count = len(_MODEL.agents.get(agent_phase_key, []))
    spawn_mode = "PARALLEL"
    if agent_count > 0:
        word = number_words.get(agent_count, str(agent_count))
        ctx["spawn_instruction"] = (
            f"**MANDATORY: Spawn {word} SEPARATE agents IN {spawn_mode}** "
            f"(single message, {word} Agent tool calls)."
        )
        ctx["spawn_instruction_plan"] = (
            f"**MANDATORY: Spawn {word} SEPARATE agents IN {spawn_mode} "
            f"to review the plan** (single message, {word} Agent tool calls)."
        )
    else:
        ctx["spawn_instruction"] = ""
        ctx["spawn_instruction_plan"] = ""

    return ctx
387
+
388
+
389
+ # ── Phase instruction registry (YAML-driven) ────────────────────────
390
+
391
+
392
def _make_phase_callable(phase: str, event: str) -> object:
    """Return a zero-argument renderer for one phase/event pair.

    The returned closure loads the state, builds the context, picks the
    template attribute on the resolved phase object and formats it. The
    NEXT phase uses ``<event>_continue`` while iterations remain and
    ``<event>_final`` otherwise. A missing phase or empty template falls
    back to the text ``Phase <phase> <event>``.
    """

    def _callable():
        """Render the template for the captured phase and event."""
        ctx = _build_context(_load_state(), phase=phase, event=event)

        key = event
        if phase == "NEXT" and event in ("start", "end"):
            variant = "continue" if ctx["remaining"] > 0 else "final"
            key = f"{event}_{variant}"

        phase_obj = _MODEL.phases.get(_resolve_phase(phase))
        template = getattr(phase_obj, key, "") if phase_obj else ""
        if not template:
            template = f"Phase {phase} {event}"
        return template.format_map(collections.defaultdict(str, ctx))

    return _callable
423
+
424
+
425
+ # ── Auto-action handlers ──────────────────────────────────────────
426
+
427
+
428
def _action_hypothesis_autowrite(state: dict, phase: str):
    """Parse the recorded output of ``phase`` into hypothesis entries, if any."""
    recorded = state.get("phase_outputs", {}).get(phase, "")
    if not recorded:
        return
    _auto_write_hypotheses(recorded, state.get("iteration", 0))
432
+
433
def _action_hypothesis_gc(state: dict, phase: str):
    """Print the garbage-collection banner, then run hypothesis GC."""
    print("\n" + _msg("auto_separator"))
    for key in ("auto_hypothesis_gc", "auto_separator"):
        print(_msg(key))
    _run_hypothesis_gc()
438
+
439
def _action_iteration_summary(state: dict, phase: str):
    """Print the iteration summary and, when NEXT follows, its start text."""
    print("\n" + _msg("auto_separator"))
    for key in ("auto_summary", "auto_separator"):
        print(_msg(key))
    _run_summary(state)

    if _next_phase(state) != "NEXT":
        return

    print("\n" + _msg("auto_separator"))
    for key in ("auto_next", "auto_autonomous", "auto_separator"):
        print(_msg(key))
    render_next = _PHASE_START.get("NEXT", lambda: "")
    print(render_next())
452
+
453
def _action_iteration_advance(state: dict, phase: str):
    """Advance to the next iteration and signal the caller to stop early."""
    _run_next_iteration(state)
    # "return" is the sentinel _run_auto_actions() checks for.
    return "return"
456
+
457
def _action_plan_save(state: dict, phase: str):
    """Write the output of ``phase`` to plan.yaml for dependency workflows.

    Does nothing when the workflow type is not flagged as a dependency or
    when no output was recorded for the phase.
    """
    wf_def = _MODEL.workflow_types.get(state.get("type", ""))
    if not wf_def or not wf_def.dependency:
        return

    plan_text = state.get("phase_outputs", {}).get(phase, "")
    if not plan_text:
        return

    plan_file = DEFAULT_ARTIFACTS_DIR / "plan.yaml"
    payload = {
        "objective": state.get("objective", ""),
        "total_iterations": state.get("total_iterations", 1),
        "plan": plan_text,
        "created_at": _now(),
    }
    plan_file.write_text(_yaml_dump(payload))
    print(_msg("plan_saved", path=plan_file))
474
+
475
+
476
def _run_auto_actions(phase: str, state: dict) -> bool:
    """Run the ``on_complete`` auto-actions of the resolved phase, in order.

    Action names without a registered handler are ignored.

    Returns:
        True as soon as a handler returns the sentinel ``"return"``,
        otherwise False.
    """
    phase_obj = _MODEL.phases.get(_resolve_phase(phase))
    if not phase_obj or not phase_obj.auto_actions:
        return False

    for action_name in phase_obj.auto_actions.get("on_complete", []):
        handler = _AUTO_ACTION_REGISTRY.get(action_name)
        if not handler:
            continue
        if handler(state, phase) == "return":
            return True
    return False
490
+
491
+
492
+ # ── Helper functions ─────────────────────────────────────────────────
493
+
494
+
495
+ def _now() -> str:
496
+ """Return current UTC timestamp as ISO 8601 string."""
497
+ return datetime.now(timezone.utc).isoformat(timespec="seconds")
498
+
499
+
500
def _load_state() -> dict | None:
    """Return the parsed contents of STATE_FILE, or None if it does not exist."""
    if not STATE_FILE.exists():
        return None
    raw = STATE_FILE.read_text()
    return yaml.safe_load(raw)
505
+
506
+
507
def _yaml_dump(data: object) -> str:
    """Dump ``data`` to YAML, using literal block scalars for long text.

    Strings that contain a newline or exceed 80 characters are emitted in
    literal block style (``|``). Long single-line strings are first
    word-wrapped at 80 characters. Keys keep their insertion order.

    The representer is registered on a private ``yaml.Dumper`` subclass, so
    the shared ``yaml.Dumper`` class is never mutated and repeated calls do
    not grow its representer table.

    Args:
        data: any structure of dicts, lists and scalars PyYAML can dump.

    Returns:
        The YAML document as a string.
    """

    class LiteralStr(str):
        """Marker type: dump this string as a literal block scalar."""

    class _LiteralDumper(yaml.Dumper):
        """Dumper subclass that owns the LiteralStr representer."""

    def _literal_representer(dumper, value):
        """Represent ``value`` as a ``|`` block scalar."""
        return dumper.represent_scalar("tag:yaml.org,2002:str", value, style="|")

    # add_representer on a subclass copies the table before modifying it,
    # so yaml.Dumper itself stays untouched.
    _LiteralDumper.add_representer(LiteralStr, _literal_representer)

    def _wrap_long(text: str, width: int = 80) -> str:
        """Greedily wrap a single-line string at word boundaries."""
        if len(text) <= width:
            return text
        lines = []
        current = ""
        for word in text.split():
            # Only flush a non-empty line: an over-long first word must not
            # produce a spurious leading blank line.
            if current and len(current) + len(word) + 1 > width:
                lines.append(current)
                current = word
            else:
                current = f"{current} {word}" if current else word
        if current:
            lines.append(current)
        return "\n".join(lines)

    def _prepare(obj):
        """Recursively convert long or multiline strings to LiteralStr."""
        if isinstance(obj, dict):
            return {k: _prepare(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [_prepare(v) for v in obj]
        if isinstance(obj, str) and ("\n" in obj or len(obj) > 80):
            text = _wrap_long(obj) if "\n" not in obj else obj
            if not text.endswith("\n"):
                text += "\n"
            return LiteralStr(text)
        return obj

    return yaml.dump(
        _prepare(data),
        Dumper=_LiteralDumper,
        default_flow_style=False,
        sort_keys=False,
    )
560
+
561
+
562
def _save_state(state: dict) -> None:
    """Serialise ``state`` with ``_yaml_dump`` and overwrite STATE_FILE."""
    serialised = _yaml_dump(state)
    STATE_FILE.write_text(serialised)
569
+
570
+
571
def _save_objective(objective: str, iterations: int) -> None:
    """Write objective, iteration count and a timestamp to objective.yaml."""
    payload = {
        "objective": objective,
        "iterations": iterations,
        "created_at": _now(),
    }
    target = DEFAULT_ARTIFACTS_DIR / "objective.yaml"
    target.write_text(_yaml_dump(payload))
583
+
584
+
585
def _load_yaml_list(path: Path) -> list[dict]:
    """Return the list stored in the YAML file at ``path``.

    A missing file, or a file whose top-level value is not a list, yields
    an empty list.
    """
    if path.exists():
        loaded = yaml.safe_load(path.read_text())
        if isinstance(loaded, list):
            return loaded
    return []
591
+
592
+
593
def _append_yaml_entry(path: Path, entry: dict) -> None:
    """Rewrite the YAML list file at ``path`` with ``entry`` added at the end."""
    updated = [*_load_yaml_list(path), entry]
    path.write_text(_yaml_dump(updated))
598
+
599
+
600
def _append_log(entry: dict) -> None:
    """Stamp ``entry`` in place with the current time and append it to LOG_FILE."""
    entry.update(timestamp=_now())
    _append_yaml_entry(LOG_FILE, entry)
604
+
605
+
606
def _append_failure(entry: dict) -> None:
    """Stamp ``entry`` in place with the current time and append it to FAILURES_FILE."""
    entry.update(timestamp=_now())
    _append_yaml_entry(FAILURES_FILE, entry)
610
+
611
+
612
def _append_hypothesis(entry: dict) -> None:
    """Stamp ``entry`` in place and append it to the hypothesis catalogue.

    The catalogue file accumulates entries across iterations. This function
    only appends; it does not look for an existing entry with the same id.
    """
    entry.update(timestamp=_now())
    _append_yaml_entry(HYPOTHESES_FILE, entry)
621
+
622
+
623
+ def _auto_write_hypotheses(output_content: str, iteration: int) -> None:
624
+ """Extract structured hypothesis entries from HYPOTHESIS phase output.
625
+
626
+ Splits the output on ID: boundaries and parses each block for
627
+ structured fields (ID/HYPOTHESIS/PREDICT/EVIDENCE/RISK/STARS).
628
+ Writes valid entries to hypotheses.yaml. Entries missing required
629
+ fields are skipped with a warning.
630
+ """
631
+ required_fields = {"id", "hypothesis", "predict", "evidence", "risk"}
632
+ fields_to_parse = ["ID", "HYPOTHESIS", "PREDICT", "EVIDENCE", "RISK",
633
+ "STARS", "WHAT TO DO", "STATUS"]
634
+
635
+ # Split on ID: boundaries to isolate each hypothesis block
636
+ blocks = re.split(r"(?=^ID:\s)", output_content, flags=re.MULTILINE)
637
+
638
+ entries = []
639
+ for block in blocks:
640
+ if not block.strip():
641
+ continue
642
+ entry: dict = {}
643
+ for line in block.split("\n"):
644
+ stripped = line.strip()
645
+ for field in fields_to_parse:
646
+ if stripped.upper().startswith(field + ":"):
647
+ value = stripped[len(field) + 1:].strip()
648
+ key = field.lower().replace(" ", "_")
649
+ if key == "stars":
650
+ try:
651
+ entry["avg_score"] = float(value.split("/")[0])
652
+ except (ValueError, IndexError):
653
+ entry["avg_score"] = 0.0
654
+ entry["votes"] = value
655
+ else:
656
+ entry[key] = value
657
+ break
658
+ if entry.get("id"):
659
+ entries.append(entry)
660
+
661
+ written = 0
662
+ for entry in entries:
663
+ missing = required_fields - set(entry.keys())
664
+ if missing:
665
+ print(_msg("auto_hypothesis_warn", hid=entry.get("id", "?"), missing=str(missing)))
666
+ continue
667
+ entry.setdefault("status", "proposed")
668
+ entry.setdefault("votes", "")
669
+ entry.setdefault("avg_score", 0.0)
670
+ entry["iteration"] = iteration
671
+ _append_hypothesis(entry)
672
+ written += 1
673
+
674
+ if written:
675
+ print(_msg("auto_hypothesis_wrote", count=written))
676
+
677
+
678
def _load_context() -> dict:
    """Return the mapping stored in CONTEXT_FILE.

    A missing file, or a file whose top-level value is not a mapping,
    yields an empty dict.
    """
    if CONTEXT_FILE.exists():
        loaded = yaml.safe_load(CONTEXT_FILE.read_text())
        if isinstance(loaded, dict):
            return loaded
    return {}
688
+
689
+
690
def _save_context(ctx: dict) -> None:
    """Serialise ``ctx`` with ``_yaml_dump`` and overwrite CONTEXT_FILE."""
    serialised = _yaml_dump(ctx)
    CONTEXT_FILE.write_text(serialised)
693
+
694
+
695
def _load_prior_hypotheses() -> list[dict]:
    """Return every entry currently stored in HYPOTHESES_FILE."""
    catalogue = _load_yaml_list(HYPOTHESES_FILE)
    return catalogue
698
+
699
+
700
def _hypothesis_catalogue_summary() -> str:
    """Format the hypothesis catalogue as one line per entry.

    Each line shows id, average score, status and the first 100 characters
    of the hypothesis text; missing fields are shown as ``?``. Returns the
    placeholder ``(no hypotheses yet)`` when the catalogue is empty.
    """
    hyps = _load_prior_hypotheses()
    if not hyps:
        return "(no hypotheses yet)"

    def _line(item: dict) -> str:
        """Render one catalogue entry."""
        hid = item.get("id", "?")
        text = item.get("hypothesis", "?")[:100]
        status = item.get("status", "?")
        avg = item.get("avg_score", "?")
        return f" {hid} ({avg}/5, {status}): {text}"

    return "\n".join(_line(item) for item in hyps)
713
+
714
+
715
def _phase_dir(state: dict) -> Path:
    """Return the artifacts subfolder for the current phase, creating it.

    The folder is named ``phase_NN_<phase lowercased>`` where NN is the
    1-based position of the phase in the workflow, or 00 when the phase is
    not part of the workflow.
    """
    phases = ITERATION_TYPES[state["type"]]["phases"]
    phase = state["current_phase"]
    try:
        ordinal = phases.index(phase) + 1
    except ValueError:
        ordinal = 0
    folder = DEFAULT_ARTIFACTS_DIR / f"phase_{ordinal:02d}_{phase.lower()}"
    folder.mkdir(parents=True, exist_ok=True)
    return folder
724
+
725
+
726
def _next_phase(state: dict) -> str | None:
    """Return the phase that follows the current one in the workflow.

    Returns None when the current phase is the last one or is not part of
    the workflow's phase list.
    """
    phases = ITERATION_TYPES[state["type"]]["phases"]
    current = state["current_phase"]
    if current not in phases:
        return None
    following = phases.index(current) + 1
    return phases[following] if following < len(phases) else None
742
+
743
+
744
+ def _prev_implementable(state: dict) -> str:
745
+ """Find the phase to return to when a reviewer rejects.
746
+
747
+ Walks backward through the phase sequence looking for IMPLEMENT.
748
+ Used by cmd_reject and the TEST auto-reject to determine which
749
+ phase to roll back to.
750
+ """
751
+ itype = ITERATION_TYPES[state["type"]]
752
+ phases = itype["phases"]
753
+ idx = phases.index(state["current_phase"])
754
+ for i in range(idx - 1, -1, -1):
755
+ if phases[i] == "IMPLEMENT":
756
+ return "IMPLEMENT"
757
+ return phases[0]
758
+
759
+
760
def _count_iteration_failures(iteration: int) -> list[dict]:
    """Return the failure-log entries whose ``iteration`` equals ``iteration``."""
    matching = []
    for record in _load_yaml_list(FAILURES_FILE):
        if record.get("iteration") == iteration:
            matching.append(record)
    return matching
763
+
764
+
765
def _init_artifacts_dir(artifacts_dir: Path | None = None) -> None:
    """Create the artifacts directory and point the path globals at it.

    Rebinds STATE_FILE, LOG_FILE, FAILURES_FILE, HYPOTHESES_FILE and
    CONTEXT_FILE to files inside ``artifacts_dir``, or inside
    DEFAULT_ARTIFACTS_DIR when no directory is given.
    """
    global STATE_FILE, LOG_FILE, FAILURES_FILE, HYPOTHESES_FILE, CONTEXT_FILE  # noqa: PLW0603
    target = artifacts_dir or DEFAULT_ARTIFACTS_DIR
    target.mkdir(parents=True, exist_ok=True)
    file_names = (
        "state.yaml",
        "log.yaml",
        "failures.yaml",
        "hypotheses.yaml",
        "context.yaml",
    )
    STATE_FILE, LOG_FILE, FAILURES_FILE, HYPOTHESES_FILE, CONTEXT_FILE = (
        target / file_name for file_name in file_names
    )
780
+
781
+
782
def _read_last_iteration(artifacts_dir: Path | None = None) -> int:
    """Return the ``iteration`` value stored in state.yaml, or 0.

    Zero is returned when the file is missing, is not valid YAML, or does
    not hold a mapping; a mapping without the key also yields 0.
    """
    state_file = (artifacts_dir or DEFAULT_ARTIFACTS_DIR) / "state.yaml"
    if not state_file.exists():
        return 0
    try:
        loaded = yaml.safe_load(state_file.read_text())
        return loaded.get("iteration", 0)
    except (yaml.YAMLError, KeyError, AttributeError):
        return 0
792
+
793
+
794
+ def _clean_artifacts_dir(artifacts_dir: Path | None = None) -> None:
795
+ """Clean artifacts directory for fresh run.
796
+
797
+ Preserves hypotheses*.yaml, hypotheses_archive.yaml, and context.yaml.
798
+ """
799
+ d = artifacts_dir or DEFAULT_ARTIFACTS_DIR
800
+ if d.exists():
801
+ for f in d.iterdir():
802
+ if f.is_file():
803
+ # Preserve hypothesis and context files across clean
804
+ if f.name.startswith("hypotheses") or f.name == "context.yaml":
805
+ continue
806
+ f.unlink()
807
+ elif f.is_dir():
808
+ import shutil
809
+
810
+ shutil.rmtree(f)
811
+ d.mkdir(parents=True, exist_ok=True)
812
+
813
+
814
+ # ── Programmatic verification ────────────────────────────────────────
815
+
816
+
817
def _verify_test_phase(state: dict | None = None) -> tuple[bool, str]:
    """Run ``make test`` and ``make lint`` and report the outcome.

    Each command runs through the shell in PROJECT_ROOT with a 120 second
    timeout. The first non-zero exit or timeout stops the run and returns
    failure; on failure the last 500 characters of stdout (stderr when
    stdout is empty) are included. When the state carries a
    ``benchmark_cmd``, it is not executed here: it is appended to the report
    as a textual instruction for the orchestrating agent.

    Returns:
        ``(passed, report)`` where ``report`` is a newline-joined summary.
    """
    report = []
    steps = {"test": "make test", "lint": "make lint"}
    for label, command in steps.items():
        try:
            proc = subprocess.run(
                command,
                shell=True,
                capture_output=True,
                text=True,
                timeout=120,
                cwd=str(PROJECT_ROOT),
            )
        except subprocess.TimeoutExpired:
            report.append(f"{label}: TIMEOUT (120s)")
            return False, "\n".join(report)
        except FileNotFoundError:
            # NOTE(review): with shell=True a missing `make` presumably shows
            # up as a non-zero exit code instead - confirm this is reachable.
            report.append(f"{label}: Makefile target not found, skipping")
            continue

        if proc.returncode != 0:
            tail = proc.stdout[-500:] if proc.stdout else proc.stderr[-500:]
            report.append(f"{label}: FAIL\n{tail}")
            return False, "\n".join(report)
        report.append(f"{label}: PASS")

    # The benchmark is a generative instruction, not a shell command: it is
    # only echoed into the report for the orchestrating agent to act on.
    benchmark_cmd = (state or {}).get("benchmark_cmd", "")
    if benchmark_cmd:
        report.extend(
            [
                f"benchmark (generative): {benchmark_cmd}",
                " INSTRUCTION: Read the benchmark file, evaluate each [ ] item,",
                " update [ ] -> [x] for passing items, add new [ ] items if discovered,",
                " then report the count of remaining [ ] items as the violation score.",
                " The orchestrating agent MUST follow this instruction before proceeding.",
            ]
        )

    return True, "\n".join(report)
867
+
868
+
869
+ # ── Claude evaluation ────────────────────────────────────────────────
870
+
871
+
872
def _claude_evaluate(
    prompt: str,
    timeout: int = 60,
) -> tuple[bool, str]:
    """Run ``claude -p`` on an evaluation prompt and classify the reply.

    The CLI is invoked with ``--model sonnet --max-turns 3`` from
    PROJECT_ROOT. The verdict is PASS only when the first line of the
    reply (after stripping ``*``, ``#``, ``>`` and spaces) starts with
    "PASS", case-insensitively.

    Every call writes the prompt, response and verdict to
    ``<artifacts>/logs/eval_<timestamp>.log``.

    Args:
        prompt: Full evaluation prompt passed to ``claude -p``.
        timeout: Seconds to wait for the CLI before giving up.

    Returns:
        ``(passed, output)``. ``output`` is the stripped stdout of the
        CLI, or a ``FAIL: ...`` message when the CLI is missing, timed
        out, or produced no stdout at all.
    """
    # CLAUDECODE is removed from the child environment; the original
    # implementation notes that leaving it set makes the nested call hang.
    env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}

    try:
        result = subprocess.run(
            [
                "claude",
                "-p",
                prompt,
                "--model",
                "sonnet",
                "--max-turns",
                "3",
            ],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
            cwd=str(PROJECT_ROOT),
        )
    except FileNotFoundError:
        passed, output = False, "FAIL: claude CLI not found."
    except subprocess.TimeoutExpired:
        passed, output = False, f"FAIL: claude -p timed out ({timeout}s)."
    else:
        output = result.stdout.strip()
        if not output:
            # An empty reply would otherwise reach callers as a blank
            # explanation; surface the exit code and stderr tail instead.
            output = f"FAIL: claude -p returned no output (exit code {result.returncode})."
            stderr_tail = (result.stderr or "").strip()[-500:]
            if stderr_tail:
                output += f"\n{stderr_tail}"
        first_line = output.split("\n")[0].strip("*#> ").strip().upper()
        passed = first_line.startswith("PASS")

    # Log for tracing
    log_dir = DEFAULT_ARTIFACTS_DIR / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)
    ts = _now().replace(":", "-")  # ':' is not allowed in file names on some platforms
    log_file = log_dir / f"eval_{ts}.log"
    log_file.write_text(
        f"PROMPT:\n{prompt}\n\nRESPONSE:\n{output}\n\nRESULT: {'PASS' if passed else 'FAIL'}\n",
        encoding="utf-8",
    )

    return passed, output
922
+
923
+
924
def _readback_validate(
    phase: str,
    understanding: str,
    instructions: str,
) -> tuple[bool, str]:
    """Check the agent's stated understanding of a phase via ``claude -p``.

    The phase instructions are split at the first marker that is present,
    trying markers in the fixed priority order below (not by position in
    the text). Text before the marker becomes the objective (newlines
    flattened, max 150 chars); text from the marker on becomes the action
    part (max 500 chars, newlines flattened). With no marker the
    objective is empty and the whole text is the action part.

    The prompt comes from the gate resolved by
    ``_resolve_gate(phase, "readback")``; a missing gate yields an empty
    prompt.

    Args:
        phase: Name of the phase being started.
        understanding: The agent's summary of what the phase requires.
        instructions: Full instruction text for the phase.

    Returns:
        The ``(passed, output)`` pair from ``_claude_evaluate``.
    """
    markers = (
        "**Goal",
        "**MANDATORY",
        "**CRITICAL",
        "**Execution",
        "### Agent",
        "**Actions",
    )
    objective_text = ""
    remainder = instructions
    split_at = next(
        (pos for pos in map(instructions.find, markers) if pos >= 0),
        None,
    )
    if split_at is not None:
        objective_text = instructions[:split_at].replace("\n", " ").strip()[:150]
        remainder = instructions[split_at:]
    action_abbrev = remainder[:500].replace("\n", " ").strip()

    gate_key = _resolve_gate(phase, "readback")
    gate_template = _MODEL.gates.get(gate_key)
    template_text = gate_template.prompt if gate_template else ""
    # defaultdict(str) makes unknown placeholders render as empty strings.
    values = collections.defaultdict(
        str,
        {
            "phase": phase,
            "objective": objective_text,
            "instructions": action_abbrev,
            "understanding": understanding,
        },
    )
    return _claude_evaluate(template_text.format_map(values))
962
+
963
+
964
def _gatekeeper_validate(
    phase: str,
    state: dict,
    evidence: str = "",
) -> tuple[bool, str]:
    """Ask an independent ``claude -p`` session whether a phase may end.

    Builds the gatekeeper prompt from the recorded agents, output and
    readback for ``phase``, the phase's exit criteria (first 400 chars)
    and the supplied evidence, then evaluates it.

    A reply whose first line starts with "ASK" is printed to the user and
    returned as a failure prefixed with ``ASK:``.

    Args:
        phase: Phase being completed.
        state: Iteration state; reads ``phase_agents``, ``phase_outputs``,
            ``readbacks`` and ``benchmark_cmd``.
        evidence: Free-text report from the agent; may be empty.

    Returns:
        ``(passed, explanation)``.
    """
    recorded = state.get("phase_agents", {}).get(phase, [])
    phase_output = state.get("phase_outputs", {}).get(phase, "")
    readback = state.get("readbacks", {}).get(phase, {})
    agent_key = _resolve_agents(phase)
    required_agents = PHASE_AGENTS.get(agent_key, [])

    exit_fn = _PHASE_END.get(phase)
    if exit_fn:
        exit_criteria = exit_fn()
    else:
        exit_criteria = f"No exit criteria defined for {phase}"

    if readback.get("passed"):
        readback_status = "PASS"
    elif readback:
        readback_status = "FAIL"
    else:
        readback_status = "not done"

    gate_key = _resolve_gate(phase, "gatekeeper")
    gate_template = _MODEL.gates.get(gate_key)
    template_text = gate_template.prompt if gate_template else ""
    values = collections.defaultdict(
        str,
        {
            "phase": phase,
            "exit_criteria": exit_criteria[:400],
            "required_agents": ", ".join(required_agents) if required_agents else "none",
            "recorded_agents": ", ".join(recorded) if recorded else "NONE",
            "output_status": f"yes ({len(phase_output)} chars)" if phase_output else "no",
            "readback_status": readback_status,
            "benchmark_configured": "yes" if state.get("benchmark_cmd") else "no",
            "evidence": evidence if evidence else "(no report provided)",
        },
    )
    passed, explanation = _claude_evaluate(template_text.format_map(values))

    # ASK response = BLOCK (not pass)
    verdict = ""
    if explanation:
        verdict = explanation.split("\n")[0].strip("*#> ").strip().upper()
    if verdict.startswith("ASK"):
        print("\n" + _msg("gatekeeper_question", explanation=explanation))
        return False, f"ASK: {explanation}"

    return passed, explanation
1007
+
1008
+
1009
def _gatekeeper_evaluate_skip(
    phase: str,
    reason: str,
    state: dict,
) -> tuple[bool, str]:
    """Let the gatekeeper decide whether skipping ``phase`` is justified.

    The prompt (gate ``gatekeeper_skip``) receives the iteration number
    and type, the objective (max 150 chars), the first 300 chars of the
    phase's start instructions and the caller's reason.

    Returns:
        ``(approved, output)``; approval requires the first line of the
        reply to start with "APPROVE" (case-insensitive).
    """
    objective = state.get("objective", "not set")
    iteration = state.get("iteration", "?")
    itype = state.get("type", "?")

    instructions_fn = _PHASE_START.get(phase)
    if instructions_fn:
        instructions = instructions_fn()
    else:
        instructions = f"Phase {phase}"
    phase_purpose = instructions[:300].replace("\n", " ").strip()

    gate_template = _MODEL.gates.get("gatekeeper_skip", None)
    template_text = gate_template.prompt if gate_template else ""
    values = collections.defaultdict(
        str,
        {
            "phase": phase,
            "iteration": str(iteration),
            "itype": itype,
            "objective": objective[:150],
            "phase_purpose": phase_purpose,
            "reason": reason,
        },
    )
    # The PASS/FAIL flag is irrelevant here; only APPROVE counts.
    _, output = _claude_evaluate(template_text.format_map(values))
    verdict = output.split("\n")[0].strip("*#> ").strip().upper() if output else ""
    return verdict.startswith("APPROVE"), output
1036
+
1037
+
1038
def _gatekeeper_evaluate_force_skip(
    phase: str,
    reason: str,
    state: dict,
) -> tuple[bool, str]:
    """Gatekeeper check for force-skipping a REQUIRED phase.

    Intended to be very conservative. Per the original design notes a
    force-skip should only be approved when the iteration is being
    stopped early, the phase was already executed in substance, or an
    external constraint makes the phase impossible. The actual policy
    lives in the ``gatekeeper_force_skip`` gate prompt.

    Returns:
        ``(approved, output)``; approval requires the first line of the
        reply to start with "APPROVE" (case-insensitive).
    """
    iteration = state.get("iteration", "?")
    completed = state.get("completed_phases", [])

    gate_template = _MODEL.gates.get("gatekeeper_force_skip", None)
    template_text = gate_template.prompt if gate_template else ""
    values = collections.defaultdict(
        str,
        {
            "phase": phase,
            "iteration": str(iteration),
            "completed_phases": ", ".join(completed) if completed else "none",
            "reason": reason,
        },
    )
    # The PASS/FAIL flag is irrelevant here; only APPROVE counts.
    _, output = _claude_evaluate(template_text.format_map(values))
    verdict = output.split("\n")[0].strip("*#> ").strip().upper() if output else ""
    return verdict.startswith("APPROVE"), output
1065
+
1066
+
1067
+ # ── Banner and footer ───────────────────────────────────────────────
1068
+
1069
+
1070
def _banner(phase: str, action: str, state: dict) -> str:
    """Build the header banner shown when entering or completing a phase.

    The banner is rendered from ``_MODEL.app.banner.header`` and shows
    the iteration label, workflow type, the phase's 1-based position
    (0 when the phase is not part of the workflow), the objective, the
    rejection count and a ``->``-joined progress line in which the
    current and completed phases use their own templates.

    Args:
        phase: Phase the banner is for.
        action: Verb shown in the banner (callers pass e.g. "ENTERING").
        state: Iteration state.

    Returns:
        The formatted banner text.
    """
    iteration = state.get("iteration", "?")
    itype = state.get("type", "?")
    phases = ITERATION_TYPES[itype]["phases"]
    phase_idx = phases.index(phase) + 1 if phase in phases else 0
    total = len(phases)

    banner_tmpl = _MODEL.app.banner
    completed = state.get("completed_phases", [])

    def _render(name: str) -> str:
        # Current phase wins over "done"; everything else is shown bare.
        if name == phase:
            return banner_tmpl.progress_current.format_map({"p": name})
        if name in completed:
            return banner_tmpl.progress_done.format_map({"p": name})
        return name

    progress = " -> ".join(_render(name) for name in phases)

    rejected = state.get("rejected_count", 0)
    reject_info = f" | REJECTED {rejected}x" if rejected else ""
    objective = state.get("objective", "")
    total_iters = state.get("total_iterations", 1)

    wf_def = _MODEL.workflow_types.get(itype)
    if wf_def and wf_def.dependency:
        # Dependency workflows are labelled by type rather than by number.
        iter_label = itype.upper()
    elif total_iters > 1:
        iter_label = f"{iteration}/{total_iters}"
    else:
        iter_label = str(iteration)

    values = collections.defaultdict(
        str,
        {
            "header_line": _HDR_CHAR * _HDR_WIDTH,
            "iter_label": iter_label,
            "itype": itype,
            "action": action,
            "phase_idx": phase_idx,
            "total": total,
            "phase": phase,
            "reject_info": reject_info,
            "objective": objective,
            "progress": progress,
        },
    )
    return banner_tmpl.header.format_map(values)
1120
+
1121
+
1122
def _footer(phase: str, status: str, state: dict) -> str:
    """Build the footer printed after a phase's start or end output.

    Picks one of three templates from ``_MODEL.app.footer``:
    ``start`` when ``status == "start"``; otherwise ``end`` when
    ``_next_phase(state)`` returns a phase (exposed to the template as
    ``nxt``), or ``final`` when there is no next phase.

    Args:
        phase: Current phase name.
        status: "start" selects the start footer; any other value
            selects end/final.
        state: Iteration state.

    Returns:
        The formatted footer text.
    """
    footer_tmpl = _MODEL.app.footer
    values = {
        "separator_line": _SEP_CHAR * _SEP_WIDTH,
        "iteration": state.get("iteration", "?"),
        "itype": state.get("type", "?"),
        "phase": phase,
        "cmd": CMD,
    }

    if status == "start":
        return footer_tmpl.start.format_map(collections.defaultdict(str, values))

    upcoming = _next_phase(state)
    if not upcoming:
        return footer_tmpl.final.format_map(collections.defaultdict(str, values))

    values["nxt"] = upcoming
    return footer_tmpl.end.format_map(collections.defaultdict(str, values))
1150
+
1151
+
1152
+ # ── Auto-action helpers ──────────────────────────────────────────────
1153
+
1154
+
1155
def _run_hypothesis_gc() -> None:
    """Move DONE and REMOVED hypotheses out of the active catalogue.

    Loads HYPOTHESES_FILE, appends every entry whose status is DONE or
    REMOVED (case-insensitive) to ``hypotheses_archive.yaml`` in the
    artifacts directory, rewrites HYPOTHESES_FILE with the remaining
    entries and prints a short report. Nothing is written when the
    catalogue is empty or has nothing to archive.

    A null or non-string ``status`` is treated as "not archivable"
    instead of raising, and a null or non-string ``hypothesis`` text is
    coerced before being truncated for display.
    """
    hyps = _load_yaml_list(HYPOTHESES_FILE)
    if not hyps:
        print(_msg("hypothesis_gc_none"))
        return

    active = []
    archived = []
    for h in hyps:
        # `status:` with no value loads as None from YAML; guard before .upper().
        status = str(h.get("status") or "").upper()
        if status in ("DONE", "REMOVED"):
            archived.append(h)
        else:
            active.append(h)

    if not archived:
        print(_msg("hypothesis_gc_no_archive", count=len(active)))
        return

    archive_path = DEFAULT_ARTIFACTS_DIR / "hypotheses_archive.yaml"
    existing_archive = _load_yaml_list(archive_path)
    existing_archive.extend(archived)
    # Archive first so entries are never dropped from both files.
    archive_path.write_text(_yaml_dump(existing_archive))

    HYPOTHESES_FILE.write_text(_yaml_dump(active))

    print(_msg("hypothesis_gc_archived", count=len(archived), path=archive_path.name))
    print(_msg("hypothesis_gc_active", count=len(active)))
    for h in active:
        text = h.get("hypothesis", "?")
        if text is None:
            text = "?"
        print(
            _msg(
                "hypothesis_gc_item",
                hid=h.get("id", "?"),
                status=h.get("status", "?"),
                hyp=str(text)[:80],
            )
        )
1192
+
1193
+
1194
def _summary_section(title: str, text: str) -> list[str]:
    """Render one phase output as markdown lines: heading, body, blank line.

    Non-blank body lines that do not start with ``#``, ``-`` or ``|`` get
    a trailing ``<br>``; all other lines are copied unchanged.
    """
    section = [f"## {title}", ""]
    for raw in text.split("\n"):
        is_plain_text = bool(raw.strip()) and not raw.startswith(("#", "-", "|"))
        section.append(f"{raw}<br>" if is_plain_text else raw)
    section.append("")
    return section


def _run_summary(state: dict) -> None:
    """Write ``iteration_<N>.md``, the executive summary of an iteration.

    The file is written to the artifacts directory and contains a header
    (scope, objective, type, completed phases, rejections, start time),
    one section per recorded phase output (RESEARCH, HYPOTHESIS, PLAN,
    IMPLEMENT, TEST, REVIEW, in that order), execution metrics, and the
    failures returned by ``_count_iteration_failures`` for the iteration.

    Args:
        state: Iteration state; ``state["iteration"]`` is required.
    """
    iteration = state["iteration"]
    outputs = state.get("phase_outputs", {})
    agents = state.get("phase_agents", {})
    readbacks = state.get("readbacks", {})
    rejected = state.get("rejected_count", 0)
    objective = state.get("objective", "not set")
    completed = state.get("completed_phases", [])
    itype = state.get("type", "?")

    # Scope is the first plan line mentioning "ITERATION <n>:" (any case).
    iteration_plan = state.get("iteration_plan", "")
    scope = ""
    if iteration_plan:
        for line in iteration_plan.split("\n"):
            if f"ITERATION {iteration}:" in line.upper():
                scope = line.strip()
                break

    total_iters = state.get("total_iterations", 1)
    lines = [
        f"# Iteration {iteration}/{total_iters} - Executive Summary",
        "",
        f"**Scope**: {scope if scope else 'see plan'}<br>",
        f"**Objective**: {objective}<br>",
        f"**Type**: {itype}<br>",
        f"**Phases completed**: {', '.join(completed) if completed else 'none'}<br>",
        f"**Rejections**: {rejected}<br>",
        f"**Started**: {state.get('started_at', '?')}",
        "",
    ]

    # (phase key, section heading) in output order.
    sections = (
        ("RESEARCH", "Research Findings"),
        ("HYPOTHESIS", "Hypotheses"),
        ("PLAN", "Plan"),
        ("IMPLEMENT", "Implement"),
        ("TEST", "Test"),
        ("REVIEW", "Review"),
    )
    for phase_key, heading in sections:
        if phase_key in outputs:
            lines.extend(_summary_section(heading, outputs[phase_key]))

    lines.append("## Execution Metrics")
    lines.append("")
    if agents:
        total_agents = sum(len(v) for v in agents.values())
        lines.append(f"- {total_agents} agents spawned across {len(agents)} phases<br>")
        for p, agent_list in agents.items():
            lines.append(f" - **{p}**: {', '.join(agent_list)}<br>")
    if readbacks:
        passed = sum(1 for r in readbacks.values() if r.get("passed"))
        lines.append(f"- Readbacks: {passed}/{len(readbacks)} passed<br>")
    gatekeepers = state.get("gatekeepers", {})
    if gatekeepers:
        gk_passed = sum(1 for g in gatekeepers.values() if g.get("passed"))
        lines.append(f"- Gatekeepers: {gk_passed}/{len(gatekeepers)} passed<br>")
    if rejected:
        lines.append(f"- Rejections: {rejected}<br>")
    lines.append("")

    failures = _count_iteration_failures(iteration)
    if failures:
        lines.append("## Failures")
        lines.append("")
        for f in failures:
            lines.append(f"- [{f.get('mode', '?')}] {f.get('description', '?')}")
        lines.append("")

    summary_path = DEFAULT_ARTIFACTS_DIR / f"iteration_{iteration}.md"
    summary_path.write_text("\n".join(lines), encoding="utf-8")
    print(_msg("summary_written", path=summary_path))
1302
+
1303
+
1304
def _run_next_iteration(state: dict) -> None:
    """Roll the state forward to the next iteration, or report completion.

    When no iterations remain (``total_iterations - iteration <= 0``) a
    completion message is printed and the state is left untouched.

    Otherwise the iteration counter is incremented, a finished dependency
    workflow is swapped for its ``parent_type``, and the per-iteration
    fields (current phase, status, completed/skipped phases, rejection
    count, start time, phase outputs and phase agents) are reset. The
    iteration plan is carried over, falling back to the PLAN phase output
    when no plan is stored yet. The state is saved, a ``next_iteration``
    log entry is appended and a short briefing is printed.

    Args:
        state: Iteration state; mutated in place.
    """
    total = state.get("total_iterations", 1)
    current = state["iteration"]

    if total - current <= 0:
        print("\n" + _msg("iteration_complete", total=total))
        print(_msg("iteration_new_cmd", cmd=CMD, itype=state["type"]))
        return

    new_iteration = current + 1

    # Switch from dependency workflow to parent workflow after planning iteration completes
    parent = state.get("parent_type", "")
    if parent and parent != state["type"]:
        wf_def = _MODEL.workflow_types.get(state["type"])
        if wf_def and wf_def.dependency:
            state["type"] = parent
            state.pop("parent_type", None)

    first_phase = ITERATION_TYPES[state["type"]]["phases"][0]

    # Preserve iteration_plan from iteration 0
    iteration_plan = state.get("iteration_plan", "")
    if not iteration_plan:
        iteration_plan = state.get("phase_outputs", {}).get("PLAN", "")

    state.update(
        {
            "iteration": new_iteration,
            "current_phase": first_phase,
            "phase_status": "pending",
            "completed_phases": [],
            "skipped_phases": [],
            "rejected_count": 0,
            "started_at": _now(),
            # Reset phase_outputs AND phase_agents for new iteration
            "phase_outputs": {},
            "phase_agents": {},
        }
    )
    if iteration_plan:
        state["iteration_plan"] = iteration_plan
    _save_state(state)
    _append_log(
        {
            "iteration": new_iteration,
            "type": state["type"],
            "event": "next_iteration",
            "objective": state["objective"],
        }
    )

    if total > 1:
        label = f"{new_iteration}/{total}"
    else:
        label = str(new_iteration)
    print("\n" + _msg("iteration_started_short", iter_label=label, itype=state["type"]))
    print(_msg("iteration_objective", objective=state["objective"]))
    print(_msg("iteration_remaining", remaining=total - new_iteration))
    if iteration_plan:
        print("\n" + _msg("iteration_plan_header"))
        print(_msg("iteration_plan_content", plan=iteration_plan[:200]))

    # Remind the agent of (at most) the last three failures of the
    # iteration that just finished.
    prior_failures = _count_iteration_failures(current)
    if prior_failures:
        print("\n" + _msg("prior_failures_header_short", count=len(prior_failures)))
        for failure in prior_failures[-3:]:
            print(
                _msg(
                    "prior_failure_item",
                    mode=failure.get("mode", "?"),
                    description=failure.get("description", "?"),
                )
            )

    print("\n" + _msg("iteration_begin_short", cmd=CMD))
1376
+
1377
+
1378
+ # ── Command functions ───────────────────────────────────────────────
1379
+
1380
+
1381
def cmd_new(args) -> None:
    """Handle the ``new`` command: create the state for a fresh request.

    Steps, in order:
      1. Reject unknown workflow types and types flagged as dependency
         workflows (exit code 1).
      2. With ``--dry-run``, delegate to ``_dry_run`` and stop.
      3. Read the last iteration number, then clean the artifacts
         directory unless ``args.clean`` is falsy.
      4. Choose the iteration number; when the type declares
         ``depends_on`` and more than one iteration is requested, start
         with the dependency workflow as iteration 0.
      5. Save the state and objective, log ``new_iteration`` and print
         the iteration briefing.

    Args:
        args: Parsed CLI namespace; reads ``type``, ``objective`` and the
            optional ``iterations``, ``dry_run``, ``clean`` and
            ``benchmark`` attributes.
    """
    itype = args.type
    if itype not in ITERATION_TYPES:
        print(
            f"Unknown type: {itype}. Choose: {', '.join(ITERATION_TYPES)}",
            file=sys.stderr,
        )
        sys.exit(1)

    # Block dependency workflows from direct invocation
    wf_def = _MODEL.workflow_types.get(itype)
    if wf_def and wf_def.dependency:
        print(_msg("dependency_blocked", itype=itype), file=sys.stderr)
        sys.exit(1)

    total_iterations = getattr(args, "iterations", 1)

    # --dry-run: validate and print execution plan, no state files
    if getattr(args, "dry_run", False):
        _dry_run(itype, total_iterations)
        return

    # Read iteration counter BEFORE cleaning (clean wipes state file)
    last_iteration = _read_last_iteration()

    # Clean artifacts from prior runs (default: yes)
    if getattr(args, "clean", True):
        _clean_artifacts_dir()
        print(_msg("cleaned") + "\n")

    old_state = _load_state()
    from_state = (old_state["iteration"] + 1) if old_state else 1
    iteration = max(from_state, last_iteration + 1)

    # Auto-run dependency workflow (iteration 0) when configured
    run_type = itype
    if wf_def and wf_def.depends_on and total_iterations > 1:
        if _MODEL.workflow_types.get(wf_def.depends_on):
            iteration = 0
            run_type = wf_def.depends_on

    type_info = ITERATION_TYPES[run_type]
    first_phase = type_info["phases"][0]

    objective = args.objective

    benchmark_cmd = getattr(args, "benchmark", "") or ""
    state = {
        "iteration": iteration,
        "total_iterations": total_iterations,
        "type": run_type,
        "objective": objective,
        "benchmark_cmd": benchmark_cmd,
        "benchmark_scores": [],
        "current_phase": first_phase,
        "phase_status": "pending",
        "completed_phases": [],
        "skipped_phases": [],
        "rejected_count": 0,
        "started_at": _now(),
        "phase_outputs": {},
        "phase_agents": {},
        # Remember the requested type while a dependency workflow runs first.
        "parent_type": itype if run_type != itype else "",
    }
    _save_state(state)
    _save_objective(objective, total_iterations)
    _append_log(
        {
            "iteration": iteration,
            "type": run_type,
            "event": "new_iteration",
            "objective": objective,
        }
    )

    run_wf = _MODEL.workflow_types.get(run_type)
    is_dependency_run = run_wf and run_wf.dependency
    if is_dependency_run:
        iter_label = f"{run_type.upper()} (before {total_iterations} iterations)"
    elif total_iterations > 1:
        iter_label = f"{iteration} of {total_iterations}"
    else:
        iter_label = str(iteration)

    print(
        _msg(
            "iteration_started",
            iter_label=iter_label,
            itype=run_type,
            description=type_info["description"],
        )
    )
    print("\n" + _msg("iteration_objective", objective=objective))
    if total_iterations > 1:
        print(_msg("iteration_requested", total=total_iterations))
    if is_dependency_run:
        print("\n" + _msg("dependency_purpose", description=run_wf.description))
    print("\n" + _msg("iteration_phases", phases=" -> ".join(type_info["phases"])))
    print(_msg("iteration_required", required=", ".join(type_info["required"])))
    if type_info["skippable"]:
        print(_msg("iteration_skippable", skippable=", ".join(type_info["skippable"])))

    # Show prior failures if any
    if old_state:
        prior_failures = _count_iteration_failures(old_state["iteration"])
        if prior_failures:
            print("\n" + _msg("prior_failures_header", count=len(prior_failures)))
            for failure in prior_failures[-3:]:
                print(
                    _msg(
                        "prior_failure_item_full",
                        mode=failure.get("mode", "?"),
                        description=failure.get("description", "?"),
                    )
                )

    print("\n" + _msg("iteration_begin", cmd=CMD))
1493
+
1494
+
1495
def cmd_start(args) -> None:
    """Handle the ``start`` command: enter the current phase.

    Flow:
      1. Require an active state, a legal FSM START transition and a
         non-empty ``args.understanding``; otherwise print to stderr and
         exit with code 1.
      2. Run the blocking readback gate on the agent's understanding.
      3. Write ``readback.md`` to the phase directory, record the result
         in ``state["readbacks"]`` and log a ``readback`` event.
      4. On failure: fire READBACK_FAIL, save the state, print retry
         guidance and return.
      5. On success: fire READBACK_PASS, stamp ``phase_started_at``,
         save, log ``phase_start`` and print banner + instructions
         (+ any user context) + footer.

    Args:
        args: Parsed CLI namespace; reads the ``understanding`` attribute.
    """
    state = _load_state()
    if not state:
        print(_msg("no_active_start"), file=sys.stderr)
        print(_msg("no_active_start_cmd", cmd=CMD), file=sys.stderr)
        sys.exit(1)

    phase = state["current_phase"]

    # FSM guards against starting from in_progress (raises ValueError)
    try:
        _fire_fsm(FSMEvent.START, state)  # pending -> readback
    except ValueError:
        print(_msg("phase_in_progress", phase=phase), file=sys.stderr)
        print(_msg("phase_in_progress_cmd", cmd=CMD), file=sys.stderr)
        sys.exit(1)

    understanding = getattr(args, "understanding", None)
    if not understanding:
        print(_msg("understanding_required"), file=sys.stderr)
        print(_msg("understanding_required_cmd", cmd=CMD), file=sys.stderr)
        sys.exit(1)

    # Get phase instructions for readback validation
    instructions_fn = _PHASE_START.get(phase)
    if instructions_fn:
        instructions = instructions_fn()
    else:
        instructions = f"Phase {phase}"

    # BLOCKING readback validation
    print(_msg("readback_separator"))
    print(_msg("readback_validating", phase=phase))
    print(_msg("readback_separator"))
    passed, explanation = _readback_validate(phase, understanding, instructions)
    verdict = "PASS" if passed else "FAIL"

    # Save readback artifact (pass or fail)
    readback_file = _phase_dir(state) / "readback.md"
    readback_file.write_text(
        f"# Readback - {phase}\n\n"
        f"## Agent Understanding\n{understanding}\n\n"
        f"## Validation Result\n{verdict}\n\n"
        f"## Explanation\n{explanation}\n",
        encoding="utf-8",
    )

    # Update state with readback result
    if "readbacks" not in state:
        state["readbacks"] = {}
    state["readbacks"][phase] = {"passed": passed, "at": _now()}

    _append_log(
        {
            "iteration": state["iteration"],
            "phase": phase,
            "event": "readback",
            "passed": passed,
        }
    )

    if not passed:
        # Readback failed - return to pending
        _fire_fsm(FSMEvent.READBACK_FAIL, state)  # readback -> pending
        _save_state(state)
        print("\n" + _msg("readback_fail", phase=phase))
        print(_msg("readback_fail_reason", reason=explanation[:200]))
        print("\n" + _msg("readback_retry", cmd=CMD))
        return

    print(_msg("readback_pass", phase=phase) + "\n")

    # Readback passed - advance to in_progress via FSM
    _fire_fsm(FSMEvent.READBACK_PASS, state)  # readback -> in_progress
    state["phase_started_at"] = _now()
    _save_state(state)
    _append_log(
        {
            "iteration": state["iteration"],
            "phase": phase,
            "event": "phase_start",
        }
    )

    header = _banner(phase, "ENTERING", state)

    # Inject ALL user context from context.yaml (broadcast to all phases)
    pieces = [instructions]
    all_ctx = _load_context()
    if all_ctx:
        pieces.append(f"\n\n{len(all_ctx)} context message(s) active:\n")
        pieces.append(_msg("user_guidance_header_line") + "\n")
        pieces.append(_msg("user_guidance_header") + "\n")
        pieces.append(_msg("user_guidance_header_line") + "\n\n")
        for ctx_phase, ctx_msg in all_ctx.items():
            pieces.append(f"**[{ctx_phase}]**: {ctx_msg}\n\n")
        pieces.append(_msg("user_guidance_instruction"))
    body = "".join(pieces)

    foot = _footer(phase, "start", state)
    print(header + body + foot)
1604
+
1605
+
1606
+ def cmd_end(args) -> None:
1607
+ """Complete current phase with gatekeeper validation.
1608
+
1609
+ Validates --agents against required agents from agents.yaml,
1610
+ records output file content, runs TEST automation if in TEST phase,
1611
+ runs gatekeeper gate for quality validation, then advances to
1612
+ next phase. Auto-actions: hypothesis-gc after HYPOTHESIS,
1613
+ summary after RECORD, inline NEXT display after RECORD.
1614
+ """
1615
+ state = _load_state()
1616
+ if not state:
1617
+ print(_msg("no_active"), file=sys.stderr)
1618
+ sys.exit(1)
1619
+
1620
+ phase = state["current_phase"]
1621
+ if state["phase_status"] != "in_progress":
1622
+ print(_msg("phase_not_started", phase=phase), file=sys.stderr)
1623
+ print(_msg("phase_not_started_cmd", cmd=CMD), file=sys.stderr)
1624
+ sys.exit(1)
1625
+
1626
+ # ── Fail-fast: validate ALL inputs at top ──
1627
+ evidence = getattr(args, "evidence", "") or ""
1628
+ agents_str = getattr(args, "agents", "") or ""
1629
+ output_file_str = getattr(args, "output_file", "") or ""
1630
+
1631
+ # Resolve and validate --output-file
1632
+ output_file_path = None
1633
+ output_content = ""
1634
+ if output_file_str:
1635
+ output_file_path = Path(output_file_str).resolve()
1636
+ if not output_file_path.exists():
1637
+ print(_msg("output_file_missing", path=output_file_path), file=sys.stderr)
1638
+ sys.exit(1)
1639
+ output_content = output_file_path.read_text(encoding="utf-8")
1640
+
1641
+ # Parse agents
1642
+ agents = [a.strip() for a in agents_str.split(",") if a.strip()] if agents_str else []
1643
+
1644
+ # Check required agents - resolve via :: namespace
1645
+ required_key = _resolve_agents(phase)
1646
+ required_agents = PHASE_AGENTS.get(required_key, [])
1647
+ if required_agents and agents:
1648
+ missing = [r for r in required_agents if r not in agents]
1649
+ if missing:
1650
+ print(_msg("missing_agents", phase=phase, missing=", ".join(missing)), file=sys.stderr)
1651
+ print(_msg("missing_agents_required", required=", ".join(required_agents)), file=sys.stderr)
1652
+ sys.exit(1)
1653
+ elif required_agents and not agents:
1654
+ print(_msg("requires_agents", phase=phase, required=", ".join(required_agents)), file=sys.stderr)
1655
+ print(_msg("requires_agents_provide", required=",".join(required_agents)), file=sys.stderr)
1656
+ sys.exit(1)
1657
+
1658
+ # ── Record agents BEFORE gatekeeper (so gatekeeper sees them) ──
1659
+ if agents:
1660
+ if "phase_agents" not in state:
1661
+ state["phase_agents"] = {}
1662
+ state["phase_agents"][phase] = agents
1663
+
1664
+ # ── Record output-file (OVERWRITE phase_outputs) ──
1665
+ if output_file_path:
1666
+ if "phase_outputs" not in state:
1667
+ state["phase_outputs"] = {}
1668
+ state["phase_outputs"][phase] = output_content
1669
+
1670
+ # Also save to phase subfolder
1671
+ pdir = _phase_dir(state)
1672
+ output_dest = pdir / "output.md"
1673
+ md_lines = []
1674
+ for line in output_content.split("\n"):
1675
+ if (
1676
+ line.strip()
1677
+ and not line.startswith("#")
1678
+ and not line.startswith("-")
1679
+ and not line.startswith("|")
1680
+ ):
1681
+ md_lines.append(line + "<br>")
1682
+ else:
1683
+ md_lines.append(line)
1684
+ md_content = "\n".join(md_lines)
1685
+ output_dest.write_text(
1686
+ f"# {phase} Output\n\n{md_content}\n",
1687
+ encoding="utf-8",
1688
+ )
1689
+ elif evidence:
1690
+ # Evidence stored as gap-fill only if no --output-file
1691
+ if "phase_outputs" not in state:
1692
+ state["phase_outputs"] = {}
1693
+ if phase not in state["phase_outputs"]:
1694
+ state["phase_outputs"][phase] = evidence
1695
+
1696
+ _save_state(state)
1697
+
1698
+ header = _banner(phase, "COMPLETING", state)
1699
+
1700
+ # ── TEST phase: run automated verification ──
1701
+ if phase == "TEST":
1702
+ print(header)
1703
+ body = _PHASE_END.get(phase, lambda: "")()
1704
+ print(body)
1705
+
1706
+ passed, output = _verify_test_phase(state)
1707
+ print(output)
1708
+
1709
+ if not passed:
1710
+ target = _prev_implementable(state)
1711
+ _fire_fsm(FSMEvent.END, state) # in_progress -> gatekeeper
1712
+ _fire_fsm(FSMEvent.GATE_FAIL, state) # gatekeeper -> in_progress
1713
+ _fire_fsm(FSMEvent.REJECT, state) # in_progress -> rejected
1714
+ _fire_fsm(FSMEvent.ADVANCE, state) # rejected -> pending
1715
+ state["current_phase"] = target
1716
+ state["rejected_count"] = state.get("rejected_count", 0) + 1
1717
+ state.pop("phase_started_at", None)
1718
+ _save_state(state)
1719
+ _append_log(
1720
+ {
1721
+ "iteration": state["iteration"],
1722
+ "phase": phase,
1723
+ "event": "auto_reject",
1724
+ "reason": "tests/lint failed",
1725
+ "target": target,
1726
+ }
1727
+ )
1728
+ _append_failure(
1729
+ {
1730
+ "iteration": state["iteration"],
1731
+ "phase": phase,
1732
+ "mode": "FM-TEST-FAIL",
1733
+ "description": output[:200],
1734
+ }
1735
+ )
1736
+ print("\n" + _msg("tests_fail", target=target))
1737
+ print(_msg("tests_fail_run", cmd=CMD))
1738
+ return
1739
+
1740
+ print("\n" + _msg("tests_pass"))
1741
+
1742
+ else:
1743
+ body = _PHASE_END.get(phase, lambda: "")()
1744
+ print(header + body)
1745
+
1746
+ # ── Gatekeeper: per-phase generative validation ──
1747
+ _fire_fsm(FSMEvent.END, state) # in_progress -> gatekeeper
1748
+ print("\n" + _msg("gatekeeper_separator"))
1749
+ print(_msg("gatekeeper_evaluating", phase=phase))
1750
+ print(_msg("gatekeeper_separator"))
1751
+ gk_passed, gk_output = _gatekeeper_validate(
1752
+ phase,
1753
+ state,
1754
+ evidence,
1755
+ )
1756
+
1757
+ # Save gatekeeper result to phase subfolder
1758
+ pdir = _phase_dir(state)
1759
+ gk_file = pdir / "gatekeeper.md"
1760
+ gk_file.write_text(
1761
+ f"# Gatekeeper - {phase}\n\n"
1762
+ f"## Result\n{'PASS' if gk_passed else 'FAIL'}\n\n"
1763
+ f"## Evaluation\n{gk_output}\n",
1764
+ encoding="utf-8",
1765
+ )
1766
+
1767
+ # Update state
1768
+ if "gatekeepers" not in state:
1769
+ state["gatekeepers"] = {}
1770
+ state["gatekeepers"][phase] = {
1771
+ "passed": gk_passed,
1772
+ "at": _now(),
1773
+ }
1774
+ _save_state(state)
1775
+ _append_log(
1776
+ {
1777
+ "iteration": state["iteration"],
1778
+ "phase": phase,
1779
+ "event": "gatekeeper",
1780
+ "passed": gk_passed,
1781
+ }
1782
+ )
1783
+
1784
+ if not gk_passed:
1785
+ _fire_fsm(FSMEvent.GATE_FAIL, state) # gatekeeper -> in_progress (retry)
1786
+ _save_state(state)
1787
+ print("\n" + _msg("gatekeeper_fail", phase=phase))
1788
+ print(_msg("gatekeeper_fail_reason", reason=gk_output[:300]))
1789
+ print("\n" + _msg("gatekeeper_fail_retry", cmd=CMD))
1790
+ return
1791
+
1792
+ _fire_fsm(FSMEvent.GATE_PASS, state) # gatekeeper -> complete
1793
+ print(_msg("gatekeeper_pass", phase=phase))
1794
+
1795
+ # Mark phase complete and advance
1796
+ state["completed_phases"].append(phase)
1797
+ started_at = state.get("phase_started_at", "")
1798
+
1799
+ nxt = _next_phase(state)
1800
+ if nxt:
1801
+ _fire_fsm(FSMEvent.ADVANCE, state) # complete -> pending
1802
+ state["current_phase"] = nxt
1803
+ else:
1804
+ state["phase_status"] = "iteration_complete"
1805
+
1806
+ state.pop("phase_started_at", None)
1807
+ _save_state(state)
1808
+ _append_log(
1809
+ {
1810
+ "iteration": state["iteration"],
1811
+ "phase": phase,
1812
+ "event": "phase_complete",
1813
+ "started_at": started_at,
1814
+ }
1815
+ )
1816
+
1817
+ # Phase-end executive summary
1818
+ outputs = state.get("phase_outputs", {})
1819
+ agents_map = state.get("phase_agents", {})
1820
+ readbacks = state.get("readbacks", {})
1821
+ gatekeepers = state.get("gatekeepers", {})
1822
+ summary_lines = ["\n" + _msg("phase_complete", phase=phase)]
1823
+ if phase in outputs:
1824
+ out_text = outputs[phase]
1825
+ summary_lines.append(_msg("phase_output", output=out_text[:100]))
1826
+ if phase in agents_map:
1827
+ summary_lines.append(_msg("phase_agents", agents=", ".join(agents_map[phase])))
1828
+ if phase in readbacks:
1829
+ rb = readbacks[phase]
1830
+ summary_lines.append(_msg("phase_readback", status="PASS" if rb.get("passed") else "FAIL"))
1831
+ if phase in gatekeepers:
1832
+ gk = gatekeepers[phase]
1833
+ summary_lines.append(_msg("phase_gatekeeper", status="PASS" if gk.get("passed") else "FAIL"))
1834
+ print("\n".join(summary_lines))
1835
+
1836
+ # ── Auto-actions from phases.yaml auto_actions.on_complete ──
1837
+ if _run_auto_actions(phase, state):
1838
+ return
1839
+
1840
+ print(_footer(phase, "end", state))
1841
+
1842
+
1843
def cmd_status(args) -> None:
    """Print a snapshot of the active iteration.

    The report contains the iteration header (label, objective, start
    time, current phase and its status), rejection details, a checklist
    of the workflow's phases, the agents recorded per phase, the failures
    logged for this iteration and a hint for the next command. When no
    state can be loaded, the available workflow types are listed instead.

    Args:
        args: Parsed CLI namespace (not read by this command).
    """
    state = _load_state()
    if not state:
        # Nothing is running: explain how to start and list workflow types.
        print(_msg("no_active"))
        print("\n" + _msg("no_active_start_full", cmd=CMD))
        print("\n" + _msg("available_types"))
        for type_name, type_info in ITERATION_TYPES.items():
            print(_msg("available_type_item", name=type_name, description=type_info["description"]))
        return

    workflow = state["type"]
    type_cfg = ITERATION_TYPES[workflow]
    phase_names = type_cfg["phases"]
    planned = state.get("total_iterations", 1)
    current_iter = state.get("iteration", "?")

    # Dependency workflows are labelled by name; others by iteration number.
    workflow_def = _MODEL.workflow_types.get(workflow)
    if workflow_def and workflow_def.dependency:
        label = workflow.upper()
    else:
        label = f"{current_iter}/{planned}" if planned > 1 else str(current_iter)

    print(_msg("status_header", iter_label=label, itype=workflow))
    print(_msg("status_objective", objective=state.get("objective", "?")))
    print(_msg("status_started", started=state.get("started_at", "?")))
    print(_msg("status_current", phase=state["current_phase"], status=state["phase_status"]))

    rejections = state.get("rejected_count", 0)
    if rejections:
        print(_msg("status_rejections", count=rejections))
        last = state.get("last_rejection", {})
        if last:
            print(_msg("status_last_reject", from_phase=last.get("from", "?"), reason=last.get("reason", "?")))
    print()

    def _checkbox(name):
        """Return the checklist marker for one phase name."""
        if name in state["completed_phases"]:
            return "[x]"
        if name == state["current_phase"]:
            return "[>]" if state["phase_status"] == "in_progress" else "[ ]"
        if any(entry["phase"] == name for entry in state.get("skipped_phases", [])):
            return "[-]"
        return "[ ]"

    for name in phase_names:
        box = _checkbox(name)
        required_flag = "*" if name in type_cfg["required"] else " "
        print(_msg("status_phase_item", marker=box, p=name, req=required_flag))

    # Agents recorded per phase.
    recorded_agents = state.get("phase_agents", {})
    if recorded_agents:
        print("\n" + _msg("status_agents_header"))
        for phase_name, names in recorded_agents.items():
            print(_msg("status_agent_item", phase=phase_name, agents=", ".join(names)))

    # Failures logged for the current iteration.
    logged = _count_iteration_failures(state["iteration"])
    if logged:
        print("\n" + _msg("status_failures_header", count=len(logged)))
        for failure in logged:
            print(
                _msg(
                    "status_failure_item",
                    mode=failure.get("mode", "?"),
                    desc=failure.get("description", "?")[:60],
                )
            )

    print("\n" + _msg("status_required_note"))
    status = state["phase_status"]
    if status == "pending":
        print("\n" + _msg("status_next_start", cmd=CMD))
    elif status == "in_progress":
        print("\n" + _msg("status_next_end", cmd=CMD))
1916
+
1917
+
1918
def cmd_reject(args) -> None:
    """Reject the current phase and roll back to an earlier one.

    The rollback target is the ``phase`` entry of the current phase's
    ``reject_to`` declaration when one exists, otherwise the result of
    ``_prev_implementable(state)``. The rejection counter is incremented,
    the rejection is stored in ``last_rejection`` and an audit entry is
    appended to the log.

    Args:
        args: Parsed CLI namespace; ``args.reason`` holds the rejection
            reason (an empty value becomes "no reason given").
    """
    state = _load_state()
    if not state:
        print(_msg("no_active"), file=sys.stderr)
        sys.exit(1)

    rejected_phase = state["current_phase"]
    why = args.reason or "no reason given"

    # Look for a reject_to declaration on the resolved phase definition.
    phase_def = _MODEL.phases.get(_resolve_phase(rejected_phase))
    declared = phase_def.reject_to if phase_def else None
    fallback = _prev_implementable(state)
    destination = declared.get("phase", fallback) if declared else fallback

    # FSM: in_progress -> rejected -> pending, then point at the target.
    for event in (FSMEvent.REJECT, FSMEvent.ADVANCE):
        _fire_fsm(event, state)
    state["current_phase"] = destination
    state["rejected_count"] = state.get("rejected_count", 0) + 1
    state["last_rejection"] = {
        "from": rejected_phase,
        "reason": why,
        "at": _now(),
    }
    state.pop("phase_started_at", None)
    _save_state(state)

    audit_entry = {
        "iteration": state["iteration"],
        "phase": rejected_phase,
        "event": "rejected",
        "reason": why,
        "target": destination,
    }
    _append_log(audit_entry)

    print("\n" + _msg("reject_header", phase=rejected_phase, target=destination))
    print(_msg("reject_reason", reason=why))
    print(_msg("reject_count", count=state["rejected_count"]))
    print("\n" + _msg("reject_fix", cmd=CMD))
1968
+
1969
+
1970
def cmd_skip(args) -> None:
    """Skip the current phase after gatekeeper approval.

    A phase listed in the workflow's ``required`` set is refused unless
    ``--force`` is given; a forced skip is evaluated by
    ``_gatekeeper_evaluate_force_skip`` and any other skip by
    ``_gatekeeper_evaluate_skip``. A denied skip is logged and leaves the
    state untouched. An approved skip is recorded in
    ``state["skipped_phases"]`` and the workflow advances to the next
    phase, or is marked ``iteration_complete`` when none remains.

    Args:
        args: Parsed CLI namespace; reads ``args.reason`` and the optional
            ``args.force`` flag.
    """
    state = _load_state()
    if not state:
        print(_msg("no_active"), file=sys.stderr)
        sys.exit(1)

    phase = state["current_phase"]
    itype = ITERATION_TYPES[state["type"]]
    force = getattr(args, "force", False)
    # Computed once; used both for the --force guard and gatekeeper choice.
    is_required = phase in itype["required"]

    if is_required and not force:
        print(_msg("skip_blocked", phase=phase, itype=state["type"]), file=sys.stderr)
        print(_msg("skip_blocked_required", required=", ".join(itype["required"])), file=sys.stderr)
        print("\n" + _msg("skip_blocked_force"), file=sys.stderr)
        sys.exit(1)

    reason = args.reason or "no reason given"

    print(_msg("gatekeeper_skip_separator"))
    label = "FORCE-SKIP (required phase)" if is_required else "SKIP"
    print(_msg("gatekeeper_skip_evaluating", label=label, phase=phase))
    print(_msg("gatekeeper_skip_separator"))

    if is_required:
        approved, explanation = _gatekeeper_evaluate_force_skip(
            phase,
            reason,
            state,
        )
    else:
        approved, explanation = _gatekeeper_evaluate_skip(
            phase,
            reason,
            state,
        )

    if not approved:
        print("\n" + _msg("gatekeeper_skip_denied", phase=phase))
        print(_msg("gatekeeper_skip_denied_reason", reason=explanation[:300]))
        print("\n" + _msg("gatekeeper_skip_denied_retry", cmd=CMD))
        _append_log(
            {
                "iteration": state["iteration"],
                "phase": phase,
                "event": "skip_denied",
                "reason": reason,
                "gatekeeper": explanation[:200],
            }
        )
        return

    print(_msg("gatekeeper_skip_approved", phase=phase))

    # setdefault: a state file without the key must not raise KeyError
    # after the gatekeeper has already approved (cmd_status reads the same
    # key with a default as well).
    state.setdefault("skipped_phases", []).append({"phase": phase, "reason": reason})

    # FSM: skip and advance
    _fire_fsm(FSMEvent.SKIP, state)  # pending -> skipped
    nxt = _next_phase(state)
    if nxt:
        _fire_fsm(FSMEvent.ADVANCE, state)  # skipped -> pending
        state["current_phase"] = nxt
    else:
        state["phase_status"] = "iteration_complete"

    _save_state(state)
    _append_log(
        {
            "iteration": state["iteration"],
            "phase": phase,
            "event": "phase_skipped",
            "reason": reason,
            "gatekeeper": "approved",
        }
    )

    print(_msg("skip_approved_msg", phase=phase, reason=reason))
    if nxt:
        print("\n" + _msg("skip_next", nxt=nxt))
        print(_msg("skip_next_cmd", cmd=CMD))
    else:
        print("\n" + _msg("skip_iteration_complete"))
2058
+
2059
+
2060
def cmd_context(args) -> None:
    """Set, list or clear user guidance attached to a phase.

    The target phase is ``args.phase`` when given, otherwise the current
    phase; it is upper-cased before use. Three modes exist:

    * ``--clear``: remove the stored guidance for the target phase.
    * no ``--message``: list every stored entry, truncated to 100 chars.
    * ``--message``: store the message for the target phase, log it and
      report whether it applies to the phase currently in progress.

    Args:
        args: Parsed CLI namespace; reads ``message`` and the optional
            ``phase`` and ``clear`` attributes.
    """
    state = _load_state()
    if not state:
        print(_msg("no_active"), file=sys.stderr)
        sys.exit(1)

    target = (getattr(args, "phase", "") or state["current_phase"]).upper()

    if getattr(args, "clear", False):
        stored = _load_context()
        stored.pop(target, None)
        _save_context(stored)
        print(_msg("context_cleared", phase=target))
        return

    text = args.message
    if not text:
        # Listing mode: show every stored entry, shortened for display.
        stored = _load_context()
        if stored:
            for phase_name, guidance in stored.items():
                shown = guidance[:100] + ("..." if len(guidance) > 100 else "")
                print(_msg("context_item", phase=phase_name, text=shown))
        else:
            print(_msg("context_none"))
        return

    stored = _load_context()
    stored[target] = text
    _save_context(stored)
    _append_log(
        {
            "iteration": state["iteration"],
            "phase": target,
            "event": "user_context",
            "message": text[:200],
        }
    )
    print(_msg("context_set", phase=target))
    print(_msg("context_message", message=text))

    applies_now = state["phase_status"] == "in_progress" and state["current_phase"] == target
    if applies_now:
        print("\n" + _msg("context_in_progress", cmd=CMD))
    else:
        print("\n" + _msg("context_will_show", phase=target))
2113
+
2114
+
2115
def cmd_log_failure(args) -> None:
    """Record a failure mode through ``_append_failure``.

    The entry carries the mode ID and description from the CLI together
    with the current iteration and phase. Without an active state the
    entry uses iteration 0 and phase "unknown".

    Args:
        args: Parsed CLI namespace; reads ``args.mode`` and ``args.desc``.
    """
    state = _load_state()
    if state:
        iteration, phase = state["iteration"], state["current_phase"]
    else:
        iteration, phase = 0, "unknown"

    record = {
        "iteration": iteration,
        "phase": phase,
        "mode": args.mode,
        "description": args.desc,
    }
    _append_failure(record)
    print(_msg("failure_logged", mode=args.mode, desc=args.desc))
2135
+
2136
+
2137
def cmd_failures(args) -> None:
    """Print the failure log grouped by iteration.

    Entries are read from ``FAILURES_FILE``, bucketed by their
    ``iteration`` value (0 when absent) and printed in ascending
    iteration order with mode, phase, description and timestamp.

    Args:
        args: Parsed CLI namespace (not read by this command).
    """
    # A missing file and an empty list are reported the same way.
    records = _load_yaml_list(FAILURES_FILE) if FAILURES_FILE.exists() else None
    if not records:
        print(_msg("no_failures"))
        return

    grouped = collections.defaultdict(list)
    for record in records:
        grouped[record.get("iteration", 0)].append(record)

    for iteration in sorted(grouped):
        print("\n" + _msg("failure_iteration_header", iteration=iteration))
        for record in grouped[iteration]:
            print(
                _msg(
                    "failure_item",
                    mode=record.get("mode", "?"),
                    phase=record.get("phase", "?"),
                    desc=record.get("description", "?"),
                    ts=record.get("timestamp", "?"),
                )
            )
2167
+
2168
+
2169
def cmd_hypotheses(args) -> None:
    """Print every entry returned by ``_load_prior_hypotheses``.

    Each entry is shown with its ID, average score, status, hypothesis
    text (first 200 characters) and timestamp; missing fields are shown
    as "?".

    Args:
        args: Parsed CLI namespace (not read by this command).
    """
    catalogue = _load_prior_hypotheses()
    if not catalogue:
        print(_msg("no_hypotheses"))
        return

    for entry in catalogue:
        fields = {
            "hid": entry.get("id", "?"),
            "status": entry.get("status", "?"),
            "avg": entry.get("avg_score", "?"),
            "hyp": entry.get("hypothesis", "?")[:200],
            "ts": entry.get("timestamp", "?"),
        }
        print("\n" + _msg("hypothesis_item", **fields))
2188
+
2189
+
2190
def cmd_validate(args) -> None:
    """Validate the loaded model and exit with a matching status code.

    Runs ``validate_model`` on the module-level model. With no issues a
    success message is printed and the process exits with code 0;
    otherwise the issue count and each numbered issue are printed and the
    process exits with code 1.

    Args:
        args: Parsed CLI namespace (not read by this command).
    """
    problems = validate_model(_MODEL)
    if problems:
        print(_msg("validate_issues", count=len(problems)))
        for number, problem in enumerate(problems, start=1):
            print(_msg("validate_item", num=number, issue=problem))
        sys.exit(1)

    print(_msg("validate_success"))
    sys.exit(0)
2205
+
2206
+
2207
def _dry_run_phase(workflow: str, phase_name: str) -> list[str]:
    """Print the expected agents and gates for one phase.

    Also test-renders every template attribute of the resolved phase
    against placeholder values so that broken templates are reported
    before a real run.

    Args:
        workflow: Workflow type name used for key resolution.
        phase_name: Phase name as listed in the workflow's phase sequence.

    Returns:
        A list of human-readable template rendering issues (empty when
        every template renders).
    """
    issues: list[str] = []
    phase_key = _resolve_key(workflow, phase_name, set(_MODEL.phases.keys()))
    agent_key = _resolve_key(workflow, phase_name, set(_MODEL.agents.keys()))
    agents = _MODEL.agents.get(agent_key, [])

    # Gate keys look like "<phase>::<gate>"; resolve against the phase part.
    gate_phases = {k.rsplit("::", 1)[0] for k in _MODEL.gates if "::" in k}
    gate_key = _resolve_key(workflow, phase_name, gate_phases)
    has_rb = f"{gate_key}::readback" in _MODEL.gates
    has_gk = f"{gate_key}::gatekeeper" in _MODEL.gates

    skippable = any(
        p.get("skippable") for p in _MODEL.workflow_types[workflow].phases
        if p["name"] == phase_name
    )
    tag = "skip" if skippable else "req"
    agent_names = ", ".join(a.name for a in agents) if agents else "none"
    rb = "yes" if has_rb else "NO"
    gk = "yes" if has_gk else "NO"

    # Report the key the phase resolved to.
    print(_msg("dry_run_phase_line", phase=phase_name, tag=tag, agents=agent_names, rb=rb, gk=gk)
          + f" [{phase_key}]")

    # Test template rendering with dummy context
    phase_obj = _MODEL.phases.get(phase_key)
    if phase_obj:
        dummy_ctx = collections.defaultdict(str, {v: f"<{v}>" for v in _KNOWN_TEMPLATE_VARS})
        for attr in ("start", "end", "start_continue", "start_final", "end_continue", "end_final"):
            text = getattr(phase_obj, attr, "")
            if not text:
                continue
            try:
                text.format_map(dummy_ctx)
            except (KeyError, ValueError, IndexError, AttributeError, TypeError) as exc:
                # AttributeError / TypeError come from placeholders such as
                # "{var.attr}" or "{var[key]}" applied to the string
                # dummies; report them rather than crash the dry run.
                issues.append(f"[phases.yaml] '{phase_key}.{attr}': template render error: {exc}")

    return issues
2246
+
2247
+
2248
def _dry_run(itype: str, total_iterations: int) -> None:
    """Print the expected execution plan without creating state.

    Steps: validate the model, print the per-phase plan (the dependency
    workflow first when one is declared and more than one iteration is
    requested), report template rendering issues, then simulate the
    phase lifecycle FSM for every workflow involved. Any failure prints
    the problem and exits with code 1.

    Args:
        itype: Workflow type name to plan.
        total_iterations: Number of implementation iterations to print.
    """
    issues = validate_model(_MODEL)
    if issues:
        for issue in issues:
            print(_msg("dry_run_error", issue=issue))
        sys.exit(1)
    print(_msg("dry_run_valid"))

    wf = _MODEL.workflow_types[itype]
    dep_wf = _MODEL.workflow_types.get(wf.depends_on) if wf.depends_on else None
    # The dependency workflow only takes part in multi-iteration runs.
    include_dep = bool(dep_wf) and total_iterations > 1
    template_issues: list[str] = []

    if include_dep:
        print(_msg("dry_run_planning_iter", wtype=wf.depends_on))
        for p in dep_wf.phases:
            template_issues.extend(_dry_run_phase(wf.depends_on, p["name"]))

    for i in range(1, total_iterations + 1):
        print(_msg("dry_run_impl_iter", num=i, wtype=itype))
        for p in wf.phases:
            template_issues.extend(_dry_run_phase(itype, p["name"]))

    # Every iteration re-checks the same phases, so the same issue would
    # otherwise be listed once per iteration; keep first occurrences only.
    unique_issues = list(dict.fromkeys(template_issues))
    if unique_issues:
        print("\n Template rendering issues:")
        for ti in unique_issues:
            print(f" {ti}")
        sys.exit(1)

    # FSM lifecycle simulation - verify all transitions work
    workflows_to_simulate = ([dep_wf] if include_dep else []) + [wf]
    for workflow_def in workflows_to_simulate:
        reports = _PHASE_FSM.simulate([p["name"] for p in workflow_def.phases])
        for r in reports:
            if not r["valid"]:
                print(_msg("dry_run_error", issue=f"FSM simulation failed for {r['phase']}: {r.get('error', '')}"))
                sys.exit(1)

    print(_msg("dry_run_complete"))
2292
+
2293
+
2294
def cmd_add_iteration(args) -> None:
    """Add iterations to an active cycle without restarting.

    Increases ``state["total_iterations"]`` by ``args.count``, optionally
    replaces the objective, saves the state and appends an audit entry.

    Args:
        args: Parsed CLI namespace; reads ``args.count`` and the optional
            ``args.objective``.

    Exits with code 1 when no cycle is active or when the count is not a
    positive integer.
    """
    state = _load_state()
    if not state:
        print(_msg("no_active_add_iteration"), file=sys.stderr)
        sys.exit(1)

    count = args.count
    if count < 1:
        # A zero or negative count would silently shrink (or not change)
        # the planned total instead of adding iterations.
        print(f"add-iteration: --count must be a positive integer (got {count})", file=sys.stderr)
        sys.exit(1)

    # Default mirrors cmd_status, which treats a missing total as 1.
    old_total = state.get("total_iterations", 1)
    new_total = old_total + count
    state["total_iterations"] = new_total

    new_objective = getattr(args, "objective", "") or ""
    if new_objective:
        state["objective"] = new_objective
    _save_state(state)
    _append_log(
        {
            "iteration": state["iteration"],
            "event": "add_iteration",
            "count": count,
            "old_total": old_total,
            "new_total": new_total,
        }
    )
    print(_msg("add_iteration_success", count=count, old=old_total, new=new_total))
2317
+
2318
+
2319
+ # ── Main ─────────────────────────────────────────────────────────────
2320
+
2321
+
2322
def main(resources_dir: Path | None = None):
    """CLI entry point: build the argument parser and dispatch a command.

    Args:
        resources_dir: Directory holding the YAML resource files. When
            None, the value of a ``--resources-dir`` option found in
            ``sys.argv`` is used, and failing that the ``resources``
            directory next to this file.
    """
    if resources_dir is None:
        # The parser's texts come from the resources, so the directory has
        # to be known (and _initialize called) before argparse is built.
        cli_tokens = sys.argv[1:]
        for pos, token in enumerate(cli_tokens):
            if token == "--resources-dir":
                if pos + 1 < len(cli_tokens):
                    resources_dir = Path(cli_tokens[pos + 1])
                    break
            elif token.startswith("--resources-dir="):
                resources_dir = Path(token.split("=", 1)[1])
                break
    if resources_dir is None:
        resources_dir = Path(__file__).parent / "resources"

    _initialize(resources_dir)

    parser = argparse.ArgumentParser(
        description=_cli("description", ""),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_cli("epilog", ""),
    )
    parser.add_argument(
        "--resources-dir",
        default=str(resources_dir),
        help="Path to YAML resource files directory",
    )
    subparsers = parser.add_subparsers(dest="command")

    # new
    new_cmd = subparsers.add_parser("new", help=_cli("commands", "new"))
    new_cmd.add_argument("--type", required=True, choices=list(ITERATION_TYPES.keys()))
    new_cmd.add_argument("--objective", required=True, help=_cli("args", "objective"))
    new_cmd.add_argument("--iterations", type=int, default=1, help=_cli("args", "iterations"))
    new_cmd.add_argument("--benchmark", default="", help=_cli("args", "benchmark"))
    new_cmd.add_argument("--clean", action="store_true", default=True, help=_cli("args", "clean"))
    new_cmd.add_argument("--no-clean", action="store_false", dest="clean", help=_cli("args", "no_clean"))
    new_cmd.add_argument("--dry-run", action="store_true", default=False, help=_cli("args", "dry_run"))

    # start
    start_cmd = subparsers.add_parser("start", help=_cli("commands", "start"))
    start_cmd.add_argument("--understanding", required=True, help=_cli("args", "understanding"))

    # end
    end_cmd = subparsers.add_parser("end", help=_cli("commands", "end"))
    end_cmd.add_argument("--evidence", default="", help=_cli("args", "evidence"))
    end_cmd.add_argument("--agents", default="", help=_cli("args", "agents"))
    end_cmd.add_argument("--output-file", default="", help=_cli("args", "output_file"))

    # status
    subparsers.add_parser("status", help=_cli("commands", "status"))

    # reject
    reject_cmd = subparsers.add_parser("reject", help=_cli("commands", "reject"))
    reject_cmd.add_argument("--reason", required=True, help=_cli("args", "reason"))

    # skip
    skip_cmd = subparsers.add_parser("skip", help=_cli("commands", "skip"))
    skip_cmd.add_argument("--reason", default="", help=_cli("args", "skip_reason"))
    skip_cmd.add_argument("--force", action="store_true", default=False, help=_cli("args", "force"))

    # context
    context_cmd = subparsers.add_parser("context", help=_cli("commands", "context"))
    context_cmd.add_argument("--message", default="", help=_cli("args", "message"))
    context_cmd.add_argument("--phase", default="", help=_cli("args", "phase"))
    context_cmd.add_argument("--clear", action="store_true", default=False, help=_cli("args", "clear"))

    # log-failure
    failure_cmd = subparsers.add_parser("log-failure", help=_cli("commands", "log_failure"))
    failure_cmd.add_argument("--mode", required=True, help=_cli("args", "mode"))
    failure_cmd.add_argument("--desc", required=True, help=_cli("args", "desc"))

    # failures / hypotheses take no options
    subparsers.add_parser("failures", help=_cli("commands", "failures"))
    subparsers.add_parser("hypotheses", help=_cli("commands", "hypotheses"))

    # add-iteration
    add_cmd = subparsers.add_parser("add-iteration", help=_cli("commands", "add_iteration"))
    add_cmd.add_argument("--count", type=int, required=True, help=_cli("args", "count"))
    add_cmd.add_argument("--objective", default="", help=_cli("args", "add_objective"))

    # validate
    subparsers.add_parser("validate", help="Validate YAML resources against the model schema")

    args = parser.parse_args()
    if not args.command:
        # No subcommand given: show usage and signal failure.
        parser.print_help()
        sys.exit(1)

    # Initialize artifacts directory
    _init_artifacts_dir()

    handlers = {
        "new": cmd_new,
        "start": cmd_start,
        "end": cmd_end,
        "status": cmd_status,
        "reject": cmd_reject,
        "skip": cmd_skip,
        "context": cmd_context,
        "log-failure": cmd_log_failure,
        "failures": cmd_failures,
        "hypotheses": cmd_hypotheses,
        "add-iteration": cmd_add_iteration,
        "validate": cmd_validate,
    }
    handler = handlers[args.command]
    handler(args)
2441
+
2442
+
2443
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()