spec-runner 2.2.2__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. {spec_runner-2.2.2/src/spec_runner.egg-info → spec_runner-2.3.1}/PKG-INFO +8 -2
  2. {spec_runner-2.2.2 → spec_runner-2.3.1}/README.md +7 -1
  3. {spec_runner-2.2.2 → spec_runner-2.3.1}/pyproject.toml +1 -1
  4. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/cli.py +87 -1
  5. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/cli_info.py +66 -28
  6. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/config.py +51 -2
  7. spec_runner-2.3.1/src/spec_runner/errors.py +82 -0
  8. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/execution.py +14 -3
  9. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/hooks.py +18 -1
  10. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/runner.py +5 -3
  11. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/SKILL.md +4 -0
  12. spec_runner-2.3.1/src/spec_runner/skills/spec-generator-skill/templates/pi/skills/pi-implementer/SKILL.md +37 -0
  13. spec_runner-2.3.1/src/spec_runner/skills/spec-generator-skill/templates/pi/skills/pi-reviewer/SKILL.md +32 -0
  14. spec_runner-2.3.1/src/spec_runner/skills/spec-generator-skill/templates/pi/skills/pi-tester/SKILL.md +28 -0
  15. spec_runner-2.3.1/src/spec_runner/skills/spec-generator-skill/templates/pi/spec-runner.pi.config.yaml +51 -0
  16. spec_runner-2.3.1/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.pi.md +38 -0
  17. spec_runner-2.3.1/src/spec_runner/stages.py +45 -0
  18. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/state.py +116 -5
  19. {spec_runner-2.2.2 → spec_runner-2.3.1/src/spec_runner.egg-info}/PKG-INFO +8 -2
  20. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner.egg-info/SOURCES.txt +12 -0
  21. spec_runner-2.3.1/tests/test_cli_flags.py +15 -0
  22. spec_runner-2.3.1/tests/test_cli_info.py +134 -0
  23. spec_runner-2.3.1/tests/test_cli_run_reset.py +202 -0
  24. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_config.py +61 -0
  25. spec_runner-2.3.1/tests/test_errors.py +72 -0
  26. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_execution.py +113 -5
  27. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_hooks.py +101 -0
  28. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_runner.py +28 -5
  29. spec_runner-2.3.1/tests/test_stages.py +51 -0
  30. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_state.py +194 -0
  31. spec_runner-2.3.1/tests/test_subdir_detection.py +27 -0
  32. spec_runner-2.2.2/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.pi.md +0 -38
  33. {spec_runner-2.2.2 → spec_runner-2.3.1}/LICENSE +0 -0
  34. {spec_runner-2.2.2 → spec_runner-2.3.1}/setup.cfg +0 -0
  35. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/__init__.py +0 -0
  36. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/audit.py +0 -0
  37. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/audit_log.py +0 -0
  38. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/cli_plan.py +0 -0
  39. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/events.py +0 -0
  40. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/executor.py +0 -0
  41. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/git_ops.py +0 -0
  42. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/github_sync.py +0 -0
  43. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/init_cmd.py +0 -0
  44. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/logging.py +0 -0
  45. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/mcp_server.py +0 -0
  46. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/notifications.py +0 -0
  47. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/obs.py +0 -0
  48. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/plugins.py +0 -0
  49. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/prompt.py +0 -0
  50. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/py.typed +0 -0
  51. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/report.py +0 -0
  52. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/review.py +0 -0
  53. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/Makefile.template +0 -0
  54. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/design.template.md +0 -0
  55. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/executor.config.yaml +0 -0
  56. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/executor.py +0 -0
  57. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/phase-design.template.md +0 -0
  58. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/phase-requirements.template.md +0 -0
  59. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/phase-tasks.template.md +0 -0
  60. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.claude.md +0 -0
  61. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.codex.md +0 -0
  62. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.llama.md +0 -0
  63. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.md +0 -0
  64. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.ollama.md +0 -0
  65. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.opencode.md +0 -0
  66. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/requirements.template.md +0 -0
  67. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/task.py +0 -0
  68. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/tasks.template.md +0 -0
  69. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/skills/spec-generator-skill/templates/workflow.template.md +0 -0
  70. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/task.py +0 -0
  71. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/task_commands.py +0 -0
  72. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/tui.py +0 -0
  73. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/validate.py +0 -0
  74. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/verify.py +0 -0
  75. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner.egg-info/dependency_links.txt +0 -0
  76. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner.egg-info/entry_points.txt +0 -0
  77. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner.egg-info/requires.txt +0 -0
  78. {spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner.egg-info/top_level.txt +0 -0
  79. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_audit.py +0 -0
  80. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_audit_log.py +0 -0
  81. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_costs.py +0 -0
  82. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_e2e.py +0 -0
  83. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_events.py +0 -0
  84. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_gh_sync.py +0 -0
  85. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_json_result_contract.py +0 -0
  86. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_logging.py +0 -0
  87. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_mcp.py +0 -0
  88. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_notifications.py +0 -0
  89. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_obs.py +0 -0
  90. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_obs_contract.py +0 -0
  91. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_plan_full.py +0 -0
  92. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_plugins.py +0 -0
  93. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_prompt.py +0 -0
  94. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_report.py +0 -0
  95. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_spec_prefix.py +0 -0
  96. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_task.py +0 -0
  97. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_task_diff.py +0 -0
  98. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_tui.py +0 -0
  99. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_validate.py +0 -0
  100. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_verify.py +0 -0
  101. {spec_runner-2.2.2 → spec_runner-2.3.1}/tests/test_watch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spec-runner
3
- Version: 2.2.2
3
+ Version: 2.3.1
4
4
  Summary: Task automation from markdown specs via Claude CLI
5
5
  Author: Andrei
6
6
  License-Expression: MIT
@@ -134,7 +134,8 @@ Tasks are defined in `spec/tasks.md`:
134
134
  # Execution
135
135
  spec-runner run # Execute next ready task
136
136
  spec-runner run --task=TASK-001 # Execute specific task
137
- spec-runner run --all # Execute all ready tasks
137
+ spec-runner run --all # Execute all ready tasks (resets failed→pending by default)
138
+ spec-runner run --all --no-reset-failed # Keep failed tasks sticky (skip the default reset)
138
139
  spec-runner run --all --hitl-review # Interactive HITL approval gate
139
140
  spec-runner run --force # Skip lock check (stale lock)
140
141
  spec-runner run --tui # Execute with live TUI dashboard
@@ -375,6 +376,11 @@ paths:
375
376
  | llama-cli | Yes | `{cmd} -m {model} -p {prompt} --no-display-prompt` |
376
377
  | Custom | Use template | `{cmd} --prompt {prompt}` |
377
378
 
379
+ > **Full pi-driven loop:** `pi` can run the entire dev → review → test cycle (with native
380
+ > skills, per-stage tool control and a read-only review gate) using only config and a small
381
+ > script — no core code. See [docs/pi-workflow.md](docs/pi-workflow.md) and the runnable
382
+ > [examples/pi-loop/](examples/pi-loop/).
383
+
378
384
  ## Project Structure
379
385
 
380
386
  ```
@@ -99,7 +99,8 @@ Tasks are defined in `spec/tasks.md`:
99
99
  # Execution
100
100
  spec-runner run # Execute next ready task
101
101
  spec-runner run --task=TASK-001 # Execute specific task
102
- spec-runner run --all # Execute all ready tasks
102
+ spec-runner run --all # Execute all ready tasks (resets failed→pending by default)
103
+ spec-runner run --all --no-reset-failed # Keep failed tasks sticky (skip the default reset)
103
104
  spec-runner run --all --hitl-review # Interactive HITL approval gate
104
105
  spec-runner run --force # Skip lock check (stale lock)
105
106
  spec-runner run --tui # Execute with live TUI dashboard
@@ -340,6 +341,11 @@ paths:
340
341
  | llama-cli | Yes | `{cmd} -m {model} -p {prompt} --no-display-prompt` |
341
342
  | Custom | Use template | `{cmd} --prompt {prompt}` |
342
343
 
344
+ > **Full pi-driven loop:** `pi` can run the entire dev → review → test cycle (with native
345
+ > skills, per-stage tool control and a read-only review gate) using only config and a small
346
+ > script — no core code. See [docs/pi-workflow.md](docs/pi-workflow.md) and the runnable
347
+ > [examples/pi-loop/](examples/pi-loop/).
348
+
343
349
  ## Project Structure
344
350
 
345
351
  ```
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spec-runner"
7
- version = "2.2.2"
7
+ version = "2.3.1"
8
8
  description = "Task automation from markdown specs via Claude CLI"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -201,6 +201,19 @@ def _run_tasks(args, config: ExecutorConfig):
201
201
  logger.warning("Recovered stale tasks", task_ids=recovered)
202
202
  tasks = parse_tasks(config.tasks_file)
203
203
 
204
+ # v2.3.0: reset failed-task state on `run --all` unless opted out.
205
+ reset_enabled = getattr(args, "all", False) and not getattr(
206
+ args, "no_reset_failed", False
207
+ )
208
+ previously_failed: set[str] = set() # used by T17 second-pass detection
209
+ if reset_enabled:
210
+ previously_failed = state.reset_failed_to_pending()
211
+ state.consecutive_failures = 0
212
+ state.clear_second_pass_fails()
213
+ state._save()
214
+ stop_reason: str = "completed" # used by T18 stop-reason capture
215
+ stop_detail: str = "" # used by T18 stop-reason capture
216
+
204
217
  # Pre-run validation
205
218
  from .validate import format_results, validate_all
206
219
 
@@ -255,6 +268,8 @@ def _run_tasks(args, config: ExecutorConfig):
255
268
  logger.info("No tasks ready to execute")
256
269
  if getattr(args, "json_result", False):
257
270
  print(json.dumps({"tasks": [], "message": "No tasks ready to execute"}))
271
+ state.set_meta("last_run_stop_reason", stop_reason)
272
+ state.set_meta("last_run_stop_detail", stop_detail)
258
273
  return
259
274
 
260
275
  # --dry-run: show what would execute and exit
@@ -373,11 +388,37 @@ def _run_tasks(args, config: ExecutorConfig):
373
388
  result = run_with_retries(task, config, state)
374
389
  last_activity = time.monotonic()
375
390
 
391
+ # v2.3.0: detect tasks that fail again on a second pass.
392
+ # Use the persisted task status (set to "failed" when retries
393
+ # are exhausted) rather than `result is False`, because the
394
+ # default on_task_failure="skip" mode returns "SKIP" for a
395
+ # fully-failed task — so a result-based check would miss it.
396
+ # Must run BEFORE the SKIP `continue` below, which short-circuits.
397
+ if (
398
+ task.id in previously_failed
399
+ and state.get_task_state(task.id).status == "failed"
400
+ ):
401
+ log_progress(
402
+ f"💡 [{task.id}] repeated failure — review logs at "
403
+ f"{config.logs_dir}/{task.id}-*.log"
404
+ )
405
+ state.add_second_pass_fail(task.id)
406
+
376
407
  # "SKIP" means continue to next task
377
408
  if result == "SKIP":
378
409
  continue
379
410
 
380
411
  if result is False and state.should_stop():
412
+ last = state.most_recent_failed_attempt()
413
+ if last and last.error_kind and last.error_kind != "unknown":
414
+ stop_reason = f"error_{last.error_kind}"
415
+ stop_detail = last.error or ""
416
+ else:
417
+ stop_reason = "max_consecutive_failures"
418
+ stop_detail = (
419
+ f"{state.consecutive_failures}/"
420
+ f"{config.max_consecutive_failures}"
421
+ )
381
422
  logger.warning("Stopping: too many consecutive failures")
382
423
  break
383
424
  else:
@@ -392,13 +433,43 @@ def _run_tasks(args, config: ExecutorConfig):
392
433
 
393
434
  result = run_with_retries(task, config, state)
394
435
 
436
+ # v2.3.0: detect tasks that fail again on a second pass.
437
+ # Use the persisted task status (set to "failed" when retries
438
+ # are exhausted) rather than `result is False`, because the
439
+ # default on_task_failure="skip" mode returns "SKIP" for a
440
+ # fully-failed task — so a result-based check would miss it.
441
+ # Must run BEFORE the SKIP `continue` below, which short-circuits.
442
+ if (
443
+ task.id in previously_failed
444
+ and state.get_task_state(task.id).status == "failed"
445
+ ):
446
+ log_progress(
447
+ f"💡 [{task.id}] repeated failure — review logs at "
448
+ f"{config.logs_dir}/{task.id}-*.log"
449
+ )
450
+ state.add_second_pass_fail(task.id)
451
+
395
452
  if result == "SKIP":
396
453
  continue
397
454
 
398
455
  if result is False and state.should_stop():
456
+ last = state.most_recent_failed_attempt()
457
+ if last and last.error_kind and last.error_kind != "unknown":
458
+ stop_reason = f"error_{last.error_kind}"
459
+ stop_detail = last.error or ""
460
+ else:
461
+ stop_reason = "max_consecutive_failures"
462
+ stop_detail = (
463
+ f"{state.consecutive_failures}/"
464
+ f"{config.max_consecutive_failures}"
465
+ )
399
466
  logger.warning("Stopping: too many consecutive failures")
400
467
  break
401
468
 
469
+ # v2.3.0: persist stop-reason for this run
470
+ state.set_meta("last_run_stop_reason", stop_reason)
471
+ state.set_meta("last_run_stop_detail", stop_detail)
472
+
402
473
  # Summary
403
474
  # Re-read tasks to get updated statuses after execution
404
475
  tasks = parse_tasks(config.tasks_file)
@@ -647,7 +718,11 @@ def _dispatch_task_command(args: argparse.Namespace) -> None:
647
718
  read_commands[task_cmd](args, tasks)
648
719
 
649
720
 
650
- def main():
721
+ def _build_parser() -> argparse.ArgumentParser:
722
+ """Build and return the top-level argument parser.
723
+
724
+ Extracted from main() to allow programmatic use and testing.
725
+ """
651
726
  # Shared options available to every subcommand
652
727
  common = argparse.ArgumentParser(add_help=False)
653
728
  common.add_argument(
@@ -743,6 +818,12 @@ def main():
743
818
  action="store_true",
744
819
  help="Output structured JSON result per task (for Maestro interop)",
745
820
  )
821
+ run_parser.add_argument(
822
+ "--no-reset-failed",
823
+ action="store_true",
824
+ help="Do not reset failed→pending or clear consecutive_failures "
825
+ "at the start of `run --all` (default: reset enabled).",
826
+ )
746
827
 
747
828
  # status
748
829
  status_parser = subparsers.add_parser("status", parents=[common], help="Show execution status")
@@ -910,6 +991,11 @@ def main():
910
991
  "sync-from-gh", parents=[task_common], help="Sync GitHub Issues to tasks.md"
911
992
  )
912
993
 
994
+ return parser
995
+
996
+
997
+ def main():
998
+ parser = _build_parser()
913
999
  args = parser.parse_args()
914
1000
 
915
1001
  if not args.command:
@@ -23,8 +23,9 @@ from .task import (
23
23
  logger = get_logger("cli")
24
24
 
25
25
 
26
- def cmd_status(args, config: ExecutorConfig):
27
- """Execution status"""
26
+ def print_status(config: ExecutorConfig) -> None:
27
+ """Print human-readable status to stdout."""
28
+ from . import __version__
28
29
 
29
30
  with ExecutorState(config) as state:
30
31
  # Parse tasks from tasks.md to cross-reference
@@ -32,29 +33,6 @@ def cmd_status(args, config: ExecutorConfig):
32
33
  if config.tasks_file.exists():
33
34
  all_tasks = parse_tasks(config.tasks_file)
34
35
 
35
- # --json: output matching MCP server format
36
- if getattr(args, "json_output", False):
37
- completed = sum(1 for ts in state.tasks.values() if ts.status == "success")
38
- failed = sum(1 for ts in state.tasks.values() if ts.status == "failed")
39
- running = sum(1 for ts in state.tasks.values() if ts.status == "running")
40
- cost = state.total_cost()
41
- inp, out = state.total_tokens()
42
- print(
43
- json.dumps(
44
- {
45
- "total_tasks": len(all_tasks),
46
- "completed": completed,
47
- "failed": failed,
48
- "running": running,
49
- "not_started": len(all_tasks) - completed - failed - running,
50
- "total_cost": round(cost, 2),
51
- "input_tokens": inp,
52
- "output_tokens": out,
53
- "budget_usd": config.budget_usd,
54
- }
55
- )
56
- )
57
- return
58
36
  total_in_spec = len(all_tasks)
59
37
 
60
38
  # Calculate statistics from actual task state
@@ -69,7 +47,21 @@ def cmd_status(args, config: ExecutorConfig):
69
47
  state_ids = set(state.tasks.keys())
70
48
  not_started = [t for t in all_tasks if t.id not in state_ids]
71
49
 
72
- print("\n📊 spec-runner Status")
50
+ print(f"\n📊 spec-runner v{__version__}")
51
+
52
+ # Stop-reason warning from executor_meta
53
+ reason = state.get_meta("last_run_stop_reason")
54
+ detail = state.get_meta("last_run_stop_detail") or ""
55
+ if reason and reason != "completed":
56
+ if reason == "max_consecutive_failures":
57
+ human = f"max_consecutive_failures reached ({detail})"
58
+ elif reason.startswith("error_"):
59
+ kind = reason.removeprefix("error_")
60
+ human = f"{kind} — {detail}" if detail else kind
61
+ else:
62
+ human = reason
63
+ print(f"⚠️ Last run stopped: {human}")
64
+
73
65
  print(f"{'=' * 50}")
74
66
  print(f"Tasks in spec: {total_in_spec}")
75
67
  print(f"Tasks completed: {completed_tasks}")
@@ -101,6 +93,7 @@ def cmd_status(args, config: ExecutorConfig):
101
93
 
102
94
  # Tasks with attempts
103
95
  attempted = [ts for ts in state.tasks.values() if ts.attempts]
96
+ second_pass = state.get_second_pass_fails()
104
97
  if attempted:
105
98
  print("\n📝 Task History:")
106
99
  for ts in attempted:
@@ -111,16 +104,27 @@ def cmd_status(args, config: ExecutorConfig):
111
104
  task_cost = state.task_cost(ts.task_id)
112
105
  if task_cost > 0:
113
106
  attempts_info += f", ${task_cost:.2f}"
114
- print(f" {icon} {ts.task_id}: {ts.status} ({attempts_info})")
107
+ # Stage tag on the task header line
108
+ stage_tag = ""
109
+ if ts.status == "failed" and ts.attempts and ts.attempts[-1].error_stage:
110
+ stage_tag = f" [at: {ts.attempts[-1].error_stage}]"
111
+ print(f" {icon} {ts.task_id}: {ts.status} ({attempts_info}){stage_tag}")
115
112
  # Show review verdict from last attempt
116
113
  if ts.attempts:
117
114
  last_attempt = ts.attempts[-1]
118
115
  if last_attempt.review_status and last_attempt.review_status != "skipped":
119
116
  print(f" Review: {last_attempt.review_status}")
117
+ # Kind tag on the error line
120
118
  if ts.status == "failed" and ts.last_error:
121
- print(f" Last error: {ts.last_error[:50]}...")
119
+ kind = ts.attempts[-1].error_kind if ts.attempts else None
120
+ kind_tag = f"[{kind}] " if kind else ""
121
+ print(f" Last error: {kind_tag}{ts.last_error[:50]}...")
122
122
  elif ts.status == "running" and ts.last_error:
123
123
  print(f" ⚠️ Last attempt failed: {ts.last_error[:50]}...")
124
+ # Second-pass hint
125
+ if ts.status == "failed" and ts.task_id in second_pass:
126
+ print(" 💡 Repeated failure across runs — review:")
127
+ print(f" {config.logs_dir}/{ts.task_id}-*.log")
124
128
 
125
129
  # Show tasks not yet in executor state
126
130
  if not_started:
@@ -129,6 +133,40 @@ def cmd_status(args, config: ExecutorConfig):
129
133
  print(f" ⬜ {t.id}: {t.name}")
130
134
 
131
135
 
136
+ def cmd_status(args, config: ExecutorConfig):
137
+ """Execution status"""
138
+
139
+ if getattr(args, "json_output", False):
140
+ with ExecutorState(config) as state:
141
+ # Parse tasks from tasks.md to cross-reference
142
+ all_tasks: list[Task] = []
143
+ if config.tasks_file.exists():
144
+ all_tasks = parse_tasks(config.tasks_file)
145
+
146
+ completed = sum(1 for ts in state.tasks.values() if ts.status == "success")
147
+ failed = sum(1 for ts in state.tasks.values() if ts.status == "failed")
148
+ running = sum(1 for ts in state.tasks.values() if ts.status == "running")
149
+ cost = state.total_cost()
150
+ inp, out = state.total_tokens()
151
+ print(
152
+ json.dumps(
153
+ {
154
+ "total_tasks": len(all_tasks),
155
+ "completed": completed,
156
+ "failed": failed,
157
+ "running": running,
158
+ "not_started": len(all_tasks) - completed - failed - running,
159
+ "total_cost": round(cost, 2),
160
+ "input_tokens": inp,
161
+ "output_tokens": out,
162
+ "budget_usd": config.budget_usd,
163
+ }
164
+ )
165
+ )
166
+ return
167
+ print_status(config)
168
+
169
+
132
170
  def cmd_costs(args: argparse.Namespace, config: ExecutorConfig) -> None:
133
171
  """Show cost breakdown per task with optional JSON output."""
134
172
  tasks = parse_tasks(config.tasks_file)
@@ -8,6 +8,7 @@ import argparse
8
8
  import contextlib
9
9
  import fcntl
10
10
  import os
11
+ import subprocess
11
12
  from dataclasses import dataclass, field
12
13
  from datetime import datetime
13
14
  from pathlib import Path
@@ -137,7 +138,7 @@ class ExecutorConfig:
137
138
  # Command template for custom CLIs. Placeholders: {cmd}, {model}, {prompt}, {prompt_file}
138
139
  # Examples:
139
140
  # claude: "{cmd} -p {prompt}" or "{cmd} -p {prompt} --model {model}"
140
- # codex: "{cmd} -p {prompt}"
141
+ # codex: "{cmd} exec {prompt}" # -p is --profile in codex, not the prompt
141
142
  # opencode: "{cmd} run --model {model} {prompt}"
142
143
  # pi: "{cmd} -p --model {model} {prompt}"
143
144
  # ollama: "{cmd} run {model} {prompt}"
@@ -281,6 +282,32 @@ def _parse_personas(raw: dict) -> dict[str, Persona] | None:
281
282
  return personas if personas else None
282
283
 
283
284
 
285
+ def _detect_subdir_repo(project_root: Path) -> Path | None:
286
+ """Return the git repo toplevel if `project_root` is a strict subdir of
287
+ a git repo. Return None when project_root IS the toplevel, when no git
288
+ repo wraps it, or when git is not installed.
289
+ """
290
+ try:
291
+ result = subprocess.run(
292
+ ["git", "-C", str(project_root), "rev-parse", "--show-toplevel"],
293
+ capture_output=True,
294
+ text=True,
295
+ check=True,
296
+ )
297
+ except (subprocess.CalledProcessError, FileNotFoundError):
298
+ return None
299
+ toplevel = Path(result.stdout.strip()).resolve()
300
+ return toplevel if toplevel != project_root.resolve() else None
301
+
302
+
303
+ def _user_set(yaml_config: dict, args: argparse.Namespace, key: str) -> bool:
304
+ """True if user explicitly set this key in YAML or CLI."""
305
+ if yaml_config.get(key) is not None:
306
+ return True
307
+ val = getattr(args, key, None)
308
+ return val not in (None, False)
309
+
310
+
284
311
  def _resolve_config_path() -> Path:
285
312
  """Find the config file, preferring new location over legacy.
286
313
 
@@ -448,4 +475,26 @@ def build_config(yaml_config: dict, args: argparse.Namespace) -> ExecutorConfig:
448
475
  if hasattr(args, "log_level") and getattr(args, "log_level", None):
449
476
  config_kwargs["log_level"] = args.log_level
450
477
 
451
- return ExecutorConfig(**config_kwargs)
478
+ config = ExecutorConfig(**config_kwargs)
479
+
480
+ git_root = _detect_subdir_repo(config.project_root)
481
+ if git_root is not None:
482
+ flipped = []
483
+ if not _user_set(yaml_config, args, "create_git_branch"):
484
+ config.create_git_branch = False
485
+ flipped.append("create_git_branch")
486
+ if not _user_set(yaml_config, args, "auto_commit"):
487
+ config.auto_commit = False
488
+ flipped.append("auto_commit")
489
+ if flipped:
490
+ from .logging import get_logger
491
+
492
+ get_logger("config").warning(
493
+ "subdir_project_detected",
494
+ project_root=str(config.project_root),
495
+ git_root=str(git_root),
496
+ defaulted_off=flipped,
497
+ override_hint="set create_git_branch/auto_commit=true in YAML to opt-in",
498
+ )
499
+
500
+ return config
@@ -0,0 +1,82 @@
1
+ """Error classification for CLI agent stderr (v2.3.0).
2
+
3
+ Adds short, human-readable reasons to failures (previously surfaced as
4
+ "Unknown error"). Pattern library + last-N-lines stderr fallback.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from dataclasses import dataclass
11
+
12
+ STDERR_TAIL_LINES = 5
13
+
14
+
15
+ @dataclass(frozen=True)
16
+ class ErrorPattern:
17
+ """One classification pattern.
18
+
19
+ `template` supports {0}, {1}, ... substitutions from regex groups;
20
+ if the template uses no groups, regex match-only is enough.
21
+ """
22
+
23
+ kind: str
24
+ regex: re.Pattern[str]
25
+ template: str
26
+
27
+
28
+ PATTERNS: list[ErrorPattern] = [
29
+ # codex / OpenAI quota — captures the "try again at <time>" hint
30
+ ErrorPattern(
31
+ kind="rate_limit",
32
+ regex=re.compile(
33
+ r"hit your usage limit.*?try again at ([\d:]+\s*[AP]M)", re.S
34
+ ),
35
+ template="OpenAI usage limit — try again at {0}",
36
+ ),
37
+ # generic rate-limit (claude, generic providers)
38
+ ErrorPattern(
39
+ kind="rate_limit",
40
+ regex=re.compile(r"rate[_\s-]?limit", re.I),
41
+ template="Rate limit hit",
42
+ ),
43
+ # auth failures
44
+ ErrorPattern(
45
+ kind="auth",
46
+ regex=re.compile(r"unauthor|invalid api key|forbidden", re.I),
47
+ template="Authentication failed",
48
+ ),
49
+ # network failures
50
+ ErrorPattern(
51
+ kind="network",
52
+ regex=re.compile(
53
+ r"ECONNREFUSED|timed out|name or service not known|dns",
54
+ re.I,
55
+ ),
56
+ template="Network error",
57
+ ),
58
+ # generic CLI error line (last resort before unknown fallback)
59
+ ErrorPattern(
60
+ kind="cli_error",
61
+ regex=re.compile(r"^error:\s*(.+)$", re.M),
62
+ template="{0}",
63
+ ),
64
+ ]
65
+
66
+
67
+ def classify(stderr: str, returncode: int) -> tuple[str, str]:
68
+ """Return (kind, human_message) for a failed CLI invocation.
69
+
70
+ - Tries each pattern in PATTERNS order; first match wins.
71
+ - Falls back to ("unknown", last N lines of stderr) when nothing matches.
72
+ - When stderr is empty, falls back to ("unknown", "CLI exited with code N").
73
+ """
74
+ for p in PATTERNS:
75
+ m = p.regex.search(stderr)
76
+ if m:
77
+ try:
78
+ return p.kind, p.template.format(*m.groups())
79
+ except IndexError:
80
+ return p.kind, p.template
81
+ tail = "\n".join(stderr.strip().splitlines()[-STDERR_TAIL_LINES:])
82
+ return "unknown", tail or f"CLI exited with code {returncode}"
@@ -6,6 +6,7 @@ import time
6
6
  from datetime import datetime
7
7
 
8
8
  from .config import ExecutorConfig
9
+ from .errors import classify
9
10
  from .hooks import post_done_hook, pre_start_hook
10
11
  from .logging import get_logger
11
12
  from .prompt import build_task_prompt, extract_test_failures
@@ -16,6 +17,7 @@ from .runner import (
16
17
  parse_token_usage,
17
18
  send_callback,
18
19
  )
20
+ from .stages import StageReporter
19
21
  from .state import (
20
22
  ErrorCode,
21
23
  ExecutorState,
@@ -42,11 +44,12 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
42
44
  """
43
45
 
44
46
  task_id = task.id
47
+ reporter = StageReporter(task.id, lambda line: log_progress(line))
45
48
  log_progress(f"\U0001f680 Starting: {task.name}", task_id)
46
49
  logger.info("Executing task", task_id=task_id, name=task.name)
47
50
 
48
51
  # Pre-start hook
49
- if not pre_start_hook(task, config):
52
+ if not pre_start_hook(task, config, reporter=reporter):
50
53
  logger.error("Pre-start hook failed", task_id=task_id)
51
54
  state.record_attempt(
52
55
  task_id,
@@ -118,6 +121,7 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
118
121
  skip_permissions=config.skip_permissions,
119
122
  )
120
123
 
124
+ reporter.enter("codex")
121
125
  result = subprocess.run(
122
126
  cmd,
123
127
  capture_output=True,
@@ -181,6 +185,7 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
181
185
  success = (has_complete_marker and not has_failed_marker) or implicit_success
182
186
 
183
187
  if success:
188
+ reporter.enter("parse")
184
189
  if has_complete_marker:
185
190
  logger.info("Task completed by Claude", task_id=task_id)
186
191
  else:
@@ -188,7 +193,7 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
188
193
 
189
194
  # Post-done hook (tests, lint, review)
190
195
  hook_success, hook_error, review_status, review_findings = post_done_hook(
191
- task, config, True
196
+ task, config, True, reporter=reporter
192
197
  )
193
198
 
194
199
  if hook_success:
@@ -263,7 +268,11 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
263
268
  else:
264
269
  # Claude reported failure
265
270
  error_match = re.search(r"TASK_FAILED:\s*(.+)", output)
266
- error = error_match.group(1) if error_match else "Unknown error"
271
+ if error_match:
272
+ error = error_match.group(1)
273
+ error_kind = "cli_error"
274
+ else:
275
+ error_kind, error = classify(result.stderr, result.returncode)
267
276
  state.record_attempt(
268
277
  task_id,
269
278
  False,
@@ -274,6 +283,8 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
274
283
  input_tokens=input_tokens,
275
284
  output_tokens=output_tokens,
276
285
  cost_usd=cost_usd,
286
+ error_kind=error_kind,
287
+ error_stage=reporter.current,
277
288
  )
278
289
  log_progress(f"\u274c Failed: {error[:50]}", task_id)
279
290
  send_callback(
@@ -23,6 +23,7 @@ from .review import (
23
23
  run_code_review,
24
24
  run_parallel_review,
25
25
  )
26
+ from .stages import StageReporter
26
27
  from .state import ReviewVerdict
27
28
  from .task import Task
28
29
 
@@ -46,11 +47,13 @@ __all__ = [
46
47
  ]
47
48
 
48
49
 
49
- def pre_start_hook(task: Task, config: ExecutorConfig) -> bool:
50
+ def pre_start_hook(task: Task, config: ExecutorConfig, *, reporter: StageReporter | None = None) -> bool:
50
51
  """Hook before starting task"""
51
52
  logger.info("Pre-start hook", task_id=task.id)
52
53
 
53
54
  # Sync dependencies
55
+ if reporter:
56
+ reporter.enter("sync_deps")
54
57
  logger.info("Syncing dependencies")
55
58
  result = subprocess.run(["uv", "sync"], capture_output=True, text=True, cwd=config.project_root)
56
59
  if result.returncode == 0:
@@ -60,6 +63,8 @@ def pre_start_hook(task: Task, config: ExecutorConfig) -> bool:
60
63
 
61
64
  # Create git branch
62
65
  if config.create_git_branch:
66
+ if reporter:
67
+ reporter.enter("branch")
63
68
  branch_name = get_task_branch_name(task)
64
69
  try:
65
70
  # Check if git exists
@@ -158,6 +163,8 @@ def post_done_hook(
158
163
  config: ExecutorConfig,
159
164
  success: bool,
160
165
  changed_since: float | None = None,
166
+ *,
167
+ reporter: StageReporter | None = None,
161
168
  ) -> tuple[bool, str | None, str, str]:
162
169
  """Hook after task completion.
163
170
 
@@ -175,6 +182,8 @@ def post_done_hook(
175
182
  # Run tests — capture output for review context
176
183
  test_output_str: str | None = None
177
184
  if config.run_tests_on_done:
185
+ if reporter:
186
+ reporter.enter("tests")
178
187
  test_cmd = config.test_command
179
188
 
180
189
  # Scope tests to changed files when running in parallel mode
@@ -220,6 +229,8 @@ def post_done_hook(
220
229
  # Run lint — capture output for review context
221
230
  lint_output_str: str | None = None
222
231
  if config.run_lint_on_done and config.lint_command:
232
+ if reporter:
233
+ reporter.enter("lint")
223
234
  logger.info("Running lint")
224
235
  result = subprocess.run(
225
236
  config.lint_command,
@@ -287,6 +298,8 @@ def post_done_hook(
287
298
  if config.hitl_review and not config.run_review:
288
299
  logger.warning("hitl_review enabled but run_review is False; HITL gate skipped")
289
300
  if config.run_review:
301
+ if reporter:
302
+ reporter.enter("review")
290
303
  review_fn = run_parallel_review if config.review_parallel else run_code_review
291
304
  logger.info(
292
305
  "Running code review",
@@ -332,6 +345,8 @@ def post_done_hook(
332
345
 
333
346
  # Auto-commit
334
347
  if config.auto_commit:
348
+ if reporter:
349
+ reporter.enter("commit")
335
350
  try:
336
351
  # Check if there are changes to commit
337
352
  status_result = subprocess.run(
@@ -366,6 +381,8 @@ def post_done_hook(
366
381
 
367
382
  # Merge branch to main
368
383
  if config.create_git_branch:
384
+ if reporter:
385
+ reporter.enter("merge")
369
386
  try:
370
387
  branch_name = get_task_branch_name(task)
371
388
  main_branch = get_main_branch(config)