spec-runner 2.3.0__tar.gz → 2.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. {spec_runner-2.3.0/src/spec_runner.egg-info → spec_runner-2.4.0}/PKG-INFO +37 -4
  2. {spec_runner-2.3.0 → spec_runner-2.4.0}/README.md +36 -3
  3. {spec_runner-2.3.0 → spec_runner-2.4.0}/pyproject.toml +1 -1
  4. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/cli.py +53 -8
  5. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/cli_plan.py +28 -1
  6. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/config.py +2 -0
  7. spec_runner-2.4.0/src/spec_runner/doctor.py +417 -0
  8. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/errors.py +1 -3
  9. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/execution.py +26 -15
  10. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/hooks.py +40 -14
  11. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/runner.py +142 -17
  12. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/SKILL.md +4 -0
  13. spec_runner-2.4.0/src/spec_runner/skills/spec-generator-skill/templates/pi/skills/pi-implementer/SKILL.md +37 -0
  14. spec_runner-2.4.0/src/spec_runner/skills/spec-generator-skill/templates/pi/skills/pi-reviewer/SKILL.md +32 -0
  15. spec_runner-2.4.0/src/spec_runner/skills/spec-generator-skill/templates/pi/skills/pi-tester/SKILL.md +28 -0
  16. spec_runner-2.4.0/src/spec_runner/skills/spec-generator-skill/templates/pi/spec-runner.pi.config.yaml +51 -0
  17. spec_runner-2.4.0/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.pi.md +38 -0
  18. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/state.py +5 -13
  19. {spec_runner-2.3.0 → spec_runner-2.4.0/src/spec_runner.egg-info}/PKG-INFO +37 -4
  20. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner.egg-info/SOURCES.txt +6 -0
  21. spec_runner-2.4.0/tests/test_doctor.py +623 -0
  22. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_errors.py +1 -4
  23. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_execution.py +184 -54
  24. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_hooks.py +70 -0
  25. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_plan_full.py +61 -0
  26. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_runner.py +156 -0
  27. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_stages.py +9 -2
  28. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_state.py +6 -2
  29. spec_runner-2.3.0/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.pi.md +0 -38
  30. {spec_runner-2.3.0 → spec_runner-2.4.0}/LICENSE +0 -0
  31. {spec_runner-2.3.0 → spec_runner-2.4.0}/setup.cfg +0 -0
  32. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/__init__.py +0 -0
  33. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/audit.py +0 -0
  34. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/audit_log.py +0 -0
  35. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/cli_info.py +0 -0
  36. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/events.py +0 -0
  37. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/executor.py +0 -0
  38. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/git_ops.py +0 -0
  39. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/github_sync.py +0 -0
  40. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/init_cmd.py +0 -0
  41. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/logging.py +0 -0
  42. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/mcp_server.py +0 -0
  43. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/notifications.py +0 -0
  44. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/obs.py +0 -0
  45. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/plugins.py +0 -0
  46. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/prompt.py +0 -0
  47. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/py.typed +0 -0
  48. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/report.py +0 -0
  49. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/review.py +0 -0
  50. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/Makefile.template +0 -0
  51. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/design.template.md +0 -0
  52. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/executor.config.yaml +0 -0
  53. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/executor.py +0 -0
  54. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/phase-design.template.md +0 -0
  55. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/phase-requirements.template.md +0 -0
  56. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/phase-tasks.template.md +0 -0
  57. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.claude.md +0 -0
  58. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.codex.md +0 -0
  59. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.llama.md +0 -0
  60. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.md +0 -0
  61. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.ollama.md +0 -0
  62. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.opencode.md +0 -0
  63. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/requirements.template.md +0 -0
  64. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/task.py +0 -0
  65. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/tasks.template.md +0 -0
  66. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/skills/spec-generator-skill/templates/workflow.template.md +0 -0
  67. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/stages.py +0 -0
  68. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/task.py +0 -0
  69. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/task_commands.py +0 -0
  70. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/tui.py +0 -0
  71. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/validate.py +0 -0
  72. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/verify.py +0 -0
  73. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner.egg-info/dependency_links.txt +0 -0
  74. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner.egg-info/entry_points.txt +0 -0
  75. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner.egg-info/requires.txt +0 -0
  76. {spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner.egg-info/top_level.txt +0 -0
  77. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_audit.py +0 -0
  78. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_audit_log.py +0 -0
  79. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_cli_flags.py +0 -0
  80. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_cli_info.py +0 -0
  81. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_cli_run_reset.py +0 -0
  82. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_config.py +0 -0
  83. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_costs.py +0 -0
  84. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_e2e.py +0 -0
  85. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_events.py +0 -0
  86. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_gh_sync.py +0 -0
  87. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_json_result_contract.py +0 -0
  88. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_logging.py +0 -0
  89. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_mcp.py +0 -0
  90. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_notifications.py +0 -0
  91. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_obs.py +0 -0
  92. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_obs_contract.py +0 -0
  93. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_plugins.py +0 -0
  94. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_prompt.py +0 -0
  95. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_report.py +0 -0
  96. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_spec_prefix.py +0 -0
  97. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_subdir_detection.py +0 -0
  98. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_task.py +0 -0
  99. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_task_diff.py +0 -0
  100. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_tui.py +0 -0
  101. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_validate.py +0 -0
  102. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_verify.py +0 -0
  103. {spec_runner-2.3.0 → spec_runner-2.4.0}/tests/test_watch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spec-runner
3
- Version: 2.3.0
3
+ Version: 2.4.0
4
4
  Summary: Task automation from markdown specs via Claude CLI
5
5
  Author: Andrei
6
6
  License-Expression: MIT
@@ -134,7 +134,8 @@ Tasks are defined in `spec/tasks.md`:
134
134
  # Execution
135
135
  spec-runner run # Execute next ready task
136
136
  spec-runner run --task=TASK-001 # Execute specific task
137
- spec-runner run --all # Execute all ready tasks
137
+ spec-runner run --all # Execute all ready tasks (resets failed→pending by default)
138
+ spec-runner run --all --no-reset-failed # Keep failed tasks sticky (skip the default reset)
138
139
  spec-runner run --all --hitl-review # Interactive HITL approval gate
139
140
  spec-runner run --force # Skip lock check (stale lock)
140
141
  spec-runner run --tui # Execute with live TUI dashboard
@@ -177,6 +178,14 @@ spec-runner report --json # JSON matrix output
177
178
  # Planning
178
179
  spec-runner plan "description" # Interactive task planning
179
180
  spec-runner plan --full "description" # Generate full spec (requirements + design + tasks)
181
+ spec-runner plan --full --from-file spec.md # Read the description from a file instead of an arg
182
+
183
+ # Diagnostics
184
+ spec-runner doctor # Probe the configured CLI/model (real mini-task)
185
+ spec-runner doctor --cli=codex --model=gpt-5.4 # Probe an ad-hoc CLI+model
186
+ spec-runner doctor --with-review # Also probe the review stage
187
+ spec-runner doctor --json --yes # Machine-readable, no confirmation (CI)
188
+ spec-runner doctor --strict # Exit non-zero on DEGRADED too
180
189
 
181
190
  # Integration
182
191
  spec-runner mcp # Launch MCP server (stdio)
@@ -368,13 +377,34 @@ paths:
368
377
  | CLI | Auto-detected | Example template |
369
378
  |-----|--------------|------------------|
370
379
  | Claude | Yes | `{cmd} -p {prompt} --model {model}` |
371
- | Codex | Yes | `{cmd} -p {prompt} --model {model}` |
380
+ | Codex | Yes | `{cmd} exec -m {model} {prompt}` (codex's `-p` is `--profile`, not the prompt) |
372
381
  | OpenCode ([sst/opencode](https://opencode.ai)) | Yes | `{cmd} run --model {model} {prompt}` |
373
382
  | Pi Agent ([pi.dev](https://pi.dev)) | Yes (basename match) | `{cmd} -p --model {model} {prompt}` |
374
383
  | Ollama | Yes | `{cmd} run {model} {prompt}` |
375
384
  | llama-cli | Yes | `{cmd} -m {model} -p {prompt} --no-display-prompt` |
376
385
  | Custom | Use template | `{cmd} --prompt {prompt}` |
377
386
 
387
+ > **Full pi-driven loop:** `pi` can run the entire dev → review → test cycle (with native
388
+ > skills, per-stage tool control and a read-only review gate) using only config and a small
389
+ > script — no core code. See [docs/pi-workflow.md](docs/pi-workflow.md) and the runnable
390
+ > [examples/pi-loop/](examples/pi-loop/).
391
+
392
+ ### Checking CLI/model compatibility
393
+
394
+ `spec-runner doctor` runs a real one-task probe through the actual execution
395
+ path and reports, per capability, whether your CLI/model works:
396
+
397
+ - **invocation** — the command runs and authenticates
398
+ - **completion_marker** — the model prints `TASK_COMPLETE` (not all models do)
399
+ - **task_action** — the model actually performs the work
400
+ - **cost_tracking** — token/cost parsing works (needed for `costs`/`--budget`)
401
+ - **error_classification** — failures are classified (diagnostic)
402
+ - **review** *(with `--with-review`)* — the reviewer prints `REVIEW_PASSED`/`FAILED`
403
+
404
+ Verdict: **READY** / **DEGRADED** (works, but something like cost tracking is
405
+ unavailable) / **BROKEN**. It makes real, billable model calls (capped by
406
+ `--budget`, default $0.50) and asks for confirmation unless `--yes`.
407
+
378
408
  ## Project Structure
379
409
 
380
410
  ```
@@ -391,6 +421,8 @@ project/
391
421
  │ ├── cli_info.py # Status, costs, logs, validate, verify, report, TUI, MCP
392
422
  │ ├── cli_plan.py # Interactive planning command
393
423
  │ ├── execution.py # Task execution + retry logic
424
+ │ ├── errors.py # CLI stderr → human-readable failure reasons
425
+ │ ├── stages.py # Per-task sub-stage tracking (StageReporter)
394
426
  │ ├── config.py # ExecutorConfig + YAML loading
395
427
  │ ├── state.py # SQLite state persistence + degraded-mode fallback
396
428
  │ ├── prompt.py # Prompt building + templates
@@ -407,7 +439,8 @@ project/
407
439
  │ ├── report.py # Traceability matrix generation
408
440
  │ ├── validate.py # Config + task validation
409
441
  │ ├── plugins.py # Plugin discovery + hooks
410
- │ ├── logging.py # Structured logging (structlog)
442
+ │ ├── logging.py # Structured logging (structlog back-compat shim)
443
+ │ ├── obs.py # OTel JSONL observability emitter (shared contract)
411
444
  │ ├── events.py # EventBus for streaming to TUI
412
445
  │ ├── notifications.py # Telegram + webhook notifications
413
446
  │ ├── tui.py # Textual TUI dashboard
@@ -99,7 +99,8 @@ Tasks are defined in `spec/tasks.md`:
99
99
  # Execution
100
100
  spec-runner run # Execute next ready task
101
101
  spec-runner run --task=TASK-001 # Execute specific task
102
- spec-runner run --all # Execute all ready tasks
102
+ spec-runner run --all # Execute all ready tasks (resets failed→pending by default)
103
+ spec-runner run --all --no-reset-failed # Keep failed tasks sticky (skip the default reset)
103
104
  spec-runner run --all --hitl-review # Interactive HITL approval gate
104
105
  spec-runner run --force # Skip lock check (stale lock)
105
106
  spec-runner run --tui # Execute with live TUI dashboard
@@ -142,6 +143,14 @@ spec-runner report --json # JSON matrix output
142
143
  # Planning
143
144
  spec-runner plan "description" # Interactive task planning
144
145
  spec-runner plan --full "description" # Generate full spec (requirements + design + tasks)
146
+ spec-runner plan --full --from-file spec.md # Read the description from a file instead of an arg
147
+
148
+ # Diagnostics
149
+ spec-runner doctor # Probe the configured CLI/model (real mini-task)
150
+ spec-runner doctor --cli=codex --model=gpt-5.4 # Probe an ad-hoc CLI+model
151
+ spec-runner doctor --with-review # Also probe the review stage
152
+ spec-runner doctor --json --yes # Machine-readable, no confirmation (CI)
153
+ spec-runner doctor --strict # Exit non-zero on DEGRADED too
145
154
 
146
155
  # Integration
147
156
  spec-runner mcp # Launch MCP server (stdio)
@@ -333,13 +342,34 @@ paths:
333
342
  | CLI | Auto-detected | Example template |
334
343
  |-----|--------------|------------------|
335
344
  | Claude | Yes | `{cmd} -p {prompt} --model {model}` |
336
- | Codex | Yes | `{cmd} -p {prompt} --model {model}` |
345
+ | Codex | Yes | `{cmd} exec -m {model} {prompt}` (codex's `-p` is `--profile`, not the prompt) |
337
346
  | OpenCode ([sst/opencode](https://opencode.ai)) | Yes | `{cmd} run --model {model} {prompt}` |
338
347
  | Pi Agent ([pi.dev](https://pi.dev)) | Yes (basename match) | `{cmd} -p --model {model} {prompt}` |
339
348
  | Ollama | Yes | `{cmd} run {model} {prompt}` |
340
349
  | llama-cli | Yes | `{cmd} -m {model} -p {prompt} --no-display-prompt` |
341
350
  | Custom | Use template | `{cmd} --prompt {prompt}` |
342
351
 
352
+ > **Full pi-driven loop:** `pi` can run the entire dev → review → test cycle (with native
353
+ > skills, per-stage tool control and a read-only review gate) using only config and a small
354
+ > script — no core code. See [docs/pi-workflow.md](docs/pi-workflow.md) and the runnable
355
+ > [examples/pi-loop/](examples/pi-loop/).
356
+
357
+ ### Checking CLI/model compatibility
358
+
359
+ `spec-runner doctor` runs a real one-task probe through the actual execution
360
+ path and reports, per capability, whether your CLI/model works:
361
+
362
+ - **invocation** — the command runs and authenticates
363
+ - **completion_marker** — the model prints `TASK_COMPLETE` (not all models do)
364
+ - **task_action** — the model actually performs the work
365
+ - **cost_tracking** — token/cost parsing works (needed for `costs`/`--budget`)
366
+ - **error_classification** — failures are classified (diagnostic)
367
+ - **review** *(with `--with-review`)* — the reviewer prints `REVIEW_PASSED`/`FAILED`
368
+
369
+ Verdict: **READY** / **DEGRADED** (works, but something like cost tracking is
370
+ unavailable) / **BROKEN**. It makes real, billable model calls (capped by
371
+ `--budget`, default $0.50) and asks for confirmation unless `--yes`.
372
+
343
373
  ## Project Structure
344
374
 
345
375
  ```
@@ -356,6 +386,8 @@ project/
356
386
  │ ├── cli_info.py # Status, costs, logs, validate, verify, report, TUI, MCP
357
387
  │ ├── cli_plan.py # Interactive planning command
358
388
  │ ├── execution.py # Task execution + retry logic
389
+ │ ├── errors.py # CLI stderr → human-readable failure reasons
390
+ │ ├── stages.py # Per-task sub-stage tracking (StageReporter)
359
391
  │ ├── config.py # ExecutorConfig + YAML loading
360
392
  │ ├── state.py # SQLite state persistence + degraded-mode fallback
361
393
  │ ├── prompt.py # Prompt building + templates
@@ -372,7 +404,8 @@ project/
372
404
  │ ├── report.py # Traceability matrix generation
373
405
  │ ├── validate.py # Config + task validation
374
406
  │ ├── plugins.py # Plugin discovery + hooks
375
- │ ├── logging.py # Structured logging (structlog)
407
+ │ ├── logging.py # Structured logging (structlog back-compat shim)
408
+ │ ├── obs.py # OTel JSONL observability emitter (shared contract)
376
409
  │ ├── events.py # EventBus for streaming to TUI
377
410
  │ ├── notifications.py # Telegram + webhook notifications
378
411
  │ ├── tui.py # Textual TUI dashboard
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spec-runner"
7
- version = "2.3.0"
7
+ version = "2.4.0"
8
8
  description = "Task automation from markdown specs via Claude CLI"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -202,9 +202,7 @@ def _run_tasks(args, config: ExecutorConfig):
202
202
  tasks = parse_tasks(config.tasks_file)
203
203
 
204
204
  # v2.3.0: reset failed-task state on `run --all` unless opted out.
205
- reset_enabled = getattr(args, "all", False) and not getattr(
206
- args, "no_reset_failed", False
207
- )
205
+ reset_enabled = getattr(args, "all", False) and not getattr(args, "no_reset_failed", False)
208
206
  previously_failed: set[str] = set() # used by T17 second-pass detection
209
207
  if reset_enabled:
210
208
  previously_failed = state.reset_failed_to_pending()
@@ -416,8 +414,7 @@ def _run_tasks(args, config: ExecutorConfig):
416
414
  else:
417
415
  stop_reason = "max_consecutive_failures"
418
416
  stop_detail = (
419
- f"{state.consecutive_failures}/"
420
- f"{config.max_consecutive_failures}"
417
+ f"{state.consecutive_failures}/{config.max_consecutive_failures}"
421
418
  )
422
419
  logger.warning("Stopping: too many consecutive failures")
423
420
  break
@@ -460,8 +457,7 @@ def _run_tasks(args, config: ExecutorConfig):
460
457
  else:
461
458
  stop_reason = "max_consecutive_failures"
462
459
  stop_detail = (
463
- f"{state.consecutive_failures}/"
464
- f"{config.max_consecutive_failures}"
460
+ f"{state.consecutive_failures}/{config.max_consecutive_failures}"
465
461
  )
466
462
  logger.warning("Stopping: too many consecutive failures")
467
463
  break
@@ -663,6 +659,25 @@ def cmd_watch(args: argparse.Namespace, config: ExecutorConfig) -> None:
663
659
  time.sleep(1)
664
660
 
665
661
 
662
def cmd_doctor(args: argparse.Namespace, config: ExecutorConfig) -> None:
    """Run the CLI/model compatibility probe and exit with its status code.

    Args:
        args: parsed ``doctor`` sub-command arguments.
        config: the loaded executor configuration handed to the probe.

    Raises:
        SystemExit: always, carrying the value returned by ``run_doctor``.
    """
    # Imported inside the function, as in the original layout of this command.
    from .doctor import run_doctor

    probe_options = {
        "cli": args.cli,
        "model": args.model,
        "with_review": args.with_review,
        "budget": args.budget,
        # The namespace may not carry a ``timeout`` attribute; fall back to None.
        "timeout_min": getattr(args, "timeout", None),
        "assume_yes": args.yes,
        "strict": args.strict,
        "as_json": args.json,
        "keep": args.keep,
    }
    exit_status = run_doctor(config, **probe_options)
    raise SystemExit(exit_status)
679
+
680
+
666
681
  # === Main ===
667
682
 
668
683
 
@@ -853,7 +868,14 @@ def _build_parser() -> argparse.ArgumentParser:
853
868
 
854
869
  # plan
855
870
  plan_parser = subparsers.add_parser("plan", parents=[common], help="Interactive task planning")
856
- plan_parser.add_argument("description", help="Feature description")
871
+ plan_parser.add_argument(
872
+ "description", nargs="?", default=None, help="Feature description (or use --from-file)"
873
+ )
874
+ plan_parser.add_argument(
875
+ "--from-file",
876
+ metavar="PATH",
877
+ help="Read the feature description from a file instead of the positional argument",
878
+ )
857
879
  plan_parser.add_argument(
858
880
  "--full",
859
881
  action="store_true",
@@ -943,6 +965,28 @@ def _build_parser() -> argparse.ArgumentParser:
943
965
  # mcp
944
966
  subparsers.add_parser("mcp", parents=[common], help="Launch read-only MCP server")
945
967
 
968
+ # doctor
969
+ doctor_parser = subparsers.add_parser(
970
+ "doctor", parents=[common], help="Probe CLI/model compatibility (real mini-task)"
971
+ )
972
+ doctor_parser.add_argument("--cli", help="Override the CLI command (claude/codex/pi/...)")
973
+ doctor_parser.add_argument("--model", help="Override the model (executor + review)")
974
+ doctor_parser.add_argument(
975
+ "--with-review",
976
+ action="store_true",
977
+ help="Also probe the review stage (2nd model call)",
978
+ )
979
+ doctor_parser.add_argument(
980
+ "--yes", "-y", action="store_true", help="Skip the cost-gate confirmation"
981
+ )
982
+ doctor_parser.add_argument(
983
+ "--strict", action="store_true", help="Exit non-zero on DEGRADED too"
984
+ )
985
+ doctor_parser.add_argument("--json", action="store_true", help="Machine-readable output")
986
+ doctor_parser.add_argument("--keep", action="store_true", help="Keep the scratch workspace")
987
+ # --budget is inherited from common (default None); override default to 0.50 for doctor
988
+ doctor_parser.set_defaults(budget=0.5)
989
+
946
990
  # task (unified: replaces spec-task binary)
947
991
  task_parser = subparsers.add_parser(
948
992
  "task", help="Task management (list, show, start, done, graph, sync)"
@@ -1038,6 +1082,7 @@ def main():
1038
1082
  "tui": cmd_tui,
1039
1083
  "watch": cmd_watch,
1040
1084
  "mcp": cmd_mcp,
1085
+ "doctor": cmd_doctor,
1041
1086
  }
1042
1087
 
1043
1088
  # Handle unified task subcommand
@@ -4,6 +4,7 @@ import re
4
4
  import subprocess
5
5
  import sys
6
6
  from datetime import datetime
7
+ from pathlib import Path
7
8
 
8
9
  from .config import ExecutorConfig
9
10
  from .logging import get_logger
@@ -23,6 +24,32 @@ from .task import (
23
24
  logger = get_logger("cli")
24
25
 
25
26
 
27
+ def resolve_plan_description(description: str | None, from_file: str | None) -> str:
28
+ """Resolve the plan description from --from-file (preferred) or the positional
29
+ argument. Exits with an error if neither is usable.
30
+
31
+ Args:
32
+ description: the positional description (may be None when --from-file is used).
33
+ from_file: path to a file whose contents are the description.
34
+ """
35
+ if from_file:
36
+ path = Path(from_file)
37
+ if not path.is_file():
38
+ raise SystemExit(f"plan --from-file: not a readable file: {from_file}")
39
+ try:
40
+ text = path.read_text(encoding="utf-8").strip()
41
+ except UnicodeDecodeError as e:
42
+ raise SystemExit(f"plan --from-file: not valid UTF-8 text: {from_file}") from e
43
+ except OSError as e:
44
+ raise SystemExit(f"plan --from-file: cannot read {from_file}: {e}") from e
45
+ if not text:
46
+ raise SystemExit(f"plan --from-file: file is empty: {from_file}")
47
+ return text
48
+ if description and description.strip():
49
+ return description
50
+ raise SystemExit("plan: provide a description argument or --from-file PATH")
51
+
52
+
26
53
  def cmd_plan(args, config: ExecutorConfig):
27
54
  """Interactive task planning via Claude.
28
55
 
@@ -30,7 +57,7 @@ def cmd_plan(args, config: ExecutorConfig):
30
57
  requirements, design, and tasks files from a description.
31
58
  """
32
59
 
33
- description = args.description
60
+ description = resolve_plan_description(args.description, getattr(args, "from_file", None))
34
61
 
35
62
  if getattr(args, "full", False):
36
63
  from .prompt import build_generation_prompt, parse_spec_marker
@@ -153,6 +153,7 @@ class ExecutorConfig:
153
153
  create_git_branch: bool = True # Create branch on start
154
154
  auto_commit: bool = True # Auto-commit on success
155
155
  main_branch: str = "" # Main branch name (empty = auto-detect: main/master)
156
+ sync_deps: bool = True # Run `uv sync` in pre_start_hook (doctor disables this)
156
157
 
157
158
  # Code review
158
159
  run_review: bool = True # Run code review after task completion
@@ -377,6 +378,7 @@ def load_config_from_yaml(config_path: Path | None = None) -> dict:
377
378
  "claude_model": executor_config.get("claude_model"),
378
379
  "skip_permissions": executor_config.get("skip_permissions"),
379
380
  "create_git_branch": pre_start.get("create_git_branch"),
381
+ "sync_deps": pre_start.get("sync_deps"),
380
382
  "main_branch": executor_config.get("main_branch"),
381
383
  "run_tests_on_done": post_done.get("run_tests"),
382
384
  "run_lint_on_done": post_done.get("run_lint"),