spec-runner 2.0.0__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. spec_runner-2.2.0/PKG-INFO +435 -0
  2. spec_runner-2.2.0/README.md +400 -0
  3. {spec_runner-2.0.0 → spec_runner-2.2.0}/pyproject.toml +3 -1
  4. spec_runner-2.2.0/src/spec_runner/audit.py +293 -0
  5. spec_runner-2.2.0/src/spec_runner/audit_log.py +212 -0
  6. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/cli.py +93 -23
  7. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/cli_info.py +24 -0
  8. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/config.py +21 -4
  9. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/execution.py +59 -17
  10. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/github_sync.py +1 -1
  11. spec_runner-2.2.0/src/spec_runner/logging.py +45 -0
  12. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/mcp_server.py +19 -4
  13. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/notifications.py +38 -9
  14. spec_runner-2.2.0/src/spec_runner/obs.py +271 -0
  15. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/report.py +37 -0
  16. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/runner.py +27 -2
  17. spec_runner-2.2.0/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.opencode.md +38 -0
  18. spec_runner-2.2.0/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.pi.md +38 -0
  19. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/state.py +173 -8
  20. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/task.py +112 -0
  21. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/tui.py +61 -7
  22. spec_runner-2.2.0/src/spec_runner.egg-info/PKG-INFO +435 -0
  23. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner.egg-info/SOURCES.txt +11 -0
  24. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner.egg-info/requires.txt +1 -0
  25. spec_runner-2.2.0/tests/test_audit.py +338 -0
  26. spec_runner-2.2.0/tests/test_audit_log.py +257 -0
  27. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_execution.py +186 -0
  28. spec_runner-2.2.0/tests/test_gh_sync.py +654 -0
  29. spec_runner-2.2.0/tests/test_json_result_contract.py +257 -0
  30. spec_runner-2.2.0/tests/test_obs.py +262 -0
  31. spec_runner-2.2.0/tests/test_obs_contract.py +48 -0
  32. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_report.py +70 -0
  33. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_runner.py +36 -0
  34. spec_runner-2.2.0/tests/test_spec_prefix.py +341 -0
  35. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_state.py +97 -0
  36. spec_runner-2.2.0/tests/test_task_diff.py +109 -0
  37. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_tui.py +97 -0
  38. spec_runner-2.0.0/PKG-INFO +0 -345
  39. spec_runner-2.0.0/README.md +0 -311
  40. spec_runner-2.0.0/src/spec_runner/logging.py +0 -102
  41. spec_runner-2.0.0/src/spec_runner.egg-info/PKG-INFO +0 -345
  42. spec_runner-2.0.0/tests/test_gh_sync.py +0 -279
  43. spec_runner-2.0.0/tests/test_spec_prefix.py +0 -155
  44. {spec_runner-2.0.0 → spec_runner-2.2.0}/LICENSE +0 -0
  45. {spec_runner-2.0.0 → spec_runner-2.2.0}/setup.cfg +0 -0
  46. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/__init__.py +0 -0
  47. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/cli_plan.py +0 -0
  48. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/events.py +0 -0
  49. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/executor.py +0 -0
  50. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/git_ops.py +0 -0
  51. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/hooks.py +0 -0
  52. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/init_cmd.py +0 -0
  53. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/plugins.py +0 -0
  54. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/prompt.py +0 -0
  55. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/py.typed +0 -0
  56. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/review.py +0 -0
  57. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/SKILL.md +0 -0
  58. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/Makefile.template +0 -0
  59. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/design.template.md +0 -0
  60. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/executor.config.yaml +0 -0
  61. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/executor.py +0 -0
  62. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/phase-design.template.md +0 -0
  63. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/phase-requirements.template.md +0 -0
  64. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/phase-tasks.template.md +0 -0
  65. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.claude.md +0 -0
  66. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.codex.md +0 -0
  67. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.llama.md +0 -0
  68. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.md +0 -0
  69. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/prompts/review.ollama.md +0 -0
  70. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/requirements.template.md +0 -0
  71. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/task.py +0 -0
  72. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/tasks.template.md +0 -0
  73. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/skills/spec-generator-skill/templates/workflow.template.md +0 -0
  74. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/task_commands.py +0 -0
  75. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/validate.py +0 -0
  76. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner/verify.py +0 -0
  77. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner.egg-info/dependency_links.txt +0 -0
  78. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner.egg-info/entry_points.txt +0 -0
  79. {spec_runner-2.0.0 → spec_runner-2.2.0}/src/spec_runner.egg-info/top_level.txt +0 -0
  80. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_config.py +0 -0
  81. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_costs.py +0 -0
  82. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_e2e.py +0 -0
  83. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_events.py +0 -0
  84. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_hooks.py +0 -0
  85. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_logging.py +0 -0
  86. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_mcp.py +0 -0
  87. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_notifications.py +0 -0
  88. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_plan_full.py +0 -0
  89. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_plugins.py +0 -0
  90. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_prompt.py +0 -0
  91. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_validate.py +0 -0
  92. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_verify.py +0 -0
  93. {spec_runner-2.0.0 → spec_runner-2.2.0}/tests/test_watch.py +0 -0
@@ -0,0 +1,435 @@
1
+ Metadata-Version: 2.4
2
+ Name: spec-runner
3
+ Version: 2.2.0
4
+ Summary: Task automation from markdown specs via Claude CLI
5
+ Author: Andrei
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/andrei-shtanakov/spec-runner
8
+ Project-URL: Repository, https://github.com/andrei-shtanakov/spec-runner
9
+ Keywords: automation,tasks,claude,cli,specs,markdown
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Classifier: Operating System :: MacOS
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Build Tools
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: mcp>=1.26.0
24
+ Requires-Dist: PyYAML>=6.0
25
+ Requires-Dist: structlog>=24.0
26
+ Requires-Dist: textual>=1.0
27
+ Requires-Dist: ulid-py>=1.1.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.0; extra == "dev"
30
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
31
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
32
+ Requires-Dist: mypy>=1.0; extra == "dev"
33
+ Requires-Dist: types-PyYAML; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ # spec-runner
37
+
38
+ Task automation from markdown specs via Claude CLI. Execute tasks from a structured `tasks.md` file with automatic retries, 5-role code review, Git integration, compliance verification, traceability reporting, and a live TUI dashboard.
39
+
40
+ ## Installation
41
+
42
+ ```bash
43
+ uv add spec-runner
44
+ ```
45
+
46
+ Or for development:
47
+ ```bash
48
+ uv sync
49
+ ```
50
+
51
+ Requirements:
52
+ - Python 3.10+
53
+ - Claude CLI (`claude` command available)
54
+ - Git (for branch management)
55
+ - `gh` CLI (optional, for GitHub Issues sync)
56
+
57
+ ## Quick Start
58
+
59
+ ```bash
60
+ # Install Claude Code skills (creates .claude/skills in current project)
61
+ spec-runner-init
62
+
63
+ # Execute next ready task
64
+ spec-runner run
65
+
66
+ # Execute specific task
67
+ spec-runner run --task=TASK-001
68
+
69
+ # Execute all ready tasks
70
+ spec-runner run --all
71
+
72
+ # Execute with live TUI dashboard
73
+ spec-runner run --all --tui
74
+
75
+ # Create tasks interactively
76
+ spec-runner plan "add user authentication"
77
+
78
+ # Watch mode — continuously execute ready tasks
79
+ spec-runner watch
80
+ ```
81
+
82
+ ## Features
83
+
84
+ - **Task-based execution** — reads tasks from `spec/tasks.md` with priorities, checklists, and dependencies
85
+ - **Specification traceability** — links tasks to requirements (REQ-XXX) and design (DESIGN-XXX)
86
+ - **Automatic retries** — configurable retry policy with exponential backoff and error context forwarding
87
+ - **Code review** — multi-agent review after task completion with enriched diff context
88
+ - **Git integration** — automatic branch creation, commits, and merges
89
+ - **TUI dashboard** — live Textual-based terminal UI with progress bars and log panel
90
+ - **Cost tracking** — per-task token usage and cost breakdown
91
+ - **Watch mode** — continuously poll and execute ready tasks
92
+ - **Plugin system** — extend with custom hooks via `spec/plugins/*/plugin.yaml`
93
+ - **MCP server** — Model Context Protocol server for Claude Code integration (read + write operations)
94
+ - **GitHub Issues sync** — bidirectional sync between tasks.md and GitHub Issues
95
+ - **Interactive planning** — generate specs (requirements + design + tasks) through dialogue with Claude
96
+ - **Structured logging** — JSON/console output via structlog
97
+ - **SQLite state** — persistent execution state with WAL mode, auto-migration from legacy JSON
98
+ - **HITL review** — optional human-in-the-loop approval gate after code review
99
+ - **Parallel review** — 5 specialized review agents (quality, implementation, testing, simplification, docs) running concurrently
100
+ - **Agent personas** — role-specific prompt templates and model selection (architect, implementer, reviewer)
101
+ - **Constitution guardrails** — inviolable project rules from `spec/constitution.md` injected into every prompt
102
+ - **Telegram / webhook notifications** — alerts on task failure, run completion, and degraded-mode persistence failures (Telegram Bot API + generic webhook)
103
+ - **Degraded-mode resilience** — SQLite write failures (disk-full, DB corruption) are caught, the run continues in memory, and operators are notified once
104
+ - **Compliance audit trail** — opt-in JSON-Lines log of every task lifecycle event (started, attempt, completed/failed, state_degraded, run start/end) with operator + run-id attribution
105
+ - **Pause/resume** — pause mid-run with Ctrl+\, edit tasks, resume; TUI keybinding `p`
106
+ - **Streaming events** — live stdout streaming from Claude CLI to TUI via EventBus
107
+ - **Session/idle timeouts** — automatic stop after configurable session or idle duration
108
+
109
+ ## Task File Format
110
+
111
+ Tasks are defined in `spec/tasks.md`:
112
+
113
+ ```markdown
114
+ ## Milestone 1: MVP
115
+
116
+ ### TASK-001: Implement user login
117
+ 🔴 P0 | ⬜ TODO | Est: 2d
118
+
119
+ **Checklist:**
120
+ - [ ] Create login endpoint
121
+ - [ ] Add JWT token generation
122
+ - [ ] Write unit tests
123
+
124
+ **Traces to:** [REQ-001], [DESIGN-001]
125
+ **Depends on:** —
126
+ **Blocks:** [TASK-002], [TASK-003]
127
+ ```
128
+
129
+ ## CLI Commands
130
+
131
+ ### spec-runner
132
+
133
+ ```bash
134
+ # Execution
135
+ spec-runner run # Execute next ready task
136
+ spec-runner run --task=TASK-001 # Execute specific task
137
+ spec-runner run --all # Execute all ready tasks
138
+ spec-runner run --all --hitl-review # Interactive HITL approval gate
139
+ spec-runner run --force # Skip lock check (stale lock)
140
+ spec-runner run --tui # Execute with live TUI dashboard
141
+ spec-runner run --dry-run # Show what would execute (JSON)
142
+ spec-runner run --json-result # Structured JSON output (Maestro interop)
143
+ spec-runner run --budget=10.0 # Set global budget in USD
144
+ spec-runner run --log-level=DEBUG # Set log verbosity
145
+ spec-runner run --log-json # Output logs as JSON
146
+
147
+ # Monitoring
148
+ spec-runner status # Show execution status
149
+ spec-runner status --json # JSON status output
150
+ spec-runner costs # Cost breakdown per task
151
+ spec-runner costs --json # JSON output for automation
152
+ spec-runner costs --sort=cost # Sort by cost descending
153
+ spec-runner logs TASK-001 # View task logs
154
+
155
+ # Operations
156
+ spec-runner retry TASK-001 # Retry failed task
157
+ spec-runner reset # Reset state
158
+ spec-runner watch # Continuously execute ready tasks
159
+ spec-runner watch --tui # Watch with live TUI dashboard
160
+ spec-runner tui # Launch TUI status dashboard
161
+ spec-runner validate # Validate config and tasks
162
+
163
+ # Verification & Reporting (v2.0)
164
+ spec-runner audit # Static pre-execution spec check
165
+ spec-runner audit --strict # Fail on warnings (orphans, uncovered)
166
+ spec-runner audit --json # JSON findings output (for CI)
167
+ spec-runner audit --csv # CSV for spreadsheet review
168
+ spec-runner verify # Verify post-execution compliance
169
+ spec-runner verify --task=TASK-001 # Verify specific task
170
+ spec-runner verify --json # JSON compliance output
171
+ spec-runner verify --strict # Fail on warnings too
172
+ spec-runner report # Generate traceability matrix
173
+ spec-runner report --milestone=mvp # Filter by milestone
174
+ spec-runner report --uncovered-only # Show only uncovered requirements
175
+ spec-runner report --json # JSON matrix output
176
+
177
+ # Planning
178
+ spec-runner plan "description" # Interactive task planning
179
+ spec-runner plan --full "description" # Generate full spec (requirements + design + tasks)
180
+
181
+ # Integration
182
+ spec-runner mcp # Launch MCP server (stdio)
183
+ ```
184
+
185
+ ### Task Management (unified in v2.0)
186
+
187
+ ```bash
188
+ # Task commands (use `spec-runner task` instead of deprecated `spec-task`)
189
+ spec-runner task list # List all tasks
190
+ spec-runner task list --status=todo # Filter by status
191
+ spec-runner task list --priority=p0 # Filter by priority
192
+ spec-runner task list --milestone=mvp # Filter by milestone
193
+ spec-runner task show TASK-001 # Task details
194
+ spec-runner task start TASK-001 # Mark as in_progress
195
+ spec-runner task done TASK-001 # Mark as done
196
+ spec-runner task block TASK-001 # Mark as blocked
197
+ spec-runner task check TASK-001 2 # Mark checklist item
198
+ spec-runner task stats # Statistics
199
+ spec-runner task next # Show next ready tasks
200
+ spec-runner task graph # ASCII dependency graph
201
+
202
+ # GitHub Issues
203
+ spec-runner task export-gh # Export to GitHub Issues format
204
+ spec-runner task sync-to-gh # Sync tasks -> GitHub Issues
205
+ spec-runner task sync-to-gh --dry-run # Preview without making changes
206
+ spec-runner task sync-from-gh # Sync GitHub Issues -> tasks.md
207
+ ```
208
+
209
+ ### spec-runner-init
210
+
211
+ ```bash
212
+ spec-runner-init # Install skills to ./.claude/skills
213
+ spec-runner-init --force # Overwrite existing skills
214
+ spec-runner-init /path/to/project # Install to specific project
215
+ ```
216
+
217
+ ### Multi-phase Options
218
+
219
+ `--spec-prefix` namespaces tasks, state, logs, and history for phase-based workflows:
220
+
221
+ ```bash
222
+ spec-runner run --spec-prefix=phase5- # Uses spec/phase5-tasks.md
223
+ spec-runner task list --spec-prefix=phase5- # List phase 5 tasks
224
+ ```
225
+
226
+ Phase-scoped paths: `spec/phase5-{tasks,requirements,design}.md`, `spec/.executor-phase5-state.db`, `spec/.executor-phase5-logs/`, `spec/.phase5-task-history.log`. Multiple phases coexist without state bleed.
227
+
228
+ ## Usage as Library
229
+
230
+ ```python
231
+ from spec_runner import Task, ExecutorConfig, parse_tasks, get_next_tasks
232
+ from pathlib import Path
233
+
234
+ tasks = parse_tasks(Path("spec/tasks.md"))
235
+ ready = get_next_tasks(tasks)
236
+
237
+ for task in ready:
238
+     print(f"{task.id}: {task.name} ({task.priority})")
239
+ ```
240
+
241
+ ## MCP Server (Claude Code Integration)
242
+
243
+ spec-runner includes an MCP server for querying status and executing tasks from Claude Code.
244
+
245
+ Add to `.mcp.json`:
246
+
247
+ ```json
248
+ {
249
+ "mcpServers": {
250
+ "spec-runner": {
251
+ "command": "spec-runner",
252
+ "args": ["mcp"]
253
+ }
254
+ }
255
+ }
256
+ ```
257
+
258
+ Available tools:
259
+
260
+ | Tool | Kind | Effect |
261
+ |---|---|---|
262
+ | `spec_runner_status` | read | Returns aggregate status (completed/failed/running, cost, tokens) |
263
+ | `spec_runner_tasks` | read | Lists tasks with id/name/priority/status/deps |
264
+ | `spec_runner_next_tasks` | read | Lists ready-to-run tasks |
265
+ | `spec_runner_task_detail` | read | Returns per-task checklist, attempts, last review, cost |
266
+ | `spec_runner_costs` | read | Per-task cost/token breakdown |
267
+ | `spec_runner_logs` | read | Tail of a task's execution log |
268
+ | `spec_runner_run_task` | **write** | **Spawns a subprocess that runs Claude CLI** against the workspace. Can modify files, create git branches, run hooks (tests/lint/commit) |
269
+ | `spec_runner_stop` | **write** | Writes a stop-file that asks a running executor to shut down gracefully |
270
+
271
+ ### Security model
272
+
273
+ **Authentication.** The MCP server has **no built-in authentication**. It uses `stdio` transport and inherits the trust boundary of whatever started it (typically your terminal or Claude Code). Whoever can run the server can call any of its tools.
274
+
275
+ **Safe deployment patterns:**
276
+
277
+ - ✅ **Local stdio only** (default). Run via `spec-runner mcp` from `.mcp.json` on a single developer machine. Same trust boundary as your shell.
278
+ - ✅ **Claude Code inside your own workspace.** The MCP server operates on the workspace it's invoked in; tools like `spec_runner_run_task` will modify files in that workspace.
279
+ - ❌ **Do NOT expose over TCP, HTTP, or a shared socket** without adding authentication and audit logging — the write tools execute subprocesses that run Claude CLI with full filesystem access.
280
+ - ❌ **Do NOT run under a service account shared by multiple users or agents.** There is no per-caller identity, so audit logs cannot attribute actions.
281
+
282
+ **Write-tool blast radius.** `spec_runner_run_task` does not sandbox execution: the spawned `spec-runner run --task TASK-XXX` can:
283
+
284
+ - edit any file in the project root
285
+ - create git branches and auto-commit (if `hooks.post_done.auto_commit: true`)
286
+ - run tests, linters, and any configured hook command
287
+ - spend budget (Claude API cost) up to `budget_usd` / `task_budget_usd`
288
+
289
+ Treat the MCP server as equivalent to giving the caller shell access to the workspace.
290
+
291
+ **Hardening options** (if you need tighter limits):
292
+
293
+ - Run in a disposable container or Maestro worktree so writes are isolated
294
+ - Set `budget_usd` low to cap accidental cost spend
295
+ - Disable `hooks.post_done.auto_commit` if you want manual review before commits
296
+ - Restrict `commands.test`/`commands.lint` to safe allow-listed shell commands — they run verbatim
297
+
298
+ See also: `docs/state-schema.md` for the read contract, and `src/spec_runner/mcp_server.py` for tool implementations.
299
+
300
+ ## Configuration
301
+
302
+ Configuration file: `spec-runner.config.yaml` (project root, v2.0)
303
+
304
+ Legacy location `spec/executor.config.yaml` is still supported with a deprecation warning.
305
+
306
+ v2.0 flat format (no `executor:` wrapper):
307
+
308
+ ```yaml
309
+ max_retries: 3
310
+ task_timeout_minutes: 30
311
+ claude_command: "claude"
312
+ claude_model: "sonnet"
313
+ spec_prefix: "" # e.g. "phase5-" for phase5-tasks.md
314
+ budget_usd: 50.0 # Total budget cap (whole run)
315
+ task_budget_usd: 10.0 # Per-task cap incl. first attempt
316
+ max_retry_cost_usd: 2.0 # Cap on retry cost only (attempts 2+)
317
+
318
+ # Telegram notifications (optional)
319
+ telegram_bot_token: "" # Bot token from @BotFather
320
+ telegram_chat_id: "" # Chat ID to send notifications to
321
+ notify_on: [run_complete, task_failed, state_degraded]
322
+
323
+ # Generic webhook (optional — works with Slack, Discord, ntfy.sh, etc.)
324
+ webhook_url: "" # Webhook URL (empty = disabled)
325
+ webhook_template: '{"text": "{{event}}: {{message}}"}'
326
+
327
+ # Compliance audit trail (optional — JSON Lines, opt-in)
328
+ audit_log_path: "" # e.g. "spec/.executor-audit.jsonl"; empty = disabled
329
+ audit_log_operator: "" # Override the auto-detected "user@host" tag
330
+
331
+ # Agent personas (optional)
332
+ personas:
333
+   implementer:
334
+     system_prompt: "You are a focused Python developer"
335
+     model: "sonnet"
336
+   reviewer:
337
+     system_prompt: "You are a senior code reviewer"
338
+     model: "haiku"
339
+
340
+ hooks:
341
+   pre_start:
342
+     create_git_branch: true
343
+   post_done:
344
+     run_tests: true
345
+     run_lint: true
346
+     auto_commit: true
347
+     run_review: true
348
+     review_parallel: false # Run 5 review agents in parallel
349
+     review_roles: [quality, implementation, testing]
350
+
351
+ commands:
352
+   test: "uv run pytest tests/ -v"
353
+   lint: "uv run ruff check ."
354
+
355
+ paths:
356
+   root: "."
357
+   logs: "spec/.executor-logs"
358
+ ```
359
+
360
+ ### Git Branch Workflow
361
+
362
+ 1. **Branch detection**: Auto-detects `main` or `master`, or uses the `main_branch` config value if set
363
+ 2. **Task branches**: Creates `task/TASK-001-short-name` branches for each task
364
+ 3. **Auto-merge**: Merges task branch to main after completion
365
+
366
+ ### Supported CLIs
367
+
368
+ | CLI | Auto-detected | Example template |
369
+ |-----|--------------|------------------|
370
+ | Claude | Yes | `{cmd} -p {prompt} --model {model}` |
371
+ | Codex | Yes | `{cmd} -p {prompt} --model {model}` |
372
+ | OpenCode ([sst/opencode](https://opencode.ai)) | Yes | `{cmd} run --model {model} {prompt}` |
373
+ | Pi Agent ([pi.dev](https://pi.dev)) | Yes (basename match) | `{cmd} -p --model {model} {prompt}` |
374
+ | Ollama | Yes | `{cmd} run {model} {prompt}` |
375
+ | llama-cli | Yes | `{cmd} -m {model} -p {prompt} --no-display-prompt` |
376
+ | Custom | Use template | `{cmd} --prompt {prompt}` |
377
+
378
+ ## Project Structure
379
+
380
+ ```
381
+ project/
382
+ ├── pyproject.toml
383
+ ├── spec-runner.config.yaml # v2.0 config location
384
+ ├── Makefile
385
+ ├── .pre-commit-config.yaml
386
+ ├── src/
387
+ │ └── spec_runner/
388
+ │ ├── __init__.py
389
+ │ ├── executor.py # Re-exports (backward compat)
390
+ │ ├── cli.py # Main CLI dispatcher, cmd_run, cmd_watch
391
+ │ ├── cli_info.py # Status, costs, logs, validate, verify, report, TUI, MCP
392
+ │ ├── cli_plan.py # Interactive planning command
393
+ │ ├── execution.py # Task execution + retry logic
394
+ │ ├── config.py # ExecutorConfig + YAML loading
395
+ │ ├── state.py # SQLite state persistence + degraded-mode fallback
396
+ │ ├── prompt.py # Prompt building + templates
397
+ │ ├── hooks.py # Pre/post hook orchestration
398
+ │ ├── git_ops.py # Git branch/commit/merge operations
399
+ │ ├── review.py # 5-role code review + HITL gate
400
+ │ ├── runner.py # Subprocess execution + event streaming
401
+ │ ├── task.py # Task parsing + dependency resolution
402
+ │ ├── task_commands.py # Task CLI commands (list, show, start, etc.)
403
+ │ ├── github_sync.py # GitHub Issues sync (to/from)
404
+ │ ├── audit.py # Pre-execution static audit (LABS-37)
405
+ │ ├── audit_log.py # JSON Lines compliance audit trail (LABS-40)
406
+ │ ├── verify.py # Post-execution compliance verification
407
+ │ ├── report.py # Traceability matrix generation
408
+ │ ├── validate.py # Config + task validation
409
+ │ ├── plugins.py # Plugin discovery + hooks
410
+ │ ├── logging.py # Structured logging (structlog)
411
+ │ ├── events.py # EventBus for streaming to TUI
412
+ │ ├── notifications.py # Telegram + webhook notifications
413
+ │ ├── tui.py # Textual TUI dashboard
414
+ │ ├── mcp_server.py # MCP server (FastMCP, stdio)
415
+ │ ├── init_cmd.py # Skill installer
416
+ │ └── skills/
417
+ │ └── spec-generator-skill/
418
+ ├── docs/
419
+ │ └── state-schema.md # Maestro interop contract (SQLite + --json-result)
420
+ ├── schemas/
421
+ │ ├── executor-state.schema.json # JSON Schema for .executor-state.db contents
422
+ │ └── json-result.schema.json # JSON Schema for `run --json-result` stdout
423
+ ├── tests/
424
+ │ └── fixtures/maestro-interop/ # Golden fixtures copied by Maestro contract tests
425
+ └── spec/
426
+ ├── tasks.md
427
+ ├── requirements.md
428
+ ├── design.md
429
+ ├── FORMAT.md # Task format specification
430
+ └── plugins/ # Optional: per-plugin subdirectories with plugin.yaml
431
+ ```
432
+
433
+ ## License
434
+
435
+ MIT