step-by-step-cli 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- step_by_step_cli-0.1.0/.env.example +1 -0
- step_by_step_cli-0.1.0/.gitignore +17 -0
- step_by_step_cli-0.1.0/LICENSE +21 -0
- step_by_step_cli-0.1.0/PKG-INFO +186 -0
- step_by_step_cli-0.1.0/README.md +140 -0
- step_by_step_cli-0.1.0/app/__init__.py +0 -0
- step_by_step_cli-0.1.0/app/__main__.py +5 -0
- step_by_step_cli-0.1.0/app/agents.py +42 -0
- step_by_step_cli-0.1.0/app/claude.py +149 -0
- step_by_step_cli-0.1.0/app/git.py +152 -0
- step_by_step_cli-0.1.0/app/models.py +74 -0
- step_by_step_cli-0.1.0/app/pipeline.py +79 -0
- step_by_step_cli-0.1.0/app/prompts.py +156 -0
- step_by_step_cli-0.1.0/app/runner.py +501 -0
- step_by_step_cli-0.1.0/app/runner_steps.py +225 -0
- step_by_step_cli-0.1.0/app/stages.py +72 -0
- step_by_step_cli-0.1.0/app/styles.tcss +191 -0
- step_by_step_cli-0.1.0/app/tui.py +187 -0
- step_by_step_cli-0.1.0/app/widgets.py +168 -0
- step_by_step_cli-0.1.0/app/workers.py +130 -0
- step_by_step_cli-0.1.0/assets/screenshot.png +0 -0
- step_by_step_cli-0.1.0/pyproject.toml +40 -0
- step_by_step_cli-0.1.0/uv.lock +181 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
GITHUB_TOKEN=ghp_your-github-token-here
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Valentin Dutra
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: step-by-step-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LLM Development Pipeline UI - GitHub Actions-style visual dashboard for LLM-driven workflows
|
|
5
|
+
Project-URL: Homepage, https://github.com/ValentinDutra/step-by-step
|
|
6
|
+
Project-URL: Repository, https://github.com/ValentinDutra/step-by-step
|
|
7
|
+
Project-URL: Issues, https://github.com/ValentinDutra/step-by-step/issues
|
|
8
|
+
Author: Valentin Dutra
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2025 Valentin Dutra
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: ai,automation,claude,cli,developer-tools,llm,pipeline,tui
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Environment :: Console
|
|
34
|
+
Classifier: Intended Audience :: Developers
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Operating System :: OS Independent
|
|
37
|
+
Classifier: Programming Language :: Python :: 3
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
39
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
40
|
+
Classifier: Topic :: Terminals
|
|
41
|
+
Requires-Python: >=3.13
|
|
42
|
+
Requires-Dist: psutil>=5.9.0
|
|
43
|
+
Requires-Dist: python-dotenv>=1.2.2
|
|
44
|
+
Requires-Dist: textual>=8.1.1
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
# Step-by-Step
|
|
48
|
+
|
|
49
|
+
A terminal UI that runs your development tasks through a structured multi-agent pipeline. You describe what you want to build; a team of specialized Claude agents plans, implements, tests, reviews, and opens a pull request — autonomously.
|
|
50
|
+
|
|
51
|
+

|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## How it works
|
|
56
|
+
|
|
57
|
+
Step-by-Step models software delivery as a linear pipeline of specialized agents, each owning a single responsibility. Stages that can be parallelized fan out into independent worker agents that run concurrently, then merge their results before the next stage begins.
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
Plan ──● Decomp ──● Impl ⇶ ──● Tests ⇶ ──● Quality ──● Docs ──● PR
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
`⇶` = parallel workers · `●` = single agent
|
|
64
|
+
|
|
65
|
+
### Pipeline stages
|
|
66
|
+
|
|
67
|
+
| Stage | Mode | What it does |
|
|
68
|
+
|---|---|---|
|
|
69
|
+
| **Planning** | Single agent | Senior architect reads your codebase and produces a concrete, numbered implementation plan |
|
|
70
|
+
| **Decomposition** | Manager agent | Splits the plan into independent subtasks that can be worked on simultaneously |
|
|
71
|
+
| **Implementation** | Parallel workers | Each subtask is handed to a dedicated worker agent; all workers run concurrently |
|
|
72
|
+
| **Tests & Validation** | Parallel workers | QA agents write and run tests per subtask; surface failures via `## Issues Found` |
|
|
73
|
+
| **Code Quality** | Single agent | Reviewer checks for code smells, security issues, and readability |
|
|
74
|
+
| **Documentation** | Single agent | Generates or updates README sections, docstrings, and API reference |
|
|
75
|
+
| **Commit & PR** | Single agent | Writes conventional commits and opens a GitHub Pull Request |
|
|
76
|
+
|
|
77
|
+
### Refinement loops
|
|
78
|
+
|
|
79
|
+
Claude drives two autonomous feedback loops — it decides when to stop by reporting `## Issues Found: None`.
|
|
80
|
+
|
|
81
|
+
- **Test loop** — cycles through Implementation → Tests & Validation until no issues remain
|
|
82
|
+
- **Quality loop** — re-decomposes and re-implements until Code Quality is satisfied
|
|
83
|
+
|
|
84
|
+
### RAM-based flow control
|
|
85
|
+
|
|
86
|
+
Worker concurrency is not capped by a fixed number. Instead, the pipeline uses TCP-style flow control: a new worker starts only when system RAM is below 75%. Starts are serialized and include a post-start delay so the OS can register each new process's footprint before the next candidate is evaluated. When RAM is high, new workers queue up and resume as running workers release memory.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Requirements
|
|
91
|
+
|
|
92
|
+
- **Python 3.13+**
|
|
93
|
+
- **[uv](https://docs.astral.sh/uv/)** (recommended) or pip
|
|
94
|
+
- **[Claude Code CLI](https://docs.anthropic.com/en/docs/claude-code)** — `npm install -g @anthropic-ai/claude-code`
|
|
95
|
+
- **[GitHub CLI](https://cli.github.com/)** — required for the Commit & PR stage (`gh auth login`)
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Installation
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
git clone https://github.com/ValentinDutra/step-by-step.git
|
|
103
|
+
cd step-by-step
|
|
104
|
+
uv sync
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Usage
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
# Run against the current directory
|
|
113
|
+
uv run pipeline
|
|
114
|
+
|
|
115
|
+
# Run against a specific repository
|
|
116
|
+
uv run pipeline /path/to/your/repo
|
|
117
|
+
|
|
118
|
+
# Load a prompt from a file and start immediately
|
|
119
|
+
uv run pipeline /path/to/your/repo -f prompt.txt
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Type your task in the input area at the bottom and press `Ctrl+Enter` to start.
|
|
123
|
+
|
|
124
|
+
### Keyboard shortcuts
|
|
125
|
+
|
|
126
|
+
| Key | Action |
|
|
127
|
+
|---|---|
|
|
128
|
+
| `Ctrl+Enter` | Submit prompt and run the pipeline |
|
|
129
|
+
| `Ctrl+L` | Clear the activity log |
|
|
130
|
+
| `Ctrl+E` | Export log to `pipeline_log_<timestamp>.txt` |
|
|
131
|
+
| `Ctrl+C` | Quit |
|
|
132
|
+
|
|
133
|
+
### Re-running from a specific stage
|
|
134
|
+
|
|
135
|
+
Once a run completes, every stage pill in the header becomes clickable. Click any stage to **re-run from that point forward**, reusing all prior context — useful for retrying a failed stage or iterating on implementation without re-planning.
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## UI layout
|
|
140
|
+
|
|
141
|
+
```
|
|
142
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
143
|
+
│ Plan │ Decomp │ Impl ⇶ │ Tests ⇶ │ Quality │ PR │ ← stage bar
|
|
144
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
145
|
+
│ > Describe your task… │ ← prompt input
|
|
146
|
+
├──────────────────────────────┬──────────────────────────────────┤
|
|
147
|
+
│ ● Planning │ │
|
|
148
|
+
│ │ Activity log │
|
|
149
|
+
│ Streaming pane │ (full chronological history) │
|
|
150
|
+
│ (live output from active │ │
|
|
151
|
+
│ stage or worker) │ │
|
|
152
|
+
├──────────────────────────────┴──────────────────────────────────┤
|
|
153
|
+
│ ^p palette ^l Clear Log ctrl+↵ Run ^e Export Log ^m Monitor Calls: 4 | Cost: $0.0234 | Time: 1m 20s │
|
|
154
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Project structure
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
app/
|
|
163
|
+
├── __main__.py Entry point
|
|
164
|
+
├── models.py Shared data classes (Task, WorkerResult, PipelineStats)
|
|
165
|
+
├── agents.py Manager agent: task decomposition (Claude CLI invocation lives in claude.py)
|
|
166
|
+
├── stages.py Stage definitions, prompt templates, and pipeline configuration
|
|
167
|
+
├── pipeline.py Stage runners: run_stage() and run_stage_parallel()
|
|
168
|
+
├── runner.py PipelineRunnerMixin: run_pipeline() and rerun_from_stage()
|
|
169
|
+
├── widgets.py StagePill TUI widget and display constants
|
|
170
|
+
├── git.py Git/gh subprocess helpers and Commit & PR stage runner
|
|
171
|
+
└── tui.py PipelineApp (Textual App) and main() entry point
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## Safety
|
|
177
|
+
|
|
178
|
+
The pipeline invokes `claude --dangerously-skip-permissions` so agents can read and write files autonomously. **Only point it at repositories where you trust the output.** Always review the diff before the PR stage commits.
|
|
179
|
+
|
|
180
|
+
Each subprocess is run with a 10-minute timeout and cleaned up unconditionally on exit — even on errors or cancellation — so stalled Claude processes do not accumulate.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
|
|
186
|
+
MIT © Valentin Dutra — see [LICENSE](LICENSE)
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Step-by-Step
|
|
2
|
+
|
|
3
|
+
A terminal UI that runs your development tasks through a structured multi-agent pipeline. You describe what you want to build; a team of specialized Claude agents plans, implements, tests, reviews, and opens a pull request — autonomously.
|
|
4
|
+
|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## How it works
|
|
10
|
+
|
|
11
|
+
Step-by-Step models software delivery as a linear pipeline of specialized agents, each owning a single responsibility. Stages that can be parallelized fan out into independent worker agents that run concurrently, then merge their results before the next stage begins.
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
Plan ──● Decomp ──● Impl ⇶ ──● Tests ⇶ ──● Quality ──● Docs ──● PR
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
`⇶` = parallel workers · `●` = single agent
|
|
18
|
+
|
|
19
|
+
### Pipeline stages
|
|
20
|
+
|
|
21
|
+
| Stage | Mode | What it does |
|
|
22
|
+
|---|---|---|
|
|
23
|
+
| **Planning** | Single agent | Senior architect reads your codebase and produces a concrete, numbered implementation plan |
|
|
24
|
+
| **Decomposition** | Manager agent | Splits the plan into independent subtasks that can be worked on simultaneously |
|
|
25
|
+
| **Implementation** | Parallel workers | Each subtask is handed to a dedicated worker agent; all workers run concurrently |
|
|
26
|
+
| **Tests & Validation** | Parallel workers | QA agents write and run tests per subtask; surface failures via `## Issues Found` |
|
|
27
|
+
| **Code Quality** | Single agent | Reviewer checks for code smells, security issues, and readability |
|
|
28
|
+
| **Documentation** | Single agent | Generates or updates README sections, docstrings, and API reference |
|
|
29
|
+
| **Commit & PR** | Single agent | Writes conventional commits and opens a GitHub Pull Request |
|
|
30
|
+
|
|
31
|
+
### Refinement loops
|
|
32
|
+
|
|
33
|
+
Claude drives two autonomous feedback loops — it decides when to stop by reporting `## Issues Found: None`.
|
|
34
|
+
|
|
35
|
+
- **Test loop** — cycles through Implementation → Tests & Validation until no issues remain
|
|
36
|
+
- **Quality loop** — re-decomposes and re-implements until Code Quality is satisfied
|
|
37
|
+
|
|
38
|
+
### RAM-based flow control
|
|
39
|
+
|
|
40
|
+
Worker concurrency is not capped by a fixed number. Instead, the pipeline uses TCP-style flow control: a new worker starts only when system RAM is below 75%. Starts are serialized and include a post-start delay so the OS can register each new process's footprint before the next candidate is evaluated. When RAM is high, new workers queue up and resume as running workers release memory.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Requirements
|
|
45
|
+
|
|
46
|
+
- **Python 3.13+**
|
|
47
|
+
- **[uv](https://docs.astral.sh/uv/)** (recommended) or pip
|
|
48
|
+
- **[Claude Code CLI](https://docs.anthropic.com/en/docs/claude-code)** — `npm install -g @anthropic-ai/claude-code`
|
|
49
|
+
- **[GitHub CLI](https://cli.github.com/)** — required for the Commit & PR stage (`gh auth login`)
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
git clone https://github.com/ValentinDutra/step-by-step.git
|
|
57
|
+
cd step-by-step
|
|
58
|
+
uv sync
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Usage
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# Run against the current directory
|
|
67
|
+
uv run pipeline
|
|
68
|
+
|
|
69
|
+
# Run against a specific repository
|
|
70
|
+
uv run pipeline /path/to/your/repo
|
|
71
|
+
|
|
72
|
+
# Load a prompt from a file and start immediately
|
|
73
|
+
uv run pipeline /path/to/your/repo -f prompt.txt
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Type your task in the input area at the bottom and press `Ctrl+Enter` to start.
|
|
77
|
+
|
|
78
|
+
### Keyboard shortcuts
|
|
79
|
+
|
|
80
|
+
| Key | Action |
|
|
81
|
+
|---|---|
|
|
82
|
+
| `Ctrl+Enter` | Submit prompt and run the pipeline |
|
|
83
|
+
| `Ctrl+L` | Clear the activity log |
|
|
84
|
+
| `Ctrl+E` | Export log to `pipeline_log_<timestamp>.txt` |
|
|
85
|
+
| `Ctrl+C` | Quit |
|
|
86
|
+
|
|
87
|
+
### Re-running from a specific stage
|
|
88
|
+
|
|
89
|
+
Once a run completes, every stage pill in the header becomes clickable. Click any stage to **re-run from that point forward**, reusing all prior context — useful for retrying a failed stage or iterating on implementation without re-planning.
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## UI layout
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
97
|
+
│ Plan │ Decomp │ Impl ⇶ │ Tests ⇶ │ Quality │ PR │ ← stage bar
|
|
98
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
99
|
+
│ > Describe your task… │ ← prompt input
|
|
100
|
+
├──────────────────────────────┬──────────────────────────────────┤
|
|
101
|
+
│ ● Planning │ │
|
|
102
|
+
│ │ Activity log │
|
|
103
|
+
│ Streaming pane │ (full chronological history) │
|
|
104
|
+
│ (live output from active │ │
|
|
105
|
+
│ stage or worker) │ │
|
|
106
|
+
├──────────────────────────────┴──────────────────────────────────┤
|
|
107
|
+
│ ^p palette ^l Clear Log ctrl+↵ Run ^e Export Log ^m Monitor Calls: 4 | Cost: $0.0234 | Time: 1m 20s │
|
|
108
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## Project structure
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
app/
|
|
117
|
+
├── __main__.py Entry point
|
|
118
|
+
├── models.py Shared data classes (Task, WorkerResult, PipelineStats)
|
|
119
|
+
├── agents.py Manager agent: task decomposition (Claude CLI invocation lives in claude.py)
|
|
120
|
+
├── stages.py Stage definitions, prompt templates, and pipeline configuration
|
|
121
|
+
├── pipeline.py Stage runners: run_stage() and run_stage_parallel()
|
|
122
|
+
├── runner.py PipelineRunnerMixin: run_pipeline() and rerun_from_stage()
|
|
123
|
+
├── widgets.py StagePill TUI widget and display constants
|
|
124
|
+
├── git.py Git/gh subprocess helpers and Commit & PR stage runner
|
|
125
|
+
└── tui.py PipelineApp (Textual App) and main() entry point
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Safety
|
|
131
|
+
|
|
132
|
+
The pipeline invokes `claude --dangerously-skip-permissions` so agents can read and write files autonomously. **Only point it at repositories where you trust the output.** Always review the diff before the PR stage commits.
|
|
133
|
+
|
|
134
|
+
Each subprocess is run with a 10-minute timeout and cleaned up unconditionally on exit — even on errors or cancellation — so stalled Claude processes do not accumulate.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## License
|
|
139
|
+
|
|
140
|
+
MIT © Valentin Dutra — see [LICENSE](LICENSE)
|
|
File without changes
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Manager agent: task decomposition."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from app.claude import call_claude
|
|
6
|
+
from app.models import Task
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
async def decompose_task(prompt: str, plan: str, working_dir: str) -> list[Task]:
    """Manager agent: decompose a plan into independent parallel subtasks.

    Asks Claude for a JSON array of subtasks and converts each entry into a
    ``Task``. Whenever the call fails or the reply cannot be turned into at
    least one task, a single task carrying the original prompt is returned so
    the caller always has work to hand out.

    Args:
        prompt: The user's original task description.
        plan: The implementation plan to split up.
        working_dir: Directory passed through to ``call_claude``.

    Returns:
        A non-empty list of ``Task`` objects.
    """

    def _single_task() -> list[Task]:
        # Fallback used on every failure path: treat the whole prompt as one task.
        return [Task(id=1, description=prompt)]

    decompose_prompt = (
        "You are a task decomposition agent. Given a plan, break it into independent subtasks "
        "that can be worked on IN PARALLEL by different engineers.\n\n"
        f"ORIGINAL TASK: {prompt}\n\n"
        f"PLAN:\n{plan}\n\n"
        "Output a JSON array of subtasks. Each subtask should have:\n"
        '- "id": sequential integer starting at 1\n'
        '- "description": what to implement (be specific and self-contained, include enough context)\n'
        '- "files": list of files this subtask will create or modify\n\n'
        "Rules:\n"
        "- Each subtask must be independent enough to work on in parallel\n"
        "- Include enough context in each description so a worker can act without seeing other subtasks\n"
        "- Create as many subtasks as the complexity genuinely requires — no artificial limit\n"
        "- If the task is simple and cannot be split, return a single subtask\n"
        "- Output ONLY the JSON array, no markdown fences or other text\n"
    )

    success, output, _ = await call_claude(decompose_prompt, working_dir)
    if not success:
        return _single_task()

    try:
        text = output.strip()
        if text.startswith("```"):
            # The model sometimes wraps the array in a markdown fence despite
            # the instructions. Drop the opening fence line (it may carry a
            # language tag) and everything from the closing fence onward.
            # partition() yields "" when there is no newline, which then
            # fails JSON parsing cleanly instead of raising IndexError.
            text = text.partition("\n")[2].rsplit("```", 1)[0].strip()
        tasks_data = json.loads(text)
        tasks = [
            Task(id=t["id"], description=t["description"], files=t.get("files", []))
            for t in tasks_data
        ]
    except (json.JSONDecodeError, KeyError, TypeError, AttributeError):
        # Malformed JSON, missing keys, a non-list / non-dict shape, or a
        # non-string output all degrade to the single-task fallback.
        return _single_task()

    # An empty array would leave the pipeline with nothing to implement.
    return tasks or _single_task()
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""Claude CLI invocation and iteration evaluation."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
|
|
6
|
+
_CLAUDE_TIMEOUT = 600 # seconds per subprocess call
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
async def call_claude(
    prompt: str,
    working_dir: str,
    on_stream=None,
) -> tuple[bool, str, float]:
    """Call Claude CLI and return (success, output, cost_usd).

    The prompt is written to the child's stdin; stdout is parsed as
    newline-delimited JSON events (``--output-format stream-json``):

    * ``assistant`` events: every text block is passed to ``on_stream``.
    * ``result`` events: provide the final output and ``total_cost_usd``. A
      result flagged as an error ends the call with ``success=False``.

    Stderr is drained concurrently to prevent pipe deadlock, and the
    subprocess is always cleaned up on exit.

    Args:
        prompt: Text sent to the CLI on stdin.
        working_dir: Directory the subprocess is started in.
        on_stream: Optional callback invoked as ``on_stream(chunk: str)`` for
            each streamed text block. May be sync or async; an awaitable
            return value is awaited.

    Returns:
        ``(True, final_output, cost_usd)`` on success. On failure the first
        element is ``False`` and the second is an error message: the error
        result text, ``"Timeout after <n>s"``, captured stderr (or
        ``"Exit code <n>"``), a "CLI not found" hint, or ``str(exception)``.
    """
    # Function-scope import, matching the deferred imports used further down.
    import inspect

    proc = None
    stderr_task: asyncio.Task | None = None

    try:
        proc = await asyncio.create_subprocess_exec(
            "claude",
            "--print",
            "--dangerously-skip-permissions",
            "--output-format",
            "stream-json",
            "--verbose",
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=working_dir,
        )

        proc.stdin.write(prompt.encode())
        await proc.stdin.drain()
        proc.stdin.close()

        final_output = ""
        cost_usd = 0.0
        early_result: tuple[bool, str, float] | None = None

        stderr_chunks: list[bytes] = []

        async def _drain_stderr() -> None:
            # Keep reading stderr so the child never blocks on a full pipe.
            while True:
                chunk = await proc.stderr.read(4096)
                if not chunk:
                    break
                stderr_chunks.append(chunk)

        async def _handle_line(raw_line: bytes) -> None:
            """Parse one stdout line as a stream-json event and act on it."""
            nonlocal final_output, cost_usd, early_result
            line = raw_line.decode(errors="replace").strip()
            if not line:
                return
            try:
                event = json.loads(line)
                if not isinstance(event, dict):
                    # Valid JSON that is not an event object: ignore it
                    # instead of failing the whole call on ``.get``.
                    return
                etype = event.get("type")
                if etype == "assistant" and on_stream:
                    for block in event.get("message", {}).get("content", []):
                        if block.get("type") == "text":
                            outcome = on_stream(block["text"])
                            # Works for plain functions, coroutine functions,
                            # and wrapped async callables alike.
                            if inspect.isawaitable(outcome):
                                await outcome
                elif etype == "result":
                    # ``or ""`` keeps the output a str even for a null result.
                    final_output = event.get("result") or ""
                    cost_usd = float(event.get("total_cost_usd") or 0.0)
                    if event.get("subtype") == "error" or event.get("is_error"):
                        early_result = (
                            False,
                            final_output or "Claude returned an error",
                            cost_usd,
                        )
            except (
                json.JSONDecodeError,
                KeyError,
                TypeError,
                AttributeError,
                ValueError,
            ):
                # Best-effort stream parsing: skip lines that are not
                # well-formed events rather than aborting the call.
                pass

        stderr_task = asyncio.create_task(_drain_stderr())

        buf = b""
        try:
            async with asyncio.timeout(_CLAUDE_TIMEOUT):
                while early_result is None:
                    raw_chunk = await proc.stdout.read(65536)
                    if not raw_chunk:
                        # EOF: the last event may lack a trailing newline.
                        if buf:
                            await _handle_line(buf)
                            buf = b""
                        break
                    buf += raw_chunk
                    while b"\n" in buf and early_result is None:
                        raw_line, buf = buf.split(b"\n", 1)
                        await _handle_line(raw_line)
        except asyncio.TimeoutError:
            return False, f"Timeout after {_CLAUDE_TIMEOUT}s", 0.0

        # NOTE(review): these two waits run outside the timeout above, so a
        # child that closes stdout but keeps running is waited on without a
        # deadline — confirm whether that is intended.
        await stderr_task
        stderr_task = None
        await proc.wait()

        if early_result is not None:
            # Deferred import — presumably avoids a circular import; confirm.
            from app.models import pipeline_stats

            pipeline_stats.add_call(early_result[2])
            return early_result

        if proc.returncode != 0 and not final_output:
            stderr_data = b"".join(stderr_chunks)
            # errors="replace": undecodable stderr bytes must not hide the
            # actual failure behind a UnicodeDecodeError.
            err = (
                stderr_data.decode(errors="replace").strip()
                or f"Exit code {proc.returncode}"
            )
            return False, err, 0.0

        from app.models import pipeline_stats

        pipeline_stats.add_call(cost_usd)
        return True, final_output, cost_usd

    except FileNotFoundError:
        return (
            False,
            "'claude' CLI not found. Install: npm install -g @anthropic-ai/claude-code",
            0.0,
        )
    except Exception as e:
        # Boundary handler: callers expect a result tuple, never an exception.
        return False, str(e), 0.0
    finally:
        if stderr_task is not None and not stderr_task.done():
            stderr_task.cancel()
            try:
                await stderr_task
            except asyncio.CancelledError:
                pass
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                # The child exited between the check and the kill.
                pass
            try:
                await proc.wait()
            except Exception:
                # Cleanup is best-effort and must not mask the result.
                pass
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
async def evaluate_should_iterate(stage_output: str, working_dir: str) -> bool:
    """Quality gate: let Claude decide whether another iteration is needed.

    Only the first 4000 characters of ``stage_output`` are sent. Returns
    ``True`` when the call succeeds and the reply, trimmed and lower-cased,
    begins with ``"yes"``; returns ``False`` otherwise, including when the
    call itself fails.
    """
    excerpt = stage_output[:4000]
    gate_prompt = "".join(
        [
            "You are a quality gate agent. Review the following stage output and decide ",
            "whether it contains genuine issues that require another implementation iteration.\n\n",
            "Answer ONLY with 'yes' if there are real issues that need fixing, ",
            "or 'no' if the output is satisfactory and the pipeline can proceed.\n\n",
            f"STAGE OUTPUT:\n{excerpt}",
        ]
    )

    ok, answer, _cost = await call_claude(gate_prompt, working_dir)
    # A failed call is treated as "no issues": the pipeline proceeds.
    verdict = answer.strip().lower() if ok else ""
    return verdict.startswith("yes")
|