step-by-step-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ GITHUB_TOKEN=ghp_your-github-token-here
@@ -0,0 +1,17 @@
1
+ .agents/
2
+ .env
3
+ .claude/
4
+ skills-lock.json
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.pyc
9
+ *.pyo
10
+ *.egg-info/
11
+ dist/
12
+ build/
13
+ .venv/
14
+ .python-version
15
+
16
+ # Pipeline exports
17
+ pipeline_log_*.txt
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Valentin Dutra
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,186 @@
1
+ Metadata-Version: 2.4
2
+ Name: step-by-step-cli
3
+ Version: 0.1.0
4
+ Summary: LLM Development Pipeline UI - GitHub Actions-style visual dashboard for LLM-driven workflows
5
+ Project-URL: Homepage, https://github.com/ValentinDutra/step-by-step
6
+ Project-URL: Repository, https://github.com/ValentinDutra/step-by-step
7
+ Project-URL: Issues, https://github.com/ValentinDutra/step-by-step/issues
8
+ Author: Valentin Dutra
9
+ License: MIT License
10
+
11
+ Copyright (c) 2025 Valentin Dutra
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: ai,automation,claude,cli,developer-tools,llm,pipeline,tui
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Environment :: Console
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Operating System :: OS Independent
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Programming Language :: Python :: 3.13
39
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
40
+ Classifier: Topic :: Terminals
41
+ Requires-Python: >=3.13
42
+ Requires-Dist: psutil>=5.9.0
43
+ Requires-Dist: python-dotenv>=1.2.2
44
+ Requires-Dist: textual>=8.1.1
45
+ Description-Content-Type: text/markdown
46
+
47
+ # Step-by-Step
48
+
49
+ A terminal UI that runs your development tasks through a structured multi-agent pipeline. You describe what you want to build; a team of specialized Claude agents plans, implements, tests, reviews, and opens a pull request — autonomously.
50
+
51
+ ![Step-by-Step in action](assets/screenshot.png)
52
+
53
+ ---
54
+
55
+ ## How it works
56
+
57
+ Step-by-Step models software delivery as a linear pipeline of specialized agents, each owning a single responsibility. Stages that can be parallelized fan out into independent worker agents that run concurrently, then merge their results before the next stage begins.
58
+
59
+ ```
60
+ Plan ──● Decomp ──● Impl ⇶ ──● Tests ⇶ ──● Quality ──● Docs ──● PR
61
+ ```
62
+
63
+ `⇶` = parallel workers · `●` = single agent
64
+
65
+ ### Pipeline stages
66
+
67
+ | Stage | Mode | What it does |
68
+ |---|---|---|
69
+ | **Planning** | Single agent | Senior architect reads your codebase and produces a concrete, numbered implementation plan |
70
+ | **Decomposition** | Manager agent | Splits the plan into independent subtasks that can be worked on simultaneously |
71
+ | **Implementation** | Parallel workers | Each subtask is handed to a dedicated worker agent; all workers run concurrently |
72
+ | **Tests & Validation** | Parallel workers | QA agents write and run tests per subtask; surface failures via `## Issues Found` |
73
+ | **Code Quality** | Single agent | Reviewer checks for code smells, security issues, and readability |
74
+ | **Documentation** | Single agent | Generates or updates README sections, docstrings, and API reference |
75
+ | **Commit & PR** | Single agent | Writes conventional commits and opens a GitHub Pull Request |
76
+
77
+ ### Refinement loops
78
+
79
+ Claude drives two autonomous feedback loops — it decides when to stop by reporting `## Issues Found: None`.
80
+
81
+ - **Test loop** — cycles through Implementation → Tests & Validation until no issues remain
82
+ - **Quality loop** — re-decomposes and re-implements until Code Quality is satisfied
83
+
84
+ ### RAM-based flow control
85
+
86
+ Worker concurrency is not capped by a fixed number. Instead, the pipeline uses TCP-style flow control: a new worker starts only when system RAM is below 75%. Starts are serialized and include a post-start delay so the OS can register each new process's footprint before the next candidate is evaluated. When RAM is high, new workers queue up and resume as running workers release memory.
87
+
88
+ ---
89
+
90
+ ## Requirements
91
+
92
+ - **Python 3.13+**
93
+ - **[uv](https://docs.astral.sh/uv/)** (recommended) or pip
94
+ - **[Claude Code CLI](https://docs.anthropic.com/en/docs/claude-code)** — `npm install -g @anthropic-ai/claude-code`
95
+ - **[GitHub CLI](https://cli.github.com/)** — required for the Commit & PR stage (`gh auth login`)
96
+
97
+ ---
98
+
99
+ ## Installation
100
+
101
+ ```bash
102
+ git clone https://github.com/ValentinDutra/step-by-step.git
103
+ cd step-by-step
104
+ uv sync
105
+ ```
106
+
107
+ ---
108
+
109
+ ## Usage
110
+
111
+ ```bash
112
+ # Run against the current directory
113
+ uv run pipeline
114
+
115
+ # Run against a specific repository
116
+ uv run pipeline /path/to/your/repo
117
+
118
+ # Load a prompt from a file and start immediately
119
+ uv run pipeline /path/to/your/repo -f prompt.txt
120
+ ```
121
+
122
+ Type your task in the prompt input area (directly below the stage bar) and press `Ctrl+Enter` to start.
123
+
124
+ ### Keyboard shortcuts
125
+
126
+ | Key | Action |
127
+ |---|---|
128
+ | `Ctrl+Enter` | Submit prompt and run the pipeline |
129
+ | `Ctrl+L` | Clear the activity log |
130
+ | `Ctrl+E` | Export log to `pipeline_log_<timestamp>.txt` |
131
+ | `Ctrl+C` | Quit |
132
+
133
+ ### Re-running from a specific stage
134
+
135
+ Once a run completes, every stage pill in the header becomes clickable. Click any stage to **re-run from that point forward**, reusing all prior context — useful for retrying a failed stage or iterating on implementation without re-planning.
136
+
137
+ ---
138
+
139
+ ## UI layout
140
+
141
+ ```
142
+ ┌─────────────────────────────────────────────────────────────────┐
143
+ │ Plan │ Decomp │ Impl ⇶ │ Tests ⇶ │ Quality │ Docs │ PR │ ← stage bar
144
+ ├─────────────────────────────────────────────────────────────────┤
145
+ │ > Describe your task… │ ← prompt input
146
+ ├──────────────────────────────┬──────────────────────────────────┤
147
+ │ ● Planning │ │
148
+ │ │ Activity log │
149
+ │ Streaming pane │ (full chronological history) │
150
+ │ (live output from active │ │
151
+ │ stage or worker) │ │
152
+ ├──────────────────────────────┴──────────────────────────────────┤
153
+ │ ^p palette ^l Clear Log ctrl+↵ Run ^e Export Log ^m Monitor Calls: 4 | Cost: $0.0234 | Time: 1m 20s │
154
+ └─────────────────────────────────────────────────────────────────┘
155
+ ```
156
+
157
+ ---
158
+
159
+ ## Project structure
160
+
161
+ ```
162
+ app/
163
+ ├── __main__.py Entry point
164
+ ├── models.py Shared data classes (Task, WorkerResult, PipelineStats)
165
+ ├── agents.py Claude CLI invocation, flow control, and worker coordination
166
+ ├── stages.py Stage definitions, prompt templates, and pipeline configuration
167
+ ├── pipeline.py Stage runners: run_stage() and run_stage_parallel()
168
+ ├── runner.py PipelineRunnerMixin: run_pipeline() and rerun_from_stage()
169
+ ├── widgets.py StagePill TUI widget and display constants
170
+ ├── git.py Git/gh subprocess helpers and Commit & PR stage runner
171
+ └── tui.py PipelineApp (Textual App) and main() entry point
172
+ ```
173
+
174
+ ---
175
+
176
+ ## Safety
177
+
178
+ The pipeline invokes `claude --dangerously-skip-permissions` so agents can read and write files autonomously. **Only point it at repositories where you trust the output.** Always review the diff before the PR stage commits.
179
+
180
+ Each subprocess is run with a 10-minute timeout and cleaned up unconditionally on exit — even on errors or cancellation — so stalled Claude processes do not accumulate.
181
+
182
+ ---
183
+
184
+ ## License
185
+
186
+ MIT © Valentin Dutra — see [LICENSE](LICENSE)
@@ -0,0 +1,140 @@
1
+ # Step-by-Step
2
+
3
+ A terminal UI that runs your development tasks through a structured multi-agent pipeline. You describe what you want to build; a team of specialized Claude agents plans, implements, tests, reviews, and opens a pull request — autonomously.
4
+
5
+ ![Step-by-Step in action](assets/screenshot.png)
6
+
7
+ ---
8
+
9
+ ## How it works
10
+
11
+ Step-by-Step models software delivery as a linear pipeline of specialized agents, each owning a single responsibility. Stages that can be parallelized fan out into independent worker agents that run concurrently, then merge their results before the next stage begins.
12
+
13
+ ```
14
+ Plan ──● Decomp ──● Impl ⇶ ──● Tests ⇶ ──● Quality ──● Docs ──● PR
15
+ ```
16
+
17
+ `⇶` = parallel workers · `●` = single agent
18
+
19
+ ### Pipeline stages
20
+
21
+ | Stage | Mode | What it does |
22
+ |---|---|---|
23
+ | **Planning** | Single agent | Senior architect reads your codebase and produces a concrete, numbered implementation plan |
24
+ | **Decomposition** | Manager agent | Splits the plan into independent subtasks that can be worked on simultaneously |
25
+ | **Implementation** | Parallel workers | Each subtask is handed to a dedicated worker agent; all workers run concurrently |
26
+ | **Tests & Validation** | Parallel workers | QA agents write and run tests per subtask; surface failures via `## Issues Found` |
27
+ | **Code Quality** | Single agent | Reviewer checks for code smells, security issues, and readability |
28
+ | **Documentation** | Single agent | Generates or updates README sections, docstrings, and API reference |
29
+ | **Commit & PR** | Single agent | Writes conventional commits and opens a GitHub Pull Request |
30
+
31
+ ### Refinement loops
32
+
33
+ Claude drives two autonomous feedback loops — it decides when to stop by reporting `## Issues Found: None`.
34
+
35
+ - **Test loop** — cycles through Implementation → Tests & Validation until no issues remain
36
+ - **Quality loop** — re-decomposes and re-implements until Code Quality is satisfied
37
+
38
+ ### RAM-based flow control
39
+
40
+ Worker concurrency is not capped by a fixed number. Instead, the pipeline uses TCP-style flow control: a new worker starts only when system RAM is below 75%. Starts are serialized and include a post-start delay so the OS can register each new process's footprint before the next candidate is evaluated. When RAM is high, new workers queue up and resume as running workers release memory.
41
+
42
+ ---
43
+
44
+ ## Requirements
45
+
46
+ - **Python 3.13+**
47
+ - **[uv](https://docs.astral.sh/uv/)** (recommended) or pip
48
+ - **[Claude Code CLI](https://docs.anthropic.com/en/docs/claude-code)** — `npm install -g @anthropic-ai/claude-code`
49
+ - **[GitHub CLI](https://cli.github.com/)** — required for the Commit & PR stage (`gh auth login`)
50
+
51
+ ---
52
+
53
+ ## Installation
54
+
55
+ ```bash
56
+ git clone https://github.com/ValentinDutra/step-by-step.git
57
+ cd step-by-step
58
+ uv sync
59
+ ```
60
+
61
+ ---
62
+
63
+ ## Usage
64
+
65
+ ```bash
66
+ # Run against the current directory
67
+ uv run pipeline
68
+
69
+ # Run against a specific repository
70
+ uv run pipeline /path/to/your/repo
71
+
72
+ # Load a prompt from a file and start immediately
73
+ uv run pipeline /path/to/your/repo -f prompt.txt
74
+ ```
75
+
76
+ Type your task in the prompt input area (directly below the stage bar) and press `Ctrl+Enter` to start.
77
+
78
+ ### Keyboard shortcuts
79
+
80
+ | Key | Action |
81
+ |---|---|
82
+ | `Ctrl+Enter` | Submit prompt and run the pipeline |
83
+ | `Ctrl+L` | Clear the activity log |
84
+ | `Ctrl+E` | Export log to `pipeline_log_<timestamp>.txt` |
85
+ | `Ctrl+C` | Quit |
86
+
87
+ ### Re-running from a specific stage
88
+
89
+ Once a run completes, every stage pill in the header becomes clickable. Click any stage to **re-run from that point forward**, reusing all prior context — useful for retrying a failed stage or iterating on implementation without re-planning.
90
+
91
+ ---
92
+
93
+ ## UI layout
94
+
95
+ ```
96
+ ┌─────────────────────────────────────────────────────────────────┐
97
+ │ Plan │ Decomp │ Impl ⇶ │ Tests ⇶ │ Quality │ Docs │ PR │ ← stage bar
98
+ ├─────────────────────────────────────────────────────────────────┤
99
+ │ > Describe your task… │ ← prompt input
100
+ ├──────────────────────────────┬──────────────────────────────────┤
101
+ │ ● Planning │ │
102
+ │ │ Activity log │
103
+ │ Streaming pane │ (full chronological history) │
104
+ │ (live output from active │ │
105
+ │ stage or worker) │ │
106
+ ├──────────────────────────────┴──────────────────────────────────┤
107
+ │ ^p palette ^l Clear Log ctrl+↵ Run ^e Export Log ^m Monitor Calls: 4 | Cost: $0.0234 | Time: 1m 20s │
108
+ └─────────────────────────────────────────────────────────────────┘
109
+ ```
110
+
111
+ ---
112
+
113
+ ## Project structure
114
+
115
+ ```
116
+ app/
117
+ ├── __main__.py Entry point
118
+ ├── models.py Shared data classes (Task, WorkerResult, PipelineStats)
119
+ ├── agents.py Claude CLI invocation, flow control, and worker coordination
120
+ ├── stages.py Stage definitions, prompt templates, and pipeline configuration
121
+ ├── pipeline.py Stage runners: run_stage() and run_stage_parallel()
122
+ ├── runner.py PipelineRunnerMixin: run_pipeline() and rerun_from_stage()
123
+ ├── widgets.py StagePill TUI widget and display constants
124
+ ├── git.py Git/gh subprocess helpers and Commit & PR stage runner
125
+ └── tui.py PipelineApp (Textual App) and main() entry point
126
+ ```
127
+
128
+ ---
129
+
130
+ ## Safety
131
+
132
+ The pipeline invokes `claude --dangerously-skip-permissions` so agents can read and write files autonomously. **Only point it at repositories where you trust the output.** Always review the diff before the PR stage commits.
133
+
134
+ Each subprocess is run with a 10-minute timeout and cleaned up unconditionally on exit — even on errors or cancellation — so stalled Claude processes do not accumulate.
135
+
136
+ ---
137
+
138
+ ## License
139
+
140
+ MIT © Valentin Dutra — see [LICENSE](LICENSE)
File without changes
@@ -0,0 +1,5 @@
1
+ """Entry point for the pipeline TUI."""
2
+
3
# Resolve main() through the tui submodule and start the TUI immediately.
from app import tui

tui.main()
@@ -0,0 +1,42 @@
1
+ """Manager agent: task decomposition."""
2
+
3
+ import json
4
+
5
+ from app.claude import call_claude
6
+ from app.models import Task
7
+
8
+
9
async def decompose_task(prompt: str, plan: str, working_dir: str) -> list[Task]:
    """Manager agent: decompose a plan into independent parallel subtasks.

    Builds a decomposition prompt from the original task and the plan, sends
    it to Claude through ``call_claude``, and parses the reply as a JSON array
    of subtask objects.

    Args:
        prompt: The original task description.
        plan: The implementation plan to split into subtasks.
        working_dir: Directory forwarded to ``call_claude``.

    Returns:
        One ``Task`` per subtask object in the reply. Falls back to a single
        task (id 1, described by ``prompt``) when the call fails, when the
        reply cannot be parsed as the expected JSON, or when the reply
        contains no subtasks at all.
    """
    decompose_prompt = (
        "You are a task decomposition agent. Given a plan, break it into independent subtasks "
        "that can be worked on IN PARALLEL by different engineers.\n\n"
        f"ORIGINAL TASK: {prompt}\n\n"
        f"PLAN:\n{plan}\n\n"
        "Output a JSON array of subtasks. Each subtask should have:\n"
        '- "id": sequential integer starting at 1\n'
        '- "description": what to implement (be specific and self-contained, include enough context)\n'
        '- "files": list of files this subtask will create or modify\n\n'
        "Rules:\n"
        "- Each subtask must be independent enough to work on in parallel\n"
        "- Include enough context in each description so a worker can act without seeing other subtasks\n"
        "- Create as many subtasks as the complexity genuinely requires — no artificial limit\n"
        "- If the task is simple and cannot be split, return a single subtask\n"
        "- Output ONLY the JSON array, no markdown fences or other text\n"
    )

    # Used whenever decomposition is unavailable: run the whole task as one unit.
    fallback = [Task(id=1, description=prompt)]

    success, output, _ = await call_claude(decompose_prompt, working_dir)
    if not success:
        return fallback

    try:
        text = output.strip()
        if text.startswith("```"):
            # The model sometimes wraps the array in a fence despite the
            # instructions. Drop the opening fence line and the closing fence.
            # partition() cannot raise on a fenced reply with no newline,
            # unlike split("\n", 1)[1]; such a reply simply fails JSON parsing.
            _, _, fenced_body = text.partition("\n")
            text = fenced_body.rsplit("```", 1)[0].strip()
        tasks_data = json.loads(text)
        tasks = [
            # "files" may be missing or null; normalise both to an empty list.
            Task(id=t["id"], description=t["description"], files=t.get("files") or [])
            for t in tasks_data
        ]
    except (json.JSONDecodeError, KeyError, TypeError):
        # Not JSON, not an array of objects, or a subtask lacks a required key.
        return fallback

    # An empty array would leave the parallel stages with nothing to run.
    return tasks or fallback
@@ -0,0 +1,149 @@
1
+ """Claude CLI invocation and iteration evaluation."""
2
+
3
+ import asyncio
4
+ import json
5
+
6
_CLAUDE_TIMEOUT = 600  # seconds per subprocess call


async def call_claude(
    prompt: str,
    working_dir: str,
    on_stream=None,
) -> tuple[bool, str, float]:
    """Call Claude CLI and return (success, output, cost_usd).

    Runs ``claude --print --dangerously-skip-permissions --output-format
    stream-json --verbose`` in ``working_dir``, writes ``prompt`` to its stdin
    and parses stdout as newline-delimited JSON events.

    Args:
        prompt: Text written to the subprocess's stdin.
        working_dir: Directory the subprocess is started in. A path that is
            not an existing directory is reported as a failure up front.
        on_stream: Optional callback invoked with each text chunk found in
            "assistant" events. May be sync or async; any awaitable it returns
            is awaited.

    Returns:
        ``(success, output, cost_usd)``. On success ``output`` is the "result"
        field of the final "result" event. On failure ``output`` is an error
        description (error result text, stderr, exit code, timeout notice or
        exception message).

    Streaming, reading the remaining stderr and waiting for process exit all
    share one ``_CLAUDE_TIMEOUT`` budget. stderr is drained concurrently to
    prevent pipe deadlock, and the subprocess is always cleaned up on exit.
    """
    # Function-scope imports, matching the local-import style used below.
    import inspect
    import os

    # A missing cwd surfaces from the subprocess layer as FileNotFoundError,
    # which would otherwise be misreported as "'claude' CLI not found".
    if working_dir is not None and not os.path.isdir(working_dir):
        return False, f"Working directory does not exist: {working_dir}", 0.0

    proc = None
    stderr_task: asyncio.Task | None = None

    try:
        proc = await asyncio.create_subprocess_exec(
            "claude",
            "--print",
            "--dangerously-skip-permissions",
            "--output-format",
            "stream-json",
            "--verbose",
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=working_dir,
        )

        proc.stdin.write(prompt.encode())
        await proc.stdin.drain()
        proc.stdin.close()

        final_output = ""
        cost_usd = 0.0
        early_result: tuple[bool, str, float] | None = None

        stderr_chunks: list[bytes] = []

        async def _drain_stderr() -> None:
            """Collect stderr until EOF so a full pipe cannot stall the child."""
            while True:
                chunk = await proc.stderr.read(4096)
                if not chunk:
                    break
                stderr_chunks.append(chunk)

        async def _handle_line(raw_line: bytes) -> None:
            """Parse one stream-json line and update the shared result state."""
            nonlocal final_output, cost_usd, early_result
            line = raw_line.decode(errors="replace").strip()
            if not line:
                return
            try:
                event = json.loads(line)
                etype = event.get("type")
                if etype == "assistant" and on_stream:
                    for block in event.get("message", {}).get("content", []):
                        if block.get("type") == "text":
                            # Await whatever the callback hands back, so async
                            # defs, partials and callable objects all work.
                            pending = on_stream(block["text"])
                            if inspect.isawaitable(pending):
                                await pending
                elif etype == "result":
                    # Keep the declared ``str`` output even for a null result.
                    final_output = event.get("result") or ""
                    cost_usd = float(event.get("total_cost_usd") or 0.0)
                    if event.get("subtype") == "error" or event.get("is_error"):
                        early_result = (
                            False,
                            final_output or "Claude returned an error",
                            cost_usd,
                        )
            except (json.JSONDecodeError, KeyError, TypeError, AttributeError):
                # Skip lines that are not JSON or not the expected object
                # shape; AttributeError covers non-dict events and blocks.
                pass

        stderr_task = asyncio.create_task(_drain_stderr())

        buf = b""
        try:
            async with asyncio.timeout(_CLAUDE_TIMEOUT):
                while True:
                    raw_chunk = await proc.stdout.read(65536)
                    if not raw_chunk:
                        # EOF: the last line may lack a trailing newline.
                        if buf:
                            await _handle_line(buf)
                            buf = b""
                        break
                    buf += raw_chunk
                    while b"\n" in buf:
                        raw_line, buf = buf.split(b"\n", 1)
                        await _handle_line(raw_line)
                    if early_result:
                        break

                # Keep the shutdown waits inside the timeout so a child that
                # lingers after closing stdout cannot hang the pipeline.
                await stderr_task
                await proc.wait()
        except asyncio.TimeoutError:
            return False, f"Timeout after {_CLAUDE_TIMEOUT}s", 0.0

        stderr_task = None

        if early_result:
            outcome = early_result
        elif proc.returncode != 0 and not final_output:
            stderr_data = b"".join(stderr_chunks)
            err = (
                stderr_data.decode(errors="replace").strip()
                or f"Exit code {proc.returncode}"
            )
            return False, err, 0.0
        else:
            outcome = (True, final_output, cost_usd)

        # Imported here, as in the original, rather than at module import time.
        from app.models import pipeline_stats

        pipeline_stats.add_call(outcome[2])
        return outcome

    except FileNotFoundError:
        return (
            False,
            "'claude' CLI not found. Install: npm install -g @anthropic-ai/claude-code",
            0.0,
        )
    except Exception as e:
        return False, str(e), 0.0
    finally:
        if stderr_task is not None and not stderr_task.done():
            stderr_task.cancel()
            try:
                await stderr_task
            except asyncio.CancelledError:
                pass
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                # The process exited between the check and the kill.
                pass
            try:
                await proc.wait()
            except Exception:
                # Best-effort reap; cleanup must not mask the real result.
                pass
135
+
136
+
137
+ async def evaluate_should_iterate(stage_output: str, working_dir: str) -> bool:
138
+ """Ask Claude whether the stage output has issues that require another iteration."""
139
+ prompt = (
140
+ "You are a quality gate agent. Review the following stage output and decide "
141
+ "whether it contains genuine issues that require another implementation iteration.\n\n"
142
+ "Answer ONLY with 'yes' if there are real issues that need fixing, "
143
+ "or 'no' if the output is satisfactory and the pipeline can proceed.\n\n"
144
+ f"STAGE OUTPUT:\n{stage_output[:4000]}"
145
+ )
146
+ success, response, _ = await call_claude(prompt, working_dir)
147
+ if not success:
148
+ return False
149
+ return response.strip().lower().startswith("yes")