wbcli 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +0 -0
- tests/conftest.py +35 -0
- tests/test_adapters.py +233 -0
- tests/test_agents.py +138 -0
- tests/test_cli.py +393 -0
- tests/test_orchestrator.py +88 -0
- tests/test_pipeline.py +227 -0
- tests/test_plan_parser.py +134 -0
- tests/test_prompt_builder.py +264 -0
- tests/test_tmux.py +473 -0
- tests/test_worktree.py +209 -0
- wbcli-0.0.1.dist-info/METADATA +22 -0
- wbcli-0.0.1.dist-info/RECORD +28 -0
- wbcli-0.0.1.dist-info/WHEEL +5 -0
- wbcli-0.0.1.dist-info/entry_points.txt +2 -0
- wbcli-0.0.1.dist-info/licenses/LICENSE +21 -0
- wbcli-0.0.1.dist-info/top_level.txt +2 -0
- workbench/__init__.py +6 -0
- workbench/_version.py +24 -0
- workbench/adapters.py +154 -0
- workbench/agents.py +547 -0
- workbench/cli.py +434 -0
- workbench/orchestrator.py +343 -0
- workbench/plan_parser.py +166 -0
- workbench/skills/__init__.py +0 -0
- workbench/skills/use-workbench/SKILL.md +238 -0
- workbench/tmux.py +108 -0
- workbench/worktree.py +262 -0
tests/__init__.py
ADDED
|
File without changes
|
tests/conftest.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Shared test fixtures."""
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@pytest.fixture
def git_repo(tmp_path):
    """Create a temporary git repo with a ``main`` branch and an initial commit.

    The repository is initialised directly inside ``tmp_path``, given a
    throwaway identity, and seeded with a committed ``README.md``.

    Returns:
        The ``tmp_path`` directory, which is the root of the new repository.

    Raises:
        subprocess.CalledProcessError: if any git command exits non-zero.
    """

    def git(*args):
        # Every step is checked so that a broken setup (for example a failing
        # ``git config``) is reported where it happens instead of surfacing
        # later as an unrelated-looking commit failure.
        subprocess.run(
            ["git", *args],
            cwd=tmp_path,
            check=True,
            capture_output=True,
        )

    git("init", "--initial-branch=main")
    git("config", "user.email", "test@test.com")
    git("config", "user.name", "Test")
    (tmp_path / "README.md").write_text("init")
    git("add", ".")
    git("commit", "-m", "init")
    return tmp_path
|
tests/test_adapters.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Tests for agent platform adapters."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from workbench.adapters import (
|
|
9
|
+
ALLOWED_TOOLS,
|
|
10
|
+
AgentAdapter,
|
|
11
|
+
ClaudeAdapter,
|
|
12
|
+
CodexAdapter,
|
|
13
|
+
ConfigAdapter,
|
|
14
|
+
GenericAdapter,
|
|
15
|
+
get_adapter,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestClaudeAdapter:
    """Checks for ``ClaudeAdapter``: name, command line and output parsing."""

    def setup_method(self):
        # Runs before each test so every test works on its own adapter.
        self.adapter = ClaudeAdapter()

    def test_name(self):
        """The adapter reports the name ``claude``."""
        reported = self.adapter.name
        assert reported == "claude"

    def test_build_command(self, tmp_path):
        """The command passes the prompt via ``-p`` and requests JSON output."""
        expected = [
            "claude",
            "-p",
            "do something",
            "--output-format",
            "json",
            "--allowedTools",
            ALLOWED_TOOLS,
        ]
        built = self.adapter.build_command("do something", tmp_path)
        assert built == expected

    def test_parse_output_valid_json(self):
        """Result text and cost are pulled out of a well-formed payload."""
        payload = {"result": "done", "cost_usd": {"input": 0.01}}
        text, cost = self.adapter.parse_output(json.dumps(payload))
        assert text == "done"
        assert cost == {"input": 0.01}

    def test_parse_output_missing_keys(self):
        """Valid JSON without the expected keys yields the raw text and no cost."""
        raw = json.dumps({"other": "data"})
        parsed = self.adapter.parse_output(raw)
        text, cost = parsed
        # No "result" key: the raw payload is expected back unchanged.
        assert text == raw
        assert cost == {}

    def test_parse_output_invalid_json(self):
        """Non-JSON output is returned as-is with an empty cost mapping."""
        raw = "not json at all"
        parsed = self.adapter.parse_output(raw)
        text, cost = parsed
        assert text == raw
        assert cost == {}

    def test_is_agent_adapter(self):
        """The adapter is recognised as an ``AgentAdapter``."""
        adapter = self.adapter
        assert isinstance(adapter, AgentAdapter)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TestCodexAdapter:
    """Checks for ``CodexAdapter``: name, command line and output parsing."""

    def setup_method(self):
        # Runs before each test so every test works on its own adapter.
        self.adapter = CodexAdapter()

    def test_name(self):
        """The adapter reports the name ``codex``."""
        reported = self.adapter.name
        assert reported == "codex"

    def test_build_command(self, tmp_path):
        """The prompt is appended after the full-auto flags."""
        expected = [
            "codex",
            "-q",
            "--full-auto",
            "--approval-mode",
            "full-auto",
            "fix bug",
        ]
        built = self.adapter.build_command("fix bug", tmp_path)
        assert built == expected

    def test_parse_output_strips_whitespace(self):
        """Surrounding whitespace is removed and no cost is reported."""
        parsed = self.adapter.parse_output(" hello world \n")
        text, cost = parsed
        assert text == "hello world"
        assert cost == {}

    def test_is_agent_adapter(self):
        """The adapter is recognised as an ``AgentAdapter``."""
        adapter = self.adapter
        assert isinstance(adapter, AgentAdapter)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class TestGenericAdapter:
    """Checks for ``GenericAdapter``, built from a bare command name."""

    def test_name_matches_cmd(self):
        """The adapter name equals the command it was created with."""
        generic = GenericAdapter("my-tool")
        reported = generic.name
        assert reported == "my-tool"

    def test_build_command(self, tmp_path):
        """The command line is the command followed by the prompt."""
        generic = GenericAdapter("my-tool")
        built = generic.build_command("hello", tmp_path)
        assert built == ["my-tool", "hello"]

    def test_parse_output(self):
        """Output is stripped of surrounding whitespace; cost is empty."""
        generic = GenericAdapter("my-tool")
        parsed = generic.parse_output(" output \n")
        text, cost = parsed
        assert text == "output"
        assert cost == {}

    def test_is_agent_adapter(self):
        """The adapter is recognised as an ``AgentAdapter``."""
        generic = GenericAdapter("x")
        assert isinstance(generic, AgentAdapter)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class TestConfigAdapter:
    """Checks for ``ConfigAdapter``: prompt substitution and output parsing."""

    @staticmethod
    def _custom(**overrides):
        """Build the ``custom`` / ``my-cli`` adapter these tests share.

        Only keyword arguments given in ``overrides`` are forwarded on top of
        the base ones, so omitted options keep the adapter's own defaults.
        """
        params = {"name": "custom", "command": "my-cli", "args": ["{prompt}"]}
        params.update(overrides)
        return ConfigAdapter(**params)

    def test_build_command_substitutes_prompt(self, tmp_path):
        """The ``{prompt}`` placeholder in ``args`` is replaced by the prompt."""
        adapter = self._custom(args=["--headless", "{prompt}", "--verbose"])
        built = adapter.build_command("do work", tmp_path)
        assert built == ["my-cli", "--headless", "do work", "--verbose"]

    def test_parse_output_text_format(self):
        """Text output is stripped and carries no cost."""
        adapter = self._custom(output_format="text")
        parsed = adapter.parse_output(" some output \n")
        text, cost = parsed
        assert text == "some output"
        assert cost == {}

    def test_parse_output_json_format(self):
        """Configured result and cost keys are read from the JSON payload."""
        adapter = self._custom(
            output_format="json",
            json_result_key="answer",
            json_cost_key="price",
        )
        payload = {"answer": "42", "price": {"total": 0.05}}
        text, cost = adapter.parse_output(json.dumps(payload))
        assert text == "42"
        assert cost == {"total": 0.05}

    def test_parse_output_json_format_invalid(self):
        """Unparseable output in JSON mode comes back verbatim with no cost."""
        adapter = self._custom(output_format="json")
        parsed = adapter.parse_output("not json")
        text, cost = parsed
        assert text == "not json"
        assert cost == {}

    def test_parse_output_json_missing_keys(self):
        """JSON lacking the configured keys yields the raw text and no cost."""
        adapter = self._custom(
            output_format="json",
            json_result_key="answer",
            json_cost_key="price",
        )
        raw = json.dumps({"unrelated": "data"})
        parsed = adapter.parse_output(raw)
        text, cost = parsed
        # Neither configured key is present: the raw payload is expected back.
        assert text == raw
        assert cost == {}

    def test_is_agent_adapter(self):
        """The adapter is recognised as an ``AgentAdapter``."""
        bare = ConfigAdapter(name="x", command="x", args=[])
        assert isinstance(bare, AgentAdapter)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class TestGetAdapter:
    """Checks for ``get_adapter``: built-in lookup and YAML config overrides.

    The YAML fixtures nest each agent's settings under the agent name with
    explicit indentation. Without that nesting the settings would parse as
    siblings of the agent key and the agent entry itself would be empty.
    """

    def test_returns_claude_adapter(self):
        """``claude`` resolves to the built-in ``ClaudeAdapter``."""
        adapter = get_adapter("claude")
        assert isinstance(adapter, ClaudeAdapter)

    def test_returns_codex_adapter(self):
        """``codex`` resolves to the built-in ``CodexAdapter``."""
        adapter = get_adapter("codex")
        assert isinstance(adapter, CodexAdapter)

    def test_returns_generic_for_unknown(self):
        """An unknown command name yields a ``GenericAdapter`` of that name."""
        adapter = get_adapter("some-random-tool")
        assert isinstance(adapter, GenericAdapter)
        assert adapter.name == "some-random-tool"

    def test_config_path_not_exists_falls_through(self, tmp_path):
        """A missing config file does not prevent the built-in lookup."""
        adapter = get_adapter("claude", config_path=tmp_path / "nonexistent.yaml")
        assert isinstance(adapter, ClaudeAdapter)

    def test_config_adapter_from_yaml(self, tmp_path):
        """An agent defined in the YAML config becomes a ``ConfigAdapter``."""
        config_file = tmp_path / "agents.yaml"
        config_file.write_text(
            "agents:\n"
            "  my-agent:\n"
            "    command: my-agent-cli\n"
            "    args:\n"
            "      - '--headless'\n"
            "      - '{prompt}'\n"
            "    output_format: json\n"
            "    json_result_key: result\n"
            "    json_cost_key: cost_usd\n"
        )
        adapter = get_adapter("my-agent", config_path=config_file)
        assert isinstance(adapter, ConfigAdapter)
        assert adapter.name == "my-agent"
        assert adapter.command == "my-agent-cli"
        assert adapter.output_format == "json"

        cmd = adapter.build_command("test prompt", tmp_path)
        assert cmd == ["my-agent-cli", "--headless", "test prompt"]

    def test_config_agent_not_in_yaml_falls_through(self, tmp_path):
        """A config that lacks the requested agent falls back to the built-in."""
        config_file = tmp_path / "agents.yaml"
        config_file.write_text(
            "agents:\n"
            "  other-agent:\n"
            "    command: other\n"
        )
        adapter = get_adapter("claude", config_path=config_file)
        assert isinstance(adapter, ClaudeAdapter)

    def test_config_overrides_builtin(self, tmp_path):
        """Config entry for 'claude' should override the built-in ClaudeAdapter."""
        config_file = tmp_path / "agents.yaml"
        config_file.write_text(
            "agents:\n"
            "  claude:\n"
            "    command: custom-claude\n"
            "    args: ['{prompt}']\n"
        )
        adapter = get_adapter("claude", config_path=config_file)
        assert isinstance(adapter, ConfigAdapter)
        assert adapter.command == "custom-claude"

    def test_config_defaults(self, tmp_path):
        """Minimal config entry should get sensible defaults."""
        config_file = tmp_path / "agents.yaml"
        config_file.write_text(
            "agents:\n"
            "  minimal:\n"
            "    command: min-cli\n"
        )
        adapter = get_adapter("minimal", config_path=config_file)
        assert isinstance(adapter, ConfigAdapter)
        assert adapter.args == ["{prompt}"]
        assert adapter.output_format == "text"
        assert adapter.json_result_key == "result"
        assert adapter.json_cost_key == "cost_usd"
|
tests/test_agents.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Tests for the agents module — spawning, prompts, and result parsing."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
from unittest.mock import AsyncMock, patch, MagicMock
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from workbench.agents import AgentResult, Role, TaskStatus, build_prompt, run_agent
|
|
10
|
+
from workbench.plan_parser import Task
|
|
11
|
+
from workbench.worktree import Worktree
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.fixture
def sample_task():
    """Provide a small ``Task`` (``task-1``) that touches ``src/foo.py``."""
    task = Task(
        id="task-1",
        title="Test Task",
        description="Do something",
        files=["src/foo.py"],
    )
    return task
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@pytest.fixture
def sample_worktree(tmp_path):
    """Provide a ``Worktree`` for ``task-1`` rooted at pytest's ``tmp_path``."""
    worktree = Worktree(
        path=tmp_path,
        branch="wb/test-task",
        task_id="task-1",
    )
    return worktree
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TestRunAgentTmux:
    """``run_agent`` with ``use_tmux=True`` and ``run_in_tmux`` mocked out."""

    @staticmethod
    def _run_with_tmux_outcome(outcome, task, worktree, repo):
        """Run an implementor agent while ``run_in_tmux`` returns ``outcome``.

        ``get_main_branch`` and ``get_diff`` are patched too, so the call does
        not depend on a real repository.
        """
        tmux_patch = patch(
            "workbench.agents.run_in_tmux",
            new_callable=AsyncMock,
            return_value=outcome,
        )
        branch_patch = patch("workbench.agents.get_main_branch", return_value="main")
        diff_patch = patch("workbench.agents.get_diff", return_value="")
        # Entered in the same order as listed: tmux, main branch, diff.
        with tmux_patch, branch_patch, diff_patch:
            return asyncio.run(
                run_agent(
                    role=Role.IMPLEMENTOR,
                    task=task,
                    worktree=worktree,
                    repo=repo,
                    agent_cmd="claude",
                    use_tmux=True,
                )
            )

    def test_run_agent_success_tmux(self, sample_task, sample_worktree, tmp_path):
        """Exit code 0 with a JSON payload produces a DONE result for the task."""
        output = json.dumps({"result": "all good", "cost_usd": {"input": 0.01}})
        result = self._run_with_tmux_outcome(
            (0, output), sample_task, sample_worktree, tmp_path
        )

        assert result.status == TaskStatus.DONE
        assert result.task_id == "task-1"

    def test_run_agent_failure_tmux(self, sample_task, sample_worktree, tmp_path):
        """Exit code 1 produces a FAILED result."""
        result = self._run_with_tmux_outcome(
            (1, "error"), sample_task, sample_worktree, tmp_path
        )

        assert result.status == TaskStatus.FAILED
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class TestRunAgentSubprocess:
    """``run_agent`` with ``use_tmux=False`` and subprocess creation mocked."""

    def test_run_agent_no_tmux(self, sample_task, sample_worktree, tmp_path):
        """Without tmux, the agent is spawned once via ``create_subprocess_exec``."""
        # Stand-in process: exits 0 and writes "done" to stdout.
        fake_proc = AsyncMock()
        fake_proc.communicate.return_value = (b"done", b"")
        fake_proc.returncode = 0

        exec_patch = patch(
            "workbench.agents.asyncio.create_subprocess_exec",
            new_callable=AsyncMock,
            return_value=fake_proc,
        )
        branch_patch = patch("workbench.agents.get_main_branch", return_value="main")
        diff_patch = patch("workbench.agents.get_diff", return_value="")
        with exec_patch as mock_exec, branch_patch, diff_patch:
            result = asyncio.run(
                run_agent(
                    role=Role.IMPLEMENTOR,
                    task=sample_task,
                    worktree=sample_worktree,
                    repo=tmp_path,
                    agent_cmd="claude",
                    use_tmux=False,
                )
            )

        mock_exec.assert_called_once()
        assert result.status == TaskStatus.DONE
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class TestPromptBuilding:
    """``build_prompt`` includes the worktree branch name in its output."""

    def test_implementor_prompt_has_branch(self, sample_task, sample_worktree):
        """The implementor prompt mentions the worktree's branch."""
        diff_patch = patch("workbench.agents.get_diff", return_value="")
        with diff_patch:
            prompt = build_prompt(
                role=Role.IMPLEMENTOR,
                task=sample_task,
                worktree=sample_worktree,
                base_branch="main",
            )
        assert "wb/test-task" in prompt

    def test_fixer_prompt_has_branch(self, sample_task, sample_worktree):
        """The fixer prompt mentions the worktree's branch."""
        diff_patch = patch("workbench.agents.get_diff", return_value="some diff")
        with diff_patch:
            prompt = build_prompt(
                role=Role.FIXER,
                task=sample_task,
                worktree=sample_worktree,
                base_branch="main",
            )
        assert "wb/test-task" in prompt
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class TestAgentResult:
    """``AgentResult`` verdict and feedback properties."""

    @staticmethod
    def _done_result(role, output):
        """Build a DONE ``AgentResult`` for ``task-1`` with the given output."""
        return AgentResult(
            task_id="task-1",
            role=role,
            status=TaskStatus.DONE,
            output=output,
        )

    def test_agent_result_passed(self):
        """Output ending in 'VERDICT: PASS' makes ``.passed`` True."""
        result = self._done_result(Role.TESTER, "All tests passed.\nVERDICT: PASS")
        assert result.passed is True

    def test_agent_result_failed_verdict(self):
        """Output ending in 'VERDICT: FAIL' makes ``.passed`` False."""
        result = self._done_result(Role.TESTER, "Some tests failed.\nVERDICT: FAIL")
        assert result.passed is False

    def test_agent_result_feedback(self):
        """``.feedback`` keeps the review text and drops the VERDICT line."""
        result = self._done_result(
            Role.REVIEWER,
            "Missing error handling in foo().\nNeeds type hints.\nVERDICT: FAIL",
        )
        feedback = result.feedback
        assert "Missing error handling" in feedback
        assert "Needs type hints" in feedback
        assert "VERDICT" not in feedback
|