zwarm 0.1.0__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {zwarm-0.1.0 → zwarm-1.0.0}/PKG-INFO +162 -36
- {zwarm-0.1.0 → zwarm-1.0.0}/README.md +161 -35
- {zwarm-0.1.0 → zwarm-1.0.0}/pyproject.toml +1 -1
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/adapters/claude_code.py +55 -3
- zwarm-1.0.0/src/zwarm/adapters/codex_mcp.py +739 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/adapters/test_codex_mcp.py +26 -26
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/cli/main.py +464 -3
- zwarm-1.0.0/src/zwarm/core/compact.py +312 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/config.py +51 -9
- zwarm-1.0.0/src/zwarm/core/environment.py +154 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/models.py +16 -0
- zwarm-1.0.0/src/zwarm/core/test_compact.py +266 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/orchestrator.py +222 -39
- zwarm-1.0.0/src/zwarm/prompts/orchestrator.py +196 -0
- zwarm-1.0.0/src/zwarm/test_orchestrator_watchers.py +23 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/tools/delegation.py +23 -4
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/builtin.py +90 -4
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/manager.py +46 -8
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/test_watchers.py +42 -0
- zwarm-0.1.0/src/zwarm/adapters/codex_mcp.py +0 -428
- zwarm-0.1.0/src/zwarm/core/environment.py +0 -83
- zwarm-0.1.0/src/zwarm/prompts/orchestrator.py +0 -214
- {zwarm-0.1.0 → zwarm-1.0.0}/.gitignore +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/__init__.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/adapters/__init__.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/adapters/base.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/cli/__init__.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/__init__.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/state.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/test_config.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/test_models.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/prompts/__init__.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/tools/__init__.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/__init__.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/base.py +0 -0
- {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/registry.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: zwarm
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Multi-Agent CLI Orchestration Research Platform
|
|
5
5
|
Requires-Python: <3.14,>=3.13
|
|
6
6
|
Requires-Dist: python-dotenv>=1.0.0
|
|
@@ -12,7 +12,17 @@ Description-Content-Type: text/markdown
|
|
|
12
12
|
|
|
13
13
|
# zwarm
|
|
14
14
|
|
|
15
|
-
Multi-agent CLI orchestration research platform. Coordinate multiple coding agents (Codex, Claude Code) with delegation, conversation, and
|
|
15
|
+
Multi-agent CLI orchestration research platform. Coordinate multiple coding agents (Codex, Claude Code) with delegation, conversation, trajectory alignment, and automatic context management.
|
|
16
|
+
|
|
17
|
+
## Key Features
|
|
18
|
+
|
|
19
|
+
- **Multi-adapter support**: Codex MCP, Claude Code adapters with unified interface
|
|
20
|
+
- **Sync & async modes**: Conversational (iterative refinement) or fire-and-forget
|
|
21
|
+
- **Token tracking**: Per-session token usage tracked and persisted for cost analysis
|
|
22
|
+
- **Context compaction**: Automatic LRU-style pruning when approaching context limits
|
|
23
|
+
- **Trajectory watchers**: Composable guardrails (progress, budget, scope, pattern, delegation)
|
|
24
|
+
- **State persistence**: Resume sessions, track history, replay events
|
|
25
|
+
- **Weave integration**: Full tracing and observability
|
|
16
26
|
|
|
17
27
|
## Installation
|
|
18
28
|
|
|
@@ -33,16 +43,19 @@ uv pip install -e ./zwarm
|
|
|
33
43
|
## Quick Start
|
|
34
44
|
|
|
35
45
|
```bash
|
|
36
|
-
# 1.
|
|
46
|
+
# 1. Initialize zwarm in your project
|
|
47
|
+
zwarm init
|
|
48
|
+
|
|
49
|
+
# 2. Test an executor directly
|
|
37
50
|
zwarm exec --task "What is 2+2?"
|
|
38
51
|
|
|
39
|
-
#
|
|
52
|
+
# 3. Run the orchestrator with a task
|
|
40
53
|
zwarm orchestrate --task "Create a hello world Python function"
|
|
41
54
|
|
|
42
|
-
#
|
|
55
|
+
# 4. Check state after running
|
|
43
56
|
zwarm status
|
|
44
57
|
|
|
45
|
-
#
|
|
58
|
+
# 5. View event history
|
|
46
59
|
zwarm history
|
|
47
60
|
```
|
|
48
61
|
|
|
@@ -80,24 +93,38 @@ adapter = "codex_mcp" # or "claude_code"
|
|
|
80
93
|
### Environment Variables
|
|
81
94
|
|
|
82
95
|
```bash
|
|
83
|
-
#
|
|
96
|
+
# Weave tracing (optional but recommended)
|
|
84
97
|
export WEAVE_PROJECT="your-entity/zwarm"
|
|
85
98
|
|
|
86
|
-
#
|
|
87
|
-
export OPENAI_API_KEY="
|
|
88
|
-
export ANTHROPIC_API_KEY="
|
|
99
|
+
# Executor authentication (required - set based on which adapter you use)
|
|
100
|
+
export OPENAI_API_KEY="sk-..." # Required for codex_mcp adapter
|
|
101
|
+
export ANTHROPIC_API_KEY="sk-ant-..." # Required for claude_code adapter
|
|
89
102
|
```
|
|
90
103
|
|
|
104
|
+
**Important:** The orchestrator agent runs with your credentials, but the executor adapters (Codex, Claude Code) need their own authentication. If executors fail with auth errors, check that the appropriate API key is set in your environment.
|
|
105
|
+
|
|
106
|
+
You can also put these in a `.env` file in your project root - zwarm will load it automatically.
|
|
107
|
+
|
|
91
108
|
### Full Configuration Reference
|
|
92
109
|
|
|
93
110
|
```yaml
|
|
94
111
|
# config.yaml
|
|
95
112
|
orchestrator:
|
|
113
|
+
lm: gpt-5-mini # Model for the orchestrator itself
|
|
96
114
|
max_steps: 100 # Maximum orchestrator steps
|
|
115
|
+
compaction: # Context window management
|
|
116
|
+
enabled: true
|
|
117
|
+
max_tokens: 100000 # Context budget; the percentages below are relative to this
|
|
118
|
+
threshold_pct: 0.85 # Compact at 85% of max
|
|
119
|
+
target_pct: 0.7 # Target 70% after compaction
|
|
120
|
+
keep_first_n: 2 # Always keep system + task
|
|
121
|
+
keep_last_n: 10 # Always keep recent context
|
|
97
122
|
|
|
98
123
|
executor:
|
|
99
124
|
adapter: codex_mcp # Default adapter: codex_mcp | claude_code
|
|
100
|
-
model: null # Model override (adapter
|
|
125
|
+
model: null # Model override (null = use adapter default)
|
|
126
|
+
# codex_mcp default: gpt-5.1-codex-mini
|
|
127
|
+
# claude_code default: claude-sonnet-4-5-20250514
|
|
101
128
|
sandbox: workspace-write # Codex sandbox mode
|
|
102
129
|
|
|
103
130
|
weave:
|
|
@@ -107,24 +134,25 @@ weave:
|
|
|
107
134
|
state_dir: .zwarm # State directory for sessions/events
|
|
108
135
|
|
|
109
136
|
watchers:
|
|
110
|
-
enabled:
|
|
111
|
-
|
|
112
|
-
progress
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
scope
|
|
118
|
-
|
|
137
|
+
enabled: true
|
|
138
|
+
watchers:
|
|
139
|
+
- name: progress
|
|
140
|
+
- name: budget
|
|
141
|
+
config:
|
|
142
|
+
max_steps: 50
|
|
143
|
+
max_sessions: 10
|
|
144
|
+
- name: scope
|
|
145
|
+
config:
|
|
146
|
+
keywords: []
|
|
119
147
|
```
|
|
120
148
|
|
|
121
149
|
## Adapters
|
|
122
150
|
|
|
123
|
-
zwarm supports multiple CLI coding agents through adapters.
|
|
151
|
+
zwarm supports multiple CLI coding agents through adapters. Each adapter wraps a different coding CLI and handles the mechanics of starting sessions, sending messages, and capturing responses.
|
|
124
152
|
|
|
125
153
|
### Codex MCP (default)
|
|
126
154
|
|
|
127
|
-
Uses Codex via MCP server for true conversational sessions.
|
|
155
|
+
Uses Codex via MCP server for true conversational sessions. This is the recommended adapter for iterative work where you need back-and-forth refinement.
|
|
128
156
|
|
|
129
157
|
```bash
|
|
130
158
|
# Sync mode (conversational)
|
|
@@ -134,17 +162,45 @@ zwarm exec --adapter codex_mcp --task "Add a login function"
|
|
|
134
162
|
# using delegate() and converse() tools
|
|
135
163
|
```
|
|
136
164
|
|
|
137
|
-
|
|
165
|
+
| Setting | Value |
|
|
166
|
+
|---------|-------|
|
|
167
|
+
| Default model | `gpt-5.1-codex-mini` |
|
|
168
|
+
| Requires | `codex` CLI installed |
|
|
169
|
+
| Auth | `OPENAI_API_KEY` environment variable |
|
|
138
170
|
|
|
139
171
|
### Claude Code
|
|
140
172
|
|
|
141
|
-
Uses Claude Code CLI for execution.
|
|
173
|
+
Uses Claude Code CLI for execution. Good alternative when you want Claude's capabilities.
|
|
142
174
|
|
|
143
175
|
```bash
|
|
144
176
|
zwarm exec --adapter claude_code --task "Fix the type errors"
|
|
145
177
|
```
|
|
146
178
|
|
|
147
|
-
|
|
179
|
+
| Setting | Value |
|
|
180
|
+
|---------|-------|
|
|
181
|
+
| Default model | `claude-sonnet-4-5-20250514` |
|
|
182
|
+
| Requires | `claude` CLI installed and authenticated |
|
|
183
|
+
| Auth | `ANTHROPIC_API_KEY` or `claude` CLI auth |
|
|
184
|
+
|
|
185
|
+
### Model Selection
|
|
186
|
+
|
|
187
|
+
Models are selected with this precedence (highest to lowest):
|
|
188
|
+
|
|
189
|
+
1. **Per-delegation override**: `delegate(task="...", model="o3")`
|
|
190
|
+
2. **Config file**: `executor.model` in config.toml or zwarm.yaml
|
|
191
|
+
3. **Adapter default**: Each adapter has a sensible default
|
|
192
|
+
|
|
193
|
+
```toml
|
|
194
|
+
# config.toml - override the default model
|
|
195
|
+
[executor]
|
|
196
|
+
adapter = "codex_mcp"
|
|
197
|
+
model = "gpt-5.1-codex-max" # Use the more capable model
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
# Or override per-execution
|
|
202
|
+
zwarm exec --model gpt-5.1-codex-max --task "Complex refactoring"
|
|
203
|
+
```
|
|
148
204
|
|
|
149
205
|
## Watchers (Trajectory Alignment)
|
|
150
206
|
|
|
@@ -155,10 +211,11 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
|
|
|
155
211
|
| Watcher | Description |
|
|
156
212
|
|---------|-------------|
|
|
157
213
|
| `progress` | Detects stuck/spinning agents |
|
|
158
|
-
| `budget` | Monitors step/session limits |
|
|
214
|
+
| `budget` | Monitors step/session limits (counts only active sessions) |
|
|
159
215
|
| `scope` | Detects scope creep from original task |
|
|
160
216
|
| `pattern` | Custom regex pattern matching |
|
|
161
217
|
| `quality` | Code quality checks |
|
|
218
|
+
| `delegation` | Ensures orchestrator delegates instead of writing code directly |
|
|
162
219
|
|
|
163
220
|
### Enabling Watchers
|
|
164
221
|
|
|
@@ -216,6 +273,41 @@ View traces at: `https://wandb.ai/your-entity/zwarm/weave`
|
|
|
216
273
|
|
|
217
274
|
## CLI Reference
|
|
218
275
|
|
|
276
|
+
### init
|
|
277
|
+
|
|
278
|
+
Initialize zwarm in a project directory.
|
|
279
|
+
|
|
280
|
+
```bash
|
|
281
|
+
zwarm init [OPTIONS]
|
|
282
|
+
|
|
283
|
+
Options:
|
|
284
|
+
-w, --working-dir PATH Working directory [default: .]
|
|
285
|
+
-y, --yes Accept defaults, no prompts
|
|
286
|
+
--with-project Also create zwarm.yaml project config
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
**What it creates:**
|
|
290
|
+
|
|
291
|
+
1. `config.toml` - User settings (Weave project, adapter preferences, watchers)
|
|
292
|
+
2. `.zwarm/` - State directory for sessions and events
|
|
293
|
+
3. `zwarm.yaml` (optional) - Project-specific task configuration
|
|
294
|
+
|
|
295
|
+
**Examples:**
|
|
296
|
+
|
|
297
|
+
```bash
|
|
298
|
+
# Interactive setup with prompts
|
|
299
|
+
zwarm init
|
|
300
|
+
|
|
301
|
+
# Non-interactive with defaults
|
|
302
|
+
zwarm init --yes
|
|
303
|
+
|
|
304
|
+
# Create project config too
|
|
305
|
+
zwarm init --with-project
|
|
306
|
+
|
|
307
|
+
# Initialize in a different directory
|
|
308
|
+
zwarm init --working-dir /path/to/project
|
|
309
|
+
```
|
|
310
|
+
|
|
219
311
|
### orchestrate
|
|
220
312
|
|
|
221
313
|
Start an orchestrator session to delegate tasks.
|
|
@@ -234,19 +326,20 @@ Options:
|
|
|
234
326
|
|
|
235
327
|
### exec
|
|
236
328
|
|
|
237
|
-
Run a single executor directly (for testing).
|
|
329
|
+
Run a single executor directly (for testing). This bypasses the orchestrator entirely and hits the adapter (Codex/Claude) immediately with your task - useful for verifying adapters work before running full orchestration.
|
|
238
330
|
|
|
239
331
|
```bash
|
|
240
332
|
zwarm exec [OPTIONS]
|
|
241
333
|
|
|
242
334
|
Options:
|
|
243
335
|
-t, --task TEXT Task to execute
|
|
244
|
-
-f, --task-file PATH Read task from file
|
|
245
336
|
--adapter TEXT Adapter to use [default: codex_mcp]
|
|
246
337
|
--model TEXT Model override
|
|
247
338
|
--mode [sync|async] Execution mode [default: sync]
|
|
248
339
|
```
|
|
249
340
|
|
|
341
|
+
**Note:** Unlike `orchestrate`, this does NOT use watchers, compaction, state persistence, or multi-step planning. It's a single direct call to the executor.
|
|
342
|
+
|
|
250
343
|
### status
|
|
251
344
|
|
|
252
345
|
Show current orchestrator state.
|
|
@@ -282,6 +375,30 @@ zwarm configs list # List available configs
|
|
|
282
375
|
zwarm configs show NAME # Show config contents
|
|
283
376
|
```
|
|
284
377
|
|
|
378
|
+
### clean
|
|
379
|
+
|
|
380
|
+
Clean up zwarm state (useful for starting fresh).
|
|
381
|
+
|
|
382
|
+
```bash
|
|
383
|
+
zwarm clean [OPTIONS]
|
|
384
|
+
|
|
385
|
+
Options:
|
|
386
|
+
--all Remove everything (events, sessions, state)
|
|
387
|
+
--events Remove only events
|
|
388
|
+
--sessions Remove only sessions
|
|
389
|
+
-y, --yes Skip confirmation prompt
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
**Examples:**
|
|
393
|
+
|
|
394
|
+
```bash
|
|
395
|
+
# Clean everything and start fresh
|
|
396
|
+
zwarm clean --all --yes
|
|
397
|
+
|
|
398
|
+
# Clean only events log
|
|
399
|
+
zwarm clean --events
|
|
400
|
+
```
|
|
401
|
+
|
|
285
402
|
## Architecture
|
|
286
403
|
|
|
287
404
|
```
|
|
@@ -332,10 +449,16 @@ All state is stored in flat files under `.zwarm/`:
|
|
|
332
449
|
### Running Tests
|
|
333
450
|
|
|
334
451
|
```bash
|
|
335
|
-
#
|
|
336
|
-
uv run pytest
|
|
452
|
+
# Run all zwarm tests (68 tests)
|
|
453
|
+
uv run pytest src/zwarm/ -v
|
|
454
|
+
|
|
455
|
+
# Run specific test modules
|
|
456
|
+
uv run pytest src/zwarm/core/test_compact.py -v # Context compaction
|
|
457
|
+
uv run pytest src/zwarm/watchers/test_watchers.py -v # Watchers
|
|
458
|
+
uv run pytest src/zwarm/adapters/test_codex_mcp.py -v # Codex adapter
|
|
337
459
|
|
|
338
|
-
#
|
|
460
|
+
# Run integration tests (requires codex CLI)
|
|
461
|
+
uv run pytest -m integration
|
|
339
462
|
```
|
|
340
463
|
|
|
341
464
|
### Project Structure
|
|
@@ -345,19 +468,22 @@ zwarm/
|
|
|
345
468
|
├── src/zwarm/
|
|
346
469
|
│ ├── adapters/ # Executor adapters
|
|
347
470
|
│ │ ├── base.py # ExecutorAdapter protocol
|
|
348
|
-
│ │ ├── codex_mcp.py # Codex MCP adapter
|
|
349
|
-
│ │ └── claude_code.py # Claude Code adapter
|
|
471
|
+
│ │ ├── codex_mcp.py # Codex MCP adapter (with token tracking)
|
|
472
|
+
│ │ └── claude_code.py # Claude Code adapter (with token tracking)
|
|
350
473
|
│ ├── cli/
|
|
351
474
|
│ │ └── main.py # Typer CLI
|
|
352
475
|
│ ├── core/
|
|
476
|
+
│ │ ├── compact.py # Context window compaction (LRU pruning)
|
|
353
477
|
│ │ ├── config.py # Configuration loading
|
|
354
|
-
│ │ ├──
|
|
478
|
+
│ │ ├── environment.py # OrchestratorEnv (progress display)
|
|
479
|
+
│ │ ├── models.py # ConversationSession, Message, Event, etc.
|
|
355
480
|
│ │ └── state.py # Flat-file state management
|
|
356
481
|
│ ├── tools/
|
|
357
|
-
│ │ └── delegation.py # delegate, converse, etc.
|
|
482
|
+
│ │ └── delegation.py # delegate, converse, check_session, etc.
|
|
358
483
|
│ ├── watchers/
|
|
359
484
|
│ │ ├── base.py # Watcher protocol
|
|
360
|
-
│ │ ├── builtin.py # Built-in watchers
|
|
485
|
+
│ │ ├── builtin.py # Built-in watchers (progress, budget, scope, etc.)
|
|
486
|
+
│ │ ├── registry.py # Watcher registration
|
|
361
487
|
│ │ └── manager.py # WatcherManager
|
|
362
488
|
│ ├── prompts/
|
|
363
489
|
│ │ └── orchestrator.py # Orchestrator system prompt
|
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
# zwarm
|
|
2
2
|
|
|
3
|
-
Multi-agent CLI orchestration research platform. Coordinate multiple coding agents (Codex, Claude Code) with delegation, conversation, and
|
|
3
|
+
Multi-agent CLI orchestration research platform. Coordinate multiple coding agents (Codex, Claude Code) with delegation, conversation, trajectory alignment, and automatic context management.
|
|
4
|
+
|
|
5
|
+
## Key Features
|
|
6
|
+
|
|
7
|
+
- **Multi-adapter support**: Codex MCP, Claude Code adapters with unified interface
|
|
8
|
+
- **Sync & async modes**: Conversational (iterative refinement) or fire-and-forget
|
|
9
|
+
- **Token tracking**: Per-session token usage tracked and persisted for cost analysis
|
|
10
|
+
- **Context compaction**: Automatic LRU-style pruning when approaching context limits
|
|
11
|
+
- **Trajectory watchers**: Composable guardrails (progress, budget, scope, pattern, delegation)
|
|
12
|
+
- **State persistence**: Resume sessions, track history, replay events
|
|
13
|
+
- **Weave integration**: Full tracing and observability
|
|
4
14
|
|
|
5
15
|
## Installation
|
|
6
16
|
|
|
@@ -21,16 +31,19 @@ uv pip install -e ./zwarm
|
|
|
21
31
|
## Quick Start
|
|
22
32
|
|
|
23
33
|
```bash
|
|
24
|
-
# 1.
|
|
34
|
+
# 1. Initialize zwarm in your project
|
|
35
|
+
zwarm init
|
|
36
|
+
|
|
37
|
+
# 2. Test an executor directly
|
|
25
38
|
zwarm exec --task "What is 2+2?"
|
|
26
39
|
|
|
27
|
-
#
|
|
40
|
+
# 3. Run the orchestrator with a task
|
|
28
41
|
zwarm orchestrate --task "Create a hello world Python function"
|
|
29
42
|
|
|
30
|
-
#
|
|
43
|
+
# 4. Check state after running
|
|
31
44
|
zwarm status
|
|
32
45
|
|
|
33
|
-
#
|
|
46
|
+
# 5. View event history
|
|
34
47
|
zwarm history
|
|
35
48
|
```
|
|
36
49
|
|
|
@@ -68,24 +81,38 @@ adapter = "codex_mcp" # or "claude_code"
|
|
|
68
81
|
### Environment Variables
|
|
69
82
|
|
|
70
83
|
```bash
|
|
71
|
-
#
|
|
84
|
+
# Weave tracing (optional but recommended)
|
|
72
85
|
export WEAVE_PROJECT="your-entity/zwarm"
|
|
73
86
|
|
|
74
|
-
#
|
|
75
|
-
export OPENAI_API_KEY="
|
|
76
|
-
export ANTHROPIC_API_KEY="
|
|
87
|
+
# Executor authentication (required - set based on which adapter you use)
|
|
88
|
+
export OPENAI_API_KEY="sk-..." # Required for codex_mcp adapter
|
|
89
|
+
export ANTHROPIC_API_KEY="sk-ant-..." # Required for claude_code adapter
|
|
77
90
|
```
|
|
78
91
|
|
|
92
|
+
**Important:** The orchestrator agent runs with your credentials, but the executor adapters (Codex, Claude Code) need their own authentication. If executors fail with auth errors, check that the appropriate API key is set in your environment.
|
|
93
|
+
|
|
94
|
+
You can also put these in a `.env` file in your project root - zwarm will load it automatically.
|
|
95
|
+
|
|
79
96
|
### Full Configuration Reference
|
|
80
97
|
|
|
81
98
|
```yaml
|
|
82
99
|
# config.yaml
|
|
83
100
|
orchestrator:
|
|
101
|
+
lm: gpt-5-mini # Model for the orchestrator itself
|
|
84
102
|
max_steps: 100 # Maximum orchestrator steps
|
|
103
|
+
compaction: # Context window management
|
|
104
|
+
enabled: true
|
|
105
|
+
max_tokens: 100000 # Context budget; the percentages below are relative to this
|
|
106
|
+
threshold_pct: 0.85 # Compact at 85% of max
|
|
107
|
+
target_pct: 0.7 # Target 70% after compaction
|
|
108
|
+
keep_first_n: 2 # Always keep system + task
|
|
109
|
+
keep_last_n: 10 # Always keep recent context
|
|
85
110
|
|
|
86
111
|
executor:
|
|
87
112
|
adapter: codex_mcp # Default adapter: codex_mcp | claude_code
|
|
88
|
-
model: null # Model override (adapter
|
|
113
|
+
model: null # Model override (null = use adapter default)
|
|
114
|
+
# codex_mcp default: gpt-5.1-codex-mini
|
|
115
|
+
# claude_code default: claude-sonnet-4-5-20250514
|
|
89
116
|
sandbox: workspace-write # Codex sandbox mode
|
|
90
117
|
|
|
91
118
|
weave:
|
|
@@ -95,24 +122,25 @@ weave:
|
|
|
95
122
|
state_dir: .zwarm # State directory for sessions/events
|
|
96
123
|
|
|
97
124
|
watchers:
|
|
98
|
-
enabled:
|
|
99
|
-
|
|
100
|
-
progress
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
scope
|
|
106
|
-
|
|
125
|
+
enabled: true
|
|
126
|
+
watchers:
|
|
127
|
+
- name: progress
|
|
128
|
+
- name: budget
|
|
129
|
+
config:
|
|
130
|
+
max_steps: 50
|
|
131
|
+
max_sessions: 10
|
|
132
|
+
- name: scope
|
|
133
|
+
config:
|
|
134
|
+
keywords: []
|
|
107
135
|
```
|
|
108
136
|
|
|
109
137
|
## Adapters
|
|
110
138
|
|
|
111
|
-
zwarm supports multiple CLI coding agents through adapters.
|
|
139
|
+
zwarm supports multiple CLI coding agents through adapters. Each adapter wraps a different coding CLI and handles the mechanics of starting sessions, sending messages, and capturing responses.
|
|
112
140
|
|
|
113
141
|
### Codex MCP (default)
|
|
114
142
|
|
|
115
|
-
Uses Codex via MCP server for true conversational sessions.
|
|
143
|
+
Uses Codex via MCP server for true conversational sessions. This is the recommended adapter for iterative work where you need back-and-forth refinement.
|
|
116
144
|
|
|
117
145
|
```bash
|
|
118
146
|
# Sync mode (conversational)
|
|
@@ -122,17 +150,45 @@ zwarm exec --adapter codex_mcp --task "Add a login function"
|
|
|
122
150
|
# using delegate() and converse() tools
|
|
123
151
|
```
|
|
124
152
|
|
|
125
|
-
|
|
153
|
+
| Setting | Value |
|
|
154
|
+
|---------|-------|
|
|
155
|
+
| Default model | `gpt-5.1-codex-mini` |
|
|
156
|
+
| Requires | `codex` CLI installed |
|
|
157
|
+
| Auth | `OPENAI_API_KEY` environment variable |
|
|
126
158
|
|
|
127
159
|
### Claude Code
|
|
128
160
|
|
|
129
|
-
Uses Claude Code CLI for execution.
|
|
161
|
+
Uses Claude Code CLI for execution. Good alternative when you want Claude's capabilities.
|
|
130
162
|
|
|
131
163
|
```bash
|
|
132
164
|
zwarm exec --adapter claude_code --task "Fix the type errors"
|
|
133
165
|
```
|
|
134
166
|
|
|
135
|
-
|
|
167
|
+
| Setting | Value |
|
|
168
|
+
|---------|-------|
|
|
169
|
+
| Default model | `claude-sonnet-4-5-20250514` |
|
|
170
|
+
| Requires | `claude` CLI installed and authenticated |
|
|
171
|
+
| Auth | `ANTHROPIC_API_KEY` or `claude` CLI auth |
|
|
172
|
+
|
|
173
|
+
### Model Selection
|
|
174
|
+
|
|
175
|
+
Models are selected with this precedence (highest to lowest):
|
|
176
|
+
|
|
177
|
+
1. **Per-delegation override**: `delegate(task="...", model="o3")`
|
|
178
|
+
2. **Config file**: `executor.model` in config.toml or zwarm.yaml
|
|
179
|
+
3. **Adapter default**: Each adapter has a sensible default
|
|
180
|
+
|
|
181
|
+
```toml
|
|
182
|
+
# config.toml - override the default model
|
|
183
|
+
[executor]
|
|
184
|
+
adapter = "codex_mcp"
|
|
185
|
+
model = "gpt-5.1-codex-max" # Use the more capable model
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
# Or override per-execution
|
|
190
|
+
zwarm exec --model gpt-5.1-codex-max --task "Complex refactoring"
|
|
191
|
+
```
|
|
136
192
|
|
|
137
193
|
## Watchers (Trajectory Alignment)
|
|
138
194
|
|
|
@@ -143,10 +199,11 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
|
|
|
143
199
|
| Watcher | Description |
|
|
144
200
|
|---------|-------------|
|
|
145
201
|
| `progress` | Detects stuck/spinning agents |
|
|
146
|
-
| `budget` | Monitors step/session limits |
|
|
202
|
+
| `budget` | Monitors step/session limits (counts only active sessions) |
|
|
147
203
|
| `scope` | Detects scope creep from original task |
|
|
148
204
|
| `pattern` | Custom regex pattern matching |
|
|
149
205
|
| `quality` | Code quality checks |
|
|
206
|
+
| `delegation` | Ensures orchestrator delegates instead of writing code directly |
|
|
150
207
|
|
|
151
208
|
### Enabling Watchers
|
|
152
209
|
|
|
@@ -204,6 +261,41 @@ View traces at: `https://wandb.ai/your-entity/zwarm/weave`
|
|
|
204
261
|
|
|
205
262
|
## CLI Reference
|
|
206
263
|
|
|
264
|
+
### init
|
|
265
|
+
|
|
266
|
+
Initialize zwarm in a project directory.
|
|
267
|
+
|
|
268
|
+
```bash
|
|
269
|
+
zwarm init [OPTIONS]
|
|
270
|
+
|
|
271
|
+
Options:
|
|
272
|
+
-w, --working-dir PATH Working directory [default: .]
|
|
273
|
+
-y, --yes Accept defaults, no prompts
|
|
274
|
+
--with-project Also create zwarm.yaml project config
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
**What it creates:**
|
|
278
|
+
|
|
279
|
+
1. `config.toml` - User settings (Weave project, adapter preferences, watchers)
|
|
280
|
+
2. `.zwarm/` - State directory for sessions and events
|
|
281
|
+
3. `zwarm.yaml` (optional) - Project-specific task configuration
|
|
282
|
+
|
|
283
|
+
**Examples:**
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
# Interactive setup with prompts
|
|
287
|
+
zwarm init
|
|
288
|
+
|
|
289
|
+
# Non-interactive with defaults
|
|
290
|
+
zwarm init --yes
|
|
291
|
+
|
|
292
|
+
# Create project config too
|
|
293
|
+
zwarm init --with-project
|
|
294
|
+
|
|
295
|
+
# Initialize in a different directory
|
|
296
|
+
zwarm init --working-dir /path/to/project
|
|
297
|
+
```
|
|
298
|
+
|
|
207
299
|
### orchestrate
|
|
208
300
|
|
|
209
301
|
Start an orchestrator session to delegate tasks.
|
|
@@ -222,19 +314,20 @@ Options:
|
|
|
222
314
|
|
|
223
315
|
### exec
|
|
224
316
|
|
|
225
|
-
Run a single executor directly (for testing).
|
|
317
|
+
Run a single executor directly (for testing). This bypasses the orchestrator entirely and hits the adapter (Codex/Claude) immediately with your task - useful for verifying adapters work before running full orchestration.
|
|
226
318
|
|
|
227
319
|
```bash
|
|
228
320
|
zwarm exec [OPTIONS]
|
|
229
321
|
|
|
230
322
|
Options:
|
|
231
323
|
-t, --task TEXT Task to execute
|
|
232
|
-
-f, --task-file PATH Read task from file
|
|
233
324
|
--adapter TEXT Adapter to use [default: codex_mcp]
|
|
234
325
|
--model TEXT Model override
|
|
235
326
|
--mode [sync|async] Execution mode [default: sync]
|
|
236
327
|
```
|
|
237
328
|
|
|
329
|
+
**Note:** Unlike `orchestrate`, this does NOT use watchers, compaction, state persistence, or multi-step planning. It's a single direct call to the executor.
|
|
330
|
+
|
|
238
331
|
### status
|
|
239
332
|
|
|
240
333
|
Show current orchestrator state.
|
|
@@ -270,6 +363,30 @@ zwarm configs list # List available configs
|
|
|
270
363
|
zwarm configs show NAME # Show config contents
|
|
271
364
|
```
|
|
272
365
|
|
|
366
|
+
### clean
|
|
367
|
+
|
|
368
|
+
Clean up zwarm state (useful for starting fresh).
|
|
369
|
+
|
|
370
|
+
```bash
|
|
371
|
+
zwarm clean [OPTIONS]
|
|
372
|
+
|
|
373
|
+
Options:
|
|
374
|
+
--all Remove everything (events, sessions, state)
|
|
375
|
+
--events Remove only events
|
|
376
|
+
--sessions Remove only sessions
|
|
377
|
+
-y, --yes Skip confirmation prompt
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
**Examples:**
|
|
381
|
+
|
|
382
|
+
```bash
|
|
383
|
+
# Clean everything and start fresh
|
|
384
|
+
zwarm clean --all --yes
|
|
385
|
+
|
|
386
|
+
# Clean only events log
|
|
387
|
+
zwarm clean --events
|
|
388
|
+
```
|
|
389
|
+
|
|
273
390
|
## Architecture
|
|
274
391
|
|
|
275
392
|
```
|
|
@@ -320,10 +437,16 @@ All state is stored in flat files under `.zwarm/`:
|
|
|
320
437
|
### Running Tests
|
|
321
438
|
|
|
322
439
|
```bash
|
|
323
|
-
#
|
|
324
|
-
uv run pytest
|
|
440
|
+
# Run all zwarm tests (68 tests)
|
|
441
|
+
uv run pytest src/zwarm/ -v
|
|
442
|
+
|
|
443
|
+
# Run specific test modules
|
|
444
|
+
uv run pytest src/zwarm/core/test_compact.py -v # Context compaction
|
|
445
|
+
uv run pytest src/zwarm/watchers/test_watchers.py -v # Watchers
|
|
446
|
+
uv run pytest src/zwarm/adapters/test_codex_mcp.py -v # Codex adapter
|
|
325
447
|
|
|
326
|
-
#
|
|
448
|
+
# Run integration tests (requires codex CLI)
|
|
449
|
+
uv run pytest -m integration
|
|
327
450
|
```
|
|
328
451
|
|
|
329
452
|
### Project Structure
|
|
@@ -333,19 +456,22 @@ zwarm/
|
|
|
333
456
|
├── src/zwarm/
|
|
334
457
|
│ ├── adapters/ # Executor adapters
|
|
335
458
|
│ │ ├── base.py # ExecutorAdapter protocol
|
|
336
|
-
│ │ ├── codex_mcp.py # Codex MCP adapter
|
|
337
|
-
│ │ └── claude_code.py # Claude Code adapter
|
|
459
|
+
│ │ ├── codex_mcp.py # Codex MCP adapter (with token tracking)
|
|
460
|
+
│ │ └── claude_code.py # Claude Code adapter (with token tracking)
|
|
338
461
|
│ ├── cli/
|
|
339
462
|
│ │ └── main.py # Typer CLI
|
|
340
463
|
│ ├── core/
|
|
464
|
+
│ │ ├── compact.py # Context window compaction (LRU pruning)
|
|
341
465
|
│ │ ├── config.py # Configuration loading
|
|
342
|
-
│ │ ├──
|
|
466
|
+
│ │ ├── environment.py # OrchestratorEnv (progress display)
|
|
467
|
+
│ │ ├── models.py # ConversationSession, Message, Event, etc.
|
|
343
468
|
│ │ └── state.py # Flat-file state management
|
|
344
469
|
│ ├── tools/
|
|
345
|
-
│ │ └── delegation.py # delegate, converse, etc.
|
|
470
|
+
│ │ └── delegation.py # delegate, converse, check_session, etc.
|
|
346
471
|
│ ├── watchers/
|
|
347
472
|
│ │ ├── base.py # Watcher protocol
|
|
348
|
-
│ │ ├── builtin.py # Built-in watchers
|
|
473
|
+
│ │ ├── builtin.py # Built-in watchers (progress, budget, scope, etc.)
|
|
474
|
+
│ │ ├── registry.py # Watcher registration
|
|
349
475
|
│ │ └── manager.py # WatcherManager
|
|
350
476
|
│ ├── prompts/
|
|
351
477
|
│ │ └── orchestrator.py # Orchestrator system prompt
|