zwarm 0.1.0__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. {zwarm-0.1.0 → zwarm-1.0.0}/PKG-INFO +162 -36
  2. {zwarm-0.1.0 → zwarm-1.0.0}/README.md +161 -35
  3. {zwarm-0.1.0 → zwarm-1.0.0}/pyproject.toml +1 -1
  4. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/adapters/claude_code.py +55 -3
  5. zwarm-1.0.0/src/zwarm/adapters/codex_mcp.py +739 -0
  6. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/adapters/test_codex_mcp.py +26 -26
  7. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/cli/main.py +464 -3
  8. zwarm-1.0.0/src/zwarm/core/compact.py +312 -0
  9. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/config.py +51 -9
  10. zwarm-1.0.0/src/zwarm/core/environment.py +154 -0
  11. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/models.py +16 -0
  12. zwarm-1.0.0/src/zwarm/core/test_compact.py +266 -0
  13. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/orchestrator.py +222 -39
  14. zwarm-1.0.0/src/zwarm/prompts/orchestrator.py +196 -0
  15. zwarm-1.0.0/src/zwarm/test_orchestrator_watchers.py +23 -0
  16. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/tools/delegation.py +23 -4
  17. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/builtin.py +90 -4
  18. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/manager.py +46 -8
  19. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/test_watchers.py +42 -0
  20. zwarm-0.1.0/src/zwarm/adapters/codex_mcp.py +0 -428
  21. zwarm-0.1.0/src/zwarm/core/environment.py +0 -83
  22. zwarm-0.1.0/src/zwarm/prompts/orchestrator.py +0 -214
  23. {zwarm-0.1.0 → zwarm-1.0.0}/.gitignore +0 -0
  24. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/__init__.py +0 -0
  25. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/adapters/__init__.py +0 -0
  26. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/adapters/base.py +0 -0
  27. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/cli/__init__.py +0 -0
  28. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/__init__.py +0 -0
  29. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/state.py +0 -0
  30. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/test_config.py +0 -0
  31. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/core/test_models.py +0 -0
  32. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/prompts/__init__.py +0 -0
  33. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/tools/__init__.py +0 -0
  34. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/__init__.py +0 -0
  35. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/base.py +0 -0
  36. {zwarm-0.1.0 → zwarm-1.0.0}/src/zwarm/watchers/registry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zwarm
3
- Version: 0.1.0
3
+ Version: 1.0.0
4
4
  Summary: Multi-Agent CLI Orchestration Research Platform
5
5
  Requires-Python: <3.14,>=3.13
6
6
  Requires-Dist: python-dotenv>=1.0.0
@@ -12,7 +12,17 @@ Description-Content-Type: text/markdown
12
12
 
13
13
  # zwarm
14
14
 
15
- Multi-agent CLI orchestration research platform. Coordinate multiple coding agents (Codex, Claude Code) with delegation, conversation, and trajectory alignment.
15
+ Multi-agent CLI orchestration research platform. Coordinate multiple coding agents (Codex, Claude Code) with delegation, conversation, trajectory alignment, and automatic context management.
16
+
17
+ ## Key Features
18
+
19
+ - **Multi-adapter support**: Codex MCP, Claude Code adapters with unified interface
20
+ - **Sync & async modes**: Conversational (iterative refinement) or fire-and-forget
21
+ - **Token tracking**: Per-session token usage tracked and persisted for cost analysis
22
+ - **Context compaction**: Automatic LRU-style pruning when approaching context limits
23
+ - **Trajectory watchers**: Composable guardrails (progress, budget, scope, pattern, delegation)
24
+ - **State persistence**: Resume sessions, track history, replay events
25
+ - **Weave integration**: Full tracing and observability
16
26
 
17
27
  ## Installation
18
28
 
@@ -33,16 +43,19 @@ uv pip install -e ./zwarm
33
43
  ## Quick Start
34
44
 
35
45
  ```bash
36
- # 1. Test an executor directly
46
+ # 1. Initialize zwarm in your project
47
+ zwarm init
48
+
49
+ # 2. Test an executor directly
37
50
  zwarm exec --task "What is 2+2?"
38
51
 
39
- # 2. Run the orchestrator with a task
52
+ # 3. Run the orchestrator with a task
40
53
  zwarm orchestrate --task "Create a hello world Python function"
41
54
 
42
- # 3. Check state after running
55
+ # 4. Check state after running
43
56
  zwarm status
44
57
 
45
- # 4. View event history
58
+ # 5. View event history
46
59
  zwarm history
47
60
  ```
48
61
 
@@ -80,24 +93,38 @@ adapter = "codex_mcp" # or "claude_code"
80
93
  ### Environment Variables
81
94
 
82
95
  ```bash
83
- # Enable Weave tracing (alternative to config.toml)
96
+ # Weave tracing (optional but recommended)
84
97
  export WEAVE_PROJECT="your-entity/zwarm"
85
98
 
86
- # Required for adapters
87
- export OPENAI_API_KEY="..." # for Codex
88
- export ANTHROPIC_API_KEY="..." # for Claude Code
99
+ # Executor authentication (required - set based on which adapter you use)
100
+ export OPENAI_API_KEY="sk-..." # Required for codex_mcp adapter
101
+ export ANTHROPIC_API_KEY="sk-ant-..." # Required for claude_code adapter
89
102
  ```
90
103
 
104
+ **Important:** The orchestrator agent runs with your credentials, but the executor adapters (Codex, Claude Code) need their own authentication. If executors fail with auth errors, check that the appropriate API key is set in your environment.
105
+
106
+ You can also put these in a `.env` file in your project root - zwarm will load it automatically.
107
+
91
108
  ### Full Configuration Reference
92
109
 
93
110
  ```yaml
94
111
  # config.yaml
95
112
  orchestrator:
113
+ lm: gpt-5-mini # Model for the orchestrator itself
96
114
  max_steps: 100 # Maximum orchestrator steps
115
+ compaction: # Context window management
116
+ enabled: true
117
+ max_tokens: 100000 # Context budget in tokens (percentages below are relative to this)
118
+ threshold_pct: 0.85 # Compact at 85% of max
119
+ target_pct: 0.7 # Target 70% after compaction
120
+ keep_first_n: 2 # Always keep system + task
121
+ keep_last_n: 10 # Always keep recent context
97
122
 
98
123
  executor:
99
124
  adapter: codex_mcp # Default adapter: codex_mcp | claude_code
100
- model: null # Model override (adapter-specific)
125
+ model: null # Model override (null = use adapter default)
126
+ # codex_mcp default: gpt-5.1-codex-mini
127
+ # claude_code default: claude-sonnet-4-5-20250514
101
128
  sandbox: workspace-write # Codex sandbox mode
102
129
 
103
130
  weave:
@@ -107,24 +134,25 @@ weave:
107
134
  state_dir: .zwarm # State directory for sessions/events
108
135
 
109
136
  watchers:
110
- enabled: [] # List of enabled watchers
111
- config:
112
- progress:
113
- stuck_threshold: 5
114
- budget:
115
- max_steps: 50
116
- max_sessions: 10
117
- scope:
118
- keywords: []
137
+ enabled: true
138
+ watchers:
139
+ - name: progress
140
+ - name: budget
141
+ config:
142
+ max_steps: 50
143
+ max_sessions: 10
144
+ - name: scope
145
+ config:
146
+ keywords: []
119
147
  ```
120
148
 
121
149
  ## Adapters
122
150
 
123
- zwarm supports multiple CLI coding agents through adapters.
151
+ zwarm supports multiple CLI coding agents through adapters. Each adapter wraps a different coding CLI and handles the mechanics of starting sessions, sending messages, and capturing responses.
124
152
 
125
153
  ### Codex MCP (default)
126
154
 
127
- Uses Codex via MCP server for true conversational sessions.
155
+ Uses Codex via MCP server for true conversational sessions. This is the recommended adapter for iterative work where you need back-and-forth refinement.
128
156
 
129
157
  ```bash
130
158
  # Sync mode (conversational)
@@ -134,17 +162,45 @@ zwarm exec --adapter codex_mcp --task "Add a login function"
134
162
  # using delegate() and converse() tools
135
163
  ```
136
164
 
137
- **Requires:** `codex` CLI installed, `OPENAI_API_KEY` set
165
+ | Setting | Value |
166
+ |---------|-------|
167
+ | Default model | `gpt-5.1-codex-mini` |
168
+ | Requires | `codex` CLI installed |
169
+ | Auth | `OPENAI_API_KEY` environment variable |
138
170
 
139
171
  ### Claude Code
140
172
 
141
- Uses Claude Code CLI for execution.
173
+ Uses Claude Code CLI for execution. Good alternative when you want Claude's capabilities.
142
174
 
143
175
  ```bash
144
176
  zwarm exec --adapter claude_code --task "Fix the type errors"
145
177
  ```
146
178
 
147
- **Requires:** `claude` CLI installed, authenticated
179
+ | Setting | Value |
180
+ |---------|-------|
181
+ | Default model | `claude-sonnet-4-5-20250514` |
182
+ | Requires | `claude` CLI installed and authenticated |
183
+ | Auth | `ANTHROPIC_API_KEY` or `claude` CLI auth |
184
+
185
+ ### Model Selection
186
+
187
+ Models are selected with this precedence (highest to lowest):
188
+
189
+ 1. **Per-delegation override**: `delegate(task="...", model="o3")`
190
+ 2. **Config file**: `executor.model` in config.toml or zwarm.yaml
191
+ 3. **Adapter default**: Each adapter has a sensible default
192
+
193
+ ```toml
194
+ # config.toml - override the default model
195
+ [executor]
196
+ adapter = "codex_mcp"
197
+ model = "gpt-5.1-codex-max" # Use the more capable model
198
+ ```
199
+
200
+ ```bash
201
+ # Or override per-execution
202
+ zwarm exec --model gpt-5.1-codex-max --task "Complex refactoring"
203
+ ```
148
204
 
149
205
  ## Watchers (Trajectory Alignment)
150
206
 
@@ -155,10 +211,11 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
155
211
  | Watcher | Description |
156
212
  |---------|-------------|
157
213
  | `progress` | Detects stuck/spinning agents |
158
- | `budget` | Monitors step/session limits |
214
+ | `budget` | Monitors step/session limits (counts only active sessions) |
159
215
  | `scope` | Detects scope creep from original task |
160
216
  | `pattern` | Custom regex pattern matching |
161
217
  | `quality` | Code quality checks |
218
+ | `delegation` | Ensures orchestrator delegates instead of writing code directly |
162
219
 
163
220
  ### Enabling Watchers
164
221
 
@@ -216,6 +273,41 @@ View traces at: `https://wandb.ai/your-entity/zwarm/weave`
216
273
 
217
274
  ## CLI Reference
218
275
 
276
+ ### init
277
+
278
+ Initialize zwarm in a project directory.
279
+
280
+ ```bash
281
+ zwarm init [OPTIONS]
282
+
283
+ Options:
284
+ -w, --working-dir PATH Working directory [default: .]
285
+ -y, --yes Accept defaults, no prompts
286
+ --with-project Also create zwarm.yaml project config
287
+ ```
288
+
289
+ **What it creates:**
290
+
291
+ 1. `config.toml` - User settings (Weave project, adapter preferences, watchers)
292
+ 2. `.zwarm/` - State directory for sessions and events
293
+ 3. `zwarm.yaml` (optional) - Project-specific task configuration
294
+
295
+ **Examples:**
296
+
297
+ ```bash
298
+ # Interactive setup with prompts
299
+ zwarm init
300
+
301
+ # Non-interactive with defaults
302
+ zwarm init --yes
303
+
304
+ # Create project config too
305
+ zwarm init --with-project
306
+
307
+ # Initialize in a different directory
308
+ zwarm init --working-dir /path/to/project
309
+ ```
310
+
219
311
  ### orchestrate
220
312
 
221
313
  Start an orchestrator session to delegate tasks.
@@ -234,19 +326,20 @@ Options:
234
326
 
235
327
  ### exec
236
328
 
237
- Run a single executor directly (for testing).
329
+ Run a single executor directly (for testing). This bypasses the orchestrator entirely and hits the adapter (Codex/Claude) immediately with your task - useful for verifying adapters work before running full orchestration.
238
330
 
239
331
  ```bash
240
332
  zwarm exec [OPTIONS]
241
333
 
242
334
  Options:
243
335
  -t, --task TEXT Task to execute
244
- -f, --task-file PATH Read task from file
245
336
  --adapter TEXT Adapter to use [default: codex_mcp]
246
337
  --model TEXT Model override
247
338
  --mode [sync|async] Execution mode [default: sync]
248
339
  ```
249
340
 
341
+ **Note:** Unlike `orchestrate`, this does NOT use watchers, compaction, state persistence, or multi-step planning. It's a single direct call to the executor.
342
+
250
343
  ### status
251
344
 
252
345
  Show current orchestrator state.
@@ -282,6 +375,30 @@ zwarm configs list # List available configs
282
375
  zwarm configs show NAME # Show config contents
283
376
  ```
284
377
 
378
+ ### clean
379
+
380
+ Clean up zwarm state (useful for starting fresh).
381
+
382
+ ```bash
383
+ zwarm clean [OPTIONS]
384
+
385
+ Options:
386
+ --all Remove everything (events, sessions, state)
387
+ --events Remove only events
388
+ --sessions Remove only sessions
389
+ -y, --yes Skip confirmation prompt
390
+ ```
391
+
392
+ **Examples:**
393
+
394
+ ```bash
395
+ # Clean everything and start fresh
396
+ zwarm clean --all --yes
397
+
398
+ # Clean only events log
399
+ zwarm clean --events
400
+ ```
401
+
285
402
  ## Architecture
286
403
 
287
404
  ```
@@ -332,10 +449,16 @@ All state is stored in flat files under `.zwarm/`:
332
449
  ### Running Tests
333
450
 
334
451
  ```bash
335
- # From workspace root
336
- uv run pytest wbal/tests/ -v
452
+ # Run all zwarm tests (68 tests)
453
+ uv run pytest src/zwarm/ -v
454
+
455
+ # Run specific test modules
456
+ uv run pytest src/zwarm/core/test_compact.py -v # Context compaction
457
+ uv run pytest src/zwarm/watchers/test_watchers.py -v # Watchers
458
+ uv run pytest src/zwarm/adapters/test_codex_mcp.py -v # Codex adapter
337
459
 
338
- # zwarm doesn't have its own tests yet
460
+ # Run integration tests (requires codex CLI)
461
+ uv run pytest -m integration
339
462
  ```
340
463
 
341
464
  ### Project Structure
@@ -345,19 +468,22 @@ zwarm/
345
468
  ├── src/zwarm/
346
469
  │ ├── adapters/ # Executor adapters
347
470
  │ │ ├── base.py # ExecutorAdapter protocol
348
- │ │ ├── codex_mcp.py # Codex MCP adapter
349
- │ │ └── claude_code.py # Claude Code adapter
471
+ │ │ ├── codex_mcp.py # Codex MCP adapter (with token tracking)
472
+ │ │ └── claude_code.py # Claude Code adapter (with token tracking)
350
473
  │ ├── cli/
351
474
  │ │ └── main.py # Typer CLI
352
475
  │ ├── core/
476
+ │ │ ├── compact.py # Context window compaction (LRU pruning)
353
477
  │ │ ├── config.py # Configuration loading
354
- │ │ ├── models.py # ConversationSession, Message, etc.
478
+ │ │ ├── environment.py # OrchestratorEnv (progress display)
479
+ │ │ ├── models.py # ConversationSession, Message, Event, etc.
355
480
  │ │ └── state.py # Flat-file state management
356
481
  │ ├── tools/
357
- │ │ └── delegation.py # delegate, converse, etc.
482
+ │ │ └── delegation.py # delegate, converse, check_session, etc.
358
483
  │ ├── watchers/
359
484
  │ │ ├── base.py # Watcher protocol
360
- │ │ ├── builtin.py # Built-in watchers
485
+ │ │ ├── builtin.py # Built-in watchers (progress, budget, scope, etc.)
486
+ │ │ ├── registry.py # Watcher registration
361
487
  │ │ └── manager.py # WatcherManager
362
488
  │ ├── prompts/
363
489
  │ │ └── orchestrator.py # Orchestrator system prompt
@@ -1,6 +1,16 @@
1
1
  # zwarm
2
2
 
3
- Multi-agent CLI orchestration research platform. Coordinate multiple coding agents (Codex, Claude Code) with delegation, conversation, and trajectory alignment.
3
+ Multi-agent CLI orchestration research platform. Coordinate multiple coding agents (Codex, Claude Code) with delegation, conversation, trajectory alignment, and automatic context management.
4
+
5
+ ## Key Features
6
+
7
+ - **Multi-adapter support**: Codex MCP, Claude Code adapters with unified interface
8
+ - **Sync & async modes**: Conversational (iterative refinement) or fire-and-forget
9
+ - **Token tracking**: Per-session token usage tracked and persisted for cost analysis
10
+ - **Context compaction**: Automatic LRU-style pruning when approaching context limits
11
+ - **Trajectory watchers**: Composable guardrails (progress, budget, scope, pattern, delegation)
12
+ - **State persistence**: Resume sessions, track history, replay events
13
+ - **Weave integration**: Full tracing and observability
4
14
 
5
15
  ## Installation
6
16
 
@@ -21,16 +31,19 @@ uv pip install -e ./zwarm
21
31
  ## Quick Start
22
32
 
23
33
  ```bash
24
- # 1. Test an executor directly
34
+ # 1. Initialize zwarm in your project
35
+ zwarm init
36
+
37
+ # 2. Test an executor directly
25
38
  zwarm exec --task "What is 2+2?"
26
39
 
27
- # 2. Run the orchestrator with a task
40
+ # 3. Run the orchestrator with a task
28
41
  zwarm orchestrate --task "Create a hello world Python function"
29
42
 
30
- # 3. Check state after running
43
+ # 4. Check state after running
31
44
  zwarm status
32
45
 
33
- # 4. View event history
46
+ # 5. View event history
34
47
  zwarm history
35
48
  ```
36
49
 
@@ -68,24 +81,38 @@ adapter = "codex_mcp" # or "claude_code"
68
81
  ### Environment Variables
69
82
 
70
83
  ```bash
71
- # Enable Weave tracing (alternative to config.toml)
84
+ # Weave tracing (optional but recommended)
72
85
  export WEAVE_PROJECT="your-entity/zwarm"
73
86
 
74
- # Required for adapters
75
- export OPENAI_API_KEY="..." # for Codex
76
- export ANTHROPIC_API_KEY="..." # for Claude Code
87
+ # Executor authentication (required - set based on which adapter you use)
88
+ export OPENAI_API_KEY="sk-..." # Required for codex_mcp adapter
89
+ export ANTHROPIC_API_KEY="sk-ant-..." # Required for claude_code adapter
77
90
  ```
78
91
 
92
+ **Important:** The orchestrator agent runs with your credentials, but the executor adapters (Codex, Claude Code) need their own authentication. If executors fail with auth errors, check that the appropriate API key is set in your environment.
93
+
94
+ You can also put these in a `.env` file in your project root - zwarm will load it automatically.
95
+
79
96
  ### Full Configuration Reference
80
97
 
81
98
  ```yaml
82
99
  # config.yaml
83
100
  orchestrator:
101
+ lm: gpt-5-mini # Model for the orchestrator itself
84
102
  max_steps: 100 # Maximum orchestrator steps
103
+ compaction: # Context window management
104
+ enabled: true
105
+ max_tokens: 100000 # Context budget in tokens (percentages below are relative to this)
106
+ threshold_pct: 0.85 # Compact at 85% of max
107
+ target_pct: 0.7 # Target 70% after compaction
108
+ keep_first_n: 2 # Always keep system + task
109
+ keep_last_n: 10 # Always keep recent context
85
110
 
86
111
  executor:
87
112
  adapter: codex_mcp # Default adapter: codex_mcp | claude_code
88
- model: null # Model override (adapter-specific)
113
+ model: null # Model override (null = use adapter default)
114
+ # codex_mcp default: gpt-5.1-codex-mini
115
+ # claude_code default: claude-sonnet-4-5-20250514
89
116
  sandbox: workspace-write # Codex sandbox mode
90
117
 
91
118
  weave:
@@ -95,24 +122,25 @@ weave:
95
122
  state_dir: .zwarm # State directory for sessions/events
96
123
 
97
124
  watchers:
98
- enabled: [] # List of enabled watchers
99
- config:
100
- progress:
101
- stuck_threshold: 5
102
- budget:
103
- max_steps: 50
104
- max_sessions: 10
105
- scope:
106
- keywords: []
125
+ enabled: true
126
+ watchers:
127
+ - name: progress
128
+ - name: budget
129
+ config:
130
+ max_steps: 50
131
+ max_sessions: 10
132
+ - name: scope
133
+ config:
134
+ keywords: []
107
135
  ```
108
136
 
109
137
  ## Adapters
110
138
 
111
- zwarm supports multiple CLI coding agents through adapters.
139
+ zwarm supports multiple CLI coding agents through adapters. Each adapter wraps a different coding CLI and handles the mechanics of starting sessions, sending messages, and capturing responses.
112
140
 
113
141
  ### Codex MCP (default)
114
142
 
115
- Uses Codex via MCP server for true conversational sessions.
143
+ Uses Codex via MCP server for true conversational sessions. This is the recommended adapter for iterative work where you need back-and-forth refinement.
116
144
 
117
145
  ```bash
118
146
  # Sync mode (conversational)
@@ -122,17 +150,45 @@ zwarm exec --adapter codex_mcp --task "Add a login function"
122
150
  # using delegate() and converse() tools
123
151
  ```
124
152
 
125
- **Requires:** `codex` CLI installed, `OPENAI_API_KEY` set
153
+ | Setting | Value |
154
+ |---------|-------|
155
+ | Default model | `gpt-5.1-codex-mini` |
156
+ | Requires | `codex` CLI installed |
157
+ | Auth | `OPENAI_API_KEY` environment variable |
126
158
 
127
159
  ### Claude Code
128
160
 
129
- Uses Claude Code CLI for execution.
161
+ Uses Claude Code CLI for execution. Good alternative when you want Claude's capabilities.
130
162
 
131
163
  ```bash
132
164
  zwarm exec --adapter claude_code --task "Fix the type errors"
133
165
  ```
134
166
 
135
- **Requires:** `claude` CLI installed, authenticated
167
+ | Setting | Value |
168
+ |---------|-------|
169
+ | Default model | `claude-sonnet-4-5-20250514` |
170
+ | Requires | `claude` CLI installed and authenticated |
171
+ | Auth | `ANTHROPIC_API_KEY` or `claude` CLI auth |
172
+
173
+ ### Model Selection
174
+
175
+ Models are selected with this precedence (highest to lowest):
176
+
177
+ 1. **Per-delegation override**: `delegate(task="...", model="o3")`
178
+ 2. **Config file**: `executor.model` in config.toml or zwarm.yaml
179
+ 3. **Adapter default**: Each adapter has a sensible default
180
+
181
+ ```toml
182
+ # config.toml - override the default model
183
+ [executor]
184
+ adapter = "codex_mcp"
185
+ model = "gpt-5.1-codex-max" # Use the more capable model
186
+ ```
187
+
188
+ ```bash
189
+ # Or override per-execution
190
+ zwarm exec --model gpt-5.1-codex-max --task "Complex refactoring"
191
+ ```
136
192
 
137
193
  ## Watchers (Trajectory Alignment)
138
194
 
@@ -143,10 +199,11 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
143
199
  | Watcher | Description |
144
200
  |---------|-------------|
145
201
  | `progress` | Detects stuck/spinning agents |
146
- | `budget` | Monitors step/session limits |
202
+ | `budget` | Monitors step/session limits (counts only active sessions) |
147
203
  | `scope` | Detects scope creep from original task |
148
204
  | `pattern` | Custom regex pattern matching |
149
205
  | `quality` | Code quality checks |
206
+ | `delegation` | Ensures orchestrator delegates instead of writing code directly |
150
207
 
151
208
  ### Enabling Watchers
152
209
 
@@ -204,6 +261,41 @@ View traces at: `https://wandb.ai/your-entity/zwarm/weave`
204
261
 
205
262
  ## CLI Reference
206
263
 
264
+ ### init
265
+
266
+ Initialize zwarm in a project directory.
267
+
268
+ ```bash
269
+ zwarm init [OPTIONS]
270
+
271
+ Options:
272
+ -w, --working-dir PATH Working directory [default: .]
273
+ -y, --yes Accept defaults, no prompts
274
+ --with-project Also create zwarm.yaml project config
275
+ ```
276
+
277
+ **What it creates:**
278
+
279
+ 1. `config.toml` - User settings (Weave project, adapter preferences, watchers)
280
+ 2. `.zwarm/` - State directory for sessions and events
281
+ 3. `zwarm.yaml` (optional) - Project-specific task configuration
282
+
283
+ **Examples:**
284
+
285
+ ```bash
286
+ # Interactive setup with prompts
287
+ zwarm init
288
+
289
+ # Non-interactive with defaults
290
+ zwarm init --yes
291
+
292
+ # Create project config too
293
+ zwarm init --with-project
294
+
295
+ # Initialize in a different directory
296
+ zwarm init --working-dir /path/to/project
297
+ ```
298
+
207
299
  ### orchestrate
208
300
 
209
301
  Start an orchestrator session to delegate tasks.
@@ -222,19 +314,20 @@ Options:
222
314
 
223
315
  ### exec
224
316
 
225
- Run a single executor directly (for testing).
317
+ Run a single executor directly (for testing). This bypasses the orchestrator entirely and hits the adapter (Codex/Claude) immediately with your task - useful for verifying adapters work before running full orchestration.
226
318
 
227
319
  ```bash
228
320
  zwarm exec [OPTIONS]
229
321
 
230
322
  Options:
231
323
  -t, --task TEXT Task to execute
232
- -f, --task-file PATH Read task from file
233
324
  --adapter TEXT Adapter to use [default: codex_mcp]
234
325
  --model TEXT Model override
235
326
  --mode [sync|async] Execution mode [default: sync]
236
327
  ```
237
328
 
329
+ **Note:** Unlike `orchestrate`, this does NOT use watchers, compaction, state persistence, or multi-step planning. It's a single direct call to the executor.
330
+
238
331
  ### status
239
332
 
240
333
  Show current orchestrator state.
@@ -270,6 +363,30 @@ zwarm configs list # List available configs
270
363
  zwarm configs show NAME # Show config contents
271
364
  ```
272
365
 
366
+ ### clean
367
+
368
+ Clean up zwarm state (useful for starting fresh).
369
+
370
+ ```bash
371
+ zwarm clean [OPTIONS]
372
+
373
+ Options:
374
+ --all Remove everything (events, sessions, state)
375
+ --events Remove only events
376
+ --sessions Remove only sessions
377
+ -y, --yes Skip confirmation prompt
378
+ ```
379
+
380
+ **Examples:**
381
+
382
+ ```bash
383
+ # Clean everything and start fresh
384
+ zwarm clean --all --yes
385
+
386
+ # Clean only events log
387
+ zwarm clean --events
388
+ ```
389
+
273
390
  ## Architecture
274
391
 
275
392
  ```
@@ -320,10 +437,16 @@ All state is stored in flat files under `.zwarm/`:
320
437
  ### Running Tests
321
438
 
322
439
  ```bash
323
- # From workspace root
324
- uv run pytest wbal/tests/ -v
440
+ # Run all zwarm tests (68 tests)
441
+ uv run pytest src/zwarm/ -v
442
+
443
+ # Run specific test modules
444
+ uv run pytest src/zwarm/core/test_compact.py -v # Context compaction
445
+ uv run pytest src/zwarm/watchers/test_watchers.py -v # Watchers
446
+ uv run pytest src/zwarm/adapters/test_codex_mcp.py -v # Codex adapter
325
447
 
326
- # zwarm doesn't have its own tests yet
448
+ # Run integration tests (requires codex CLI)
449
+ uv run pytest -m integration
327
450
  ```
328
451
 
329
452
  ### Project Structure
@@ -333,19 +456,22 @@ zwarm/
333
456
  ├── src/zwarm/
334
457
  │ ├── adapters/ # Executor adapters
335
458
  │ │ ├── base.py # ExecutorAdapter protocol
336
- │ │ ├── codex_mcp.py # Codex MCP adapter
337
- │ │ └── claude_code.py # Claude Code adapter
459
+ │ │ ├── codex_mcp.py # Codex MCP adapter (with token tracking)
460
+ │ │ └── claude_code.py # Claude Code adapter (with token tracking)
338
461
  │ ├── cli/
339
462
  │ │ └── main.py # Typer CLI
340
463
  │ ├── core/
464
+ │ │ ├── compact.py # Context window compaction (LRU pruning)
341
465
  │ │ ├── config.py # Configuration loading
342
- │ │ ├── models.py # ConversationSession, Message, etc.
466
+ │ │ ├── environment.py # OrchestratorEnv (progress display)
467
+ │ │ ├── models.py # ConversationSession, Message, Event, etc.
343
468
  │ │ └── state.py # Flat-file state management
344
469
  │ ├── tools/
345
- │ │ └── delegation.py # delegate, converse, etc.
470
+ │ │ └── delegation.py # delegate, converse, check_session, etc.
346
471
  │ ├── watchers/
347
472
  │ │ ├── base.py # Watcher protocol
348
- │ │ ├── builtin.py # Built-in watchers
473
+ │ │ ├── builtin.py # Built-in watchers (progress, budget, scope, etc.)
474
+ │ │ ├── registry.py # Watcher registration
349
475
  │ │ └── manager.py # WatcherManager
350
476
  │ ├── prompts/
351
477
  │ │ └── orchestrator.py # Orchestrator system prompt
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "zwarm"
3
- version = "0.1.0"
3
+ version = "1.0.0"
4
4
  description = "Multi-Agent CLI Orchestration Research Platform"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13,<3.14"