zwarm 1.3.11__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zwarm/adapters/codex_mcp.py +475 -227
- zwarm/cli/main.py +485 -143
- zwarm/core/config.py +2 -0
- zwarm/orchestrator.py +83 -28
- zwarm/prompts/orchestrator.py +29 -13
- zwarm/sessions/__init__.py +2 -0
- zwarm/sessions/manager.py +87 -8
- zwarm/tools/delegation.py +358 -323
- zwarm-2.0.1.dist-info/METADATA +309 -0
- {zwarm-1.3.11.dist-info → zwarm-2.0.1.dist-info}/RECORD +12 -12
- zwarm-1.3.11.dist-info/METADATA +0 -525
- {zwarm-1.3.11.dist-info → zwarm-2.0.1.dist-info}/WHEEL +0 -0
- {zwarm-1.3.11.dist-info → zwarm-2.0.1.dist-info}/entry_points.txt +0 -0
zwarm/core/config.py
CHANGED
|
@@ -36,6 +36,7 @@ class ExecutorConfig:
|
|
|
36
36
|
model: str | None = None
|
|
37
37
|
sandbox: str = "workspace-write" # read-only | workspace-write | danger-full-access
|
|
38
38
|
timeout: int = 3600
|
|
39
|
+
reasoning_effort: str | None = "high" # low | medium | high (default to high for compatibility)
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
@dataclass
|
|
@@ -164,6 +165,7 @@ class ZwarmConfig:
|
|
|
164
165
|
"model": self.executor.model,
|
|
165
166
|
"sandbox": self.executor.sandbox,
|
|
166
167
|
"timeout": self.executor.timeout,
|
|
168
|
+
"reasoning_effort": self.executor.reasoning_effort,
|
|
167
169
|
},
|
|
168
170
|
"orchestrator": {
|
|
169
171
|
"lm": self.orchestrator.lm,
|
zwarm/orchestrator.py
CHANGED
|
@@ -20,6 +20,8 @@ import weave
|
|
|
20
20
|
from pydantic import Field, PrivateAttr
|
|
21
21
|
from wbal.agents.yaml_agent import YamlAgent
|
|
22
22
|
from wbal.helper import TOOL_CALL_TYPE, format_openai_tool_response
|
|
23
|
+
from wbal.lm import LM as wbalLMGeneric
|
|
24
|
+
from wbal.lm import GPT5LargeVerbose
|
|
23
25
|
|
|
24
26
|
from zwarm.adapters import ExecutorAdapter, get_adapter
|
|
25
27
|
from zwarm.core.compact import compact_messages, should_compact
|
|
@@ -29,9 +31,9 @@ from zwarm.core.models import ConversationSession
|
|
|
29
31
|
from zwarm.core.state import StateManager
|
|
30
32
|
from zwarm.prompts import get_orchestrator_prompt
|
|
31
33
|
from zwarm.watchers import (
|
|
32
|
-
WatcherManager,
|
|
33
|
-
WatcherContext,
|
|
34
34
|
WatcherAction,
|
|
35
|
+
WatcherContext,
|
|
36
|
+
WatcherManager,
|
|
35
37
|
build_watcher_manager,
|
|
36
38
|
)
|
|
37
39
|
|
|
@@ -48,6 +50,9 @@ class Orchestrator(YamlAgent):
|
|
|
48
50
|
- Weave integration
|
|
49
51
|
"""
|
|
50
52
|
|
|
53
|
+
# LM definition override:
|
|
54
|
+
lm: wbalLMGeneric = Field(default_factory=GPT5LargeVerbose)
|
|
55
|
+
|
|
51
56
|
# Configuration
|
|
52
57
|
config: ZwarmConfig = Field(default_factory=ZwarmConfig)
|
|
53
58
|
working_dir: Path = Field(default_factory=Path.cwd)
|
|
@@ -71,11 +76,13 @@ class Orchestrator(YamlAgent):
|
|
|
71
76
|
_watcher_manager: WatcherManager | None = PrivateAttr(default=None)
|
|
72
77
|
_resumed: bool = PrivateAttr(default=False)
|
|
73
78
|
_total_tokens: int = PrivateAttr(default=0) # Cumulative orchestrator tokens
|
|
74
|
-
_executor_usage: dict[str, int] = PrivateAttr(
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
+
_executor_usage: dict[str, int] = PrivateAttr(
|
|
80
|
+
default_factory=lambda: {
|
|
81
|
+
"input_tokens": 0,
|
|
82
|
+
"output_tokens": 0,
|
|
83
|
+
"total_tokens": 0,
|
|
84
|
+
}
|
|
85
|
+
)
|
|
79
86
|
|
|
80
87
|
def model_post_init(self, __context: Any) -> None:
|
|
81
88
|
"""Initialize state and adapters after model creation."""
|
|
@@ -93,6 +100,7 @@ class Orchestrator(YamlAgent):
|
|
|
93
100
|
# Register instance if using instance isolation
|
|
94
101
|
if self.instance_id:
|
|
95
102
|
from zwarm.core.state import register_instance
|
|
103
|
+
|
|
96
104
|
register_instance(
|
|
97
105
|
instance_id=self.instance_id,
|
|
98
106
|
name=self.instance_name,
|
|
@@ -143,7 +151,15 @@ class Orchestrator(YamlAgent):
|
|
|
143
151
|
if name not in self._adapters:
|
|
144
152
|
# Get model from config (adapters have their own defaults if None)
|
|
145
153
|
model = self.config.executor.model
|
|
146
|
-
|
|
154
|
+
|
|
155
|
+
# Use isolated codex config if available
|
|
156
|
+
config_path = self.working_dir / self.config.state_dir / "codex.toml"
|
|
157
|
+
if not config_path.exists():
|
|
158
|
+
config_path = None # Fallback to adapter defaults
|
|
159
|
+
|
|
160
|
+
self._adapters[name] = get_adapter(
|
|
161
|
+
name, model=model, config_path=config_path
|
|
162
|
+
)
|
|
147
163
|
return self._adapters[name]
|
|
148
164
|
|
|
149
165
|
def get_executor_usage(self) -> dict[str, int]:
|
|
@@ -178,10 +194,43 @@ class Orchestrator(YamlAgent):
|
|
|
178
194
|
"""
|
|
179
195
|
loaded_messages = self._state.load_orchestrator_messages()
|
|
180
196
|
if loaded_messages:
|
|
181
|
-
self.messages = loaded_messages
|
|
197
|
+
self.messages = self._sanitize_messages_for_resume(loaded_messages)
|
|
182
198
|
self._resumed = True
|
|
183
199
|
# If no messages were saved, don't set _resumed - start fresh
|
|
184
200
|
|
|
201
|
+
def _sanitize_messages_for_resume(self, messages: list[dict]) -> list[dict]:
|
|
202
|
+
"""
|
|
203
|
+
Sanitize messages loaded from disk for sending back to the API.
|
|
204
|
+
|
|
205
|
+
OpenAI's reasoning models include response-only fields (status, encrypted_content)
|
|
206
|
+
in reasoning blocks that can't be sent back as input. We keep the reasoning
|
|
207
|
+
items but strip the response-only fields.
|
|
208
|
+
|
|
209
|
+
Response-only fields that must be removed:
|
|
210
|
+
- status: reasoning item status (null, "in_progress", "completed")
|
|
211
|
+
- encrypted_content: encrypted reasoning content
|
|
212
|
+
"""
|
|
213
|
+
# Fields that are response-only and must be stripped for input
|
|
214
|
+
RESPONSE_ONLY_FIELDS = {
|
|
215
|
+
"status",
|
|
216
|
+
"encrypted_content",
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
def clean_item(item: Any) -> Any:
|
|
220
|
+
"""Recursively clean an item, removing response-only fields."""
|
|
221
|
+
if isinstance(item, dict):
|
|
222
|
+
return {
|
|
223
|
+
k: clean_item(v)
|
|
224
|
+
for k, v in item.items()
|
|
225
|
+
if k not in RESPONSE_ONLY_FIELDS
|
|
226
|
+
}
|
|
227
|
+
elif isinstance(item, list):
|
|
228
|
+
return [clean_item(x) for x in item]
|
|
229
|
+
else:
|
|
230
|
+
return item
|
|
231
|
+
|
|
232
|
+
return [clean_item(msg) for msg in messages]
|
|
233
|
+
|
|
185
234
|
def _maybe_compact(self) -> bool:
|
|
186
235
|
"""
|
|
187
236
|
Check if compaction is needed and compact if so.
|
|
@@ -214,15 +263,18 @@ class Orchestrator(YamlAgent):
|
|
|
214
263
|
|
|
215
264
|
# Log compaction event
|
|
216
265
|
from zwarm.core.models import Event
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
"
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
266
|
+
|
|
267
|
+
self._state.log_event(
|
|
268
|
+
Event(
|
|
269
|
+
kind="context_compacted",
|
|
270
|
+
payload={
|
|
271
|
+
"step": self._step_count,
|
|
272
|
+
"original_count": result.original_count,
|
|
273
|
+
"new_count": len(result.messages),
|
|
274
|
+
"removed_count": result.removed_count,
|
|
275
|
+
},
|
|
276
|
+
)
|
|
277
|
+
)
|
|
226
278
|
|
|
227
279
|
return True
|
|
228
280
|
|
|
@@ -338,17 +390,20 @@ Review what was accomplished in the previous session and delegate new tasks as n
|
|
|
338
390
|
|
|
339
391
|
# Log watcher execution to events
|
|
340
392
|
from zwarm.core.models import Event
|
|
393
|
+
|
|
341
394
|
watcher_names = [w.name for w in self.config.watchers.watchers if w.enabled]
|
|
342
|
-
self.state.log_event(
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
395
|
+
self.state.log_event(
|
|
396
|
+
Event(
|
|
397
|
+
kind="watchers_run",
|
|
398
|
+
payload={
|
|
399
|
+
"step": self._step_count,
|
|
400
|
+
"watchers": watcher_names,
|
|
401
|
+
"action": result.action.value,
|
|
402
|
+
"triggered_by": result.metadata.get("triggered_by"),
|
|
403
|
+
"reason": result.metadata.get("reason"),
|
|
404
|
+
},
|
|
405
|
+
)
|
|
406
|
+
)
|
|
352
407
|
|
|
353
408
|
# Handle watcher result
|
|
354
409
|
if result.action == WatcherAction.NUDGE and result.guidance:
|
zwarm/prompts/orchestrator.py
CHANGED
|
@@ -27,15 +27,17 @@ For everything else, make your best judgment and proceed. If you're unsure wheth
|
|
|
27
27
|
|
|
28
28
|
Your primary tools are for delegation and verification:
|
|
29
29
|
|
|
30
|
-
**delegate(task,
|
|
30
|
+
**delegate(task, working_dir=None, model=None, wait=True)** - Start a new executor session. The `task` should be a clear, specific description of what you want done. Use `wait=True` (default) for interactive work where you'll iterate with the executor. Use `wait=False` to spawn background work and continue immediately. The `working_dir` parameter lets you run the executor in a specific directory.
|
|
31
31
|
|
|
32
|
-
**converse(session_id, message)** -
|
|
32
|
+
**converse(session_id, message, wait=True)** - Continue an existing conversation. Use this to provide feedback, ask for changes, or guide the executor through complex work. The executor maintains full context. Use `wait=False` to send the message and continue without waiting for a response.
|
|
33
33
|
|
|
34
|
-
**
|
|
34
|
+
**peek_session(session_id)** - Quick status check. Returns just the session status and latest message. Use this for fast polling when you have multiple sessions running.
|
|
35
35
|
|
|
36
|
-
**
|
|
36
|
+
**check_session(session_id)** - Full session details including all messages, token usage, runtime. Use this when you need the complete picture.
|
|
37
37
|
|
|
38
|
-
**list_sessions(status)** -
|
|
38
|
+
**list_sessions(status=None)** - List all sessions. Returns a `needs_attention` flag for each session indicating if it recently completed or failed. Use this to monitor multiple parallel sessions and see which ones have new responses ready for review.
|
|
39
|
+
|
|
40
|
+
**end_session(session_id, reason=None, delete=False)** - Kill a running session or clean up a completed one. Use `delete=True` to remove the session entirely (won't show in list_sessions anymore).
|
|
39
41
|
|
|
40
42
|
**bash(command)** - Run shell commands directly. Use this primarily for verification: running tests, type checkers, linters, build commands, or inspecting the filesystem. Do NOT use bash to write code yourself - that's what executors are for.
|
|
41
43
|
|
|
@@ -61,21 +63,35 @@ The watchers are on your side. They exist to help you succeed, not to criticize.
|
|
|
61
63
|
|
|
62
64
|
---
|
|
63
65
|
|
|
64
|
-
# Sync vs Async: Choosing the Right
|
|
66
|
+
# Sync vs Async: Choosing the Right Approach
|
|
67
|
+
|
|
68
|
+
The `wait` parameter controls whether you block waiting for a response or continue immediately.
|
|
65
69
|
|
|
66
|
-
|
|
70
|
+
**Sync (wait=True)** creates an interactive conversation with the executor. After your task description, you receive the executor's response immediately. You can then provide feedback via converse(), ask for changes, or confirm the work is acceptable. This back-and-forth continues until you're satisfied.
|
|
67
71
|
|
|
68
|
-
|
|
72
|
+
Use sync when the task involves ambiguity, when you expect to iterate, when you want to review results before proceeding, or for high-stakes work needing close supervision.
|
|
69
73
|
|
|
70
|
-
|
|
74
|
+
Typical sync pattern:
|
|
75
|
+
1. `delegate(task)` - get initial response
|
|
76
|
+
2. Evaluate - does it meet requirements?
|
|
77
|
+
3. `converse(id, "feedback...")` - if changes needed
|
|
78
|
+
4. Repeat until satisfied
|
|
79
|
+
5. `end_session(id)` or just move on
|
|
71
80
|
|
|
72
|
-
|
|
81
|
+
**Async (wait=False)** is fire-and-forget. You spawn the work and continue immediately without waiting. The executor works in the background.
|
|
73
82
|
|
|
74
|
-
|
|
83
|
+
Use async when tasks are well-defined and self-contained, when you're confident the executor can complete without guidance, or when you want to parallelize multiple independent pieces of work. Async is efficient for clear-cut tasks like "add tests for this function" or "fix this lint error".
|
|
75
84
|
|
|
76
|
-
|
|
85
|
+
Async pattern for parallel work:
|
|
86
|
+
1. `delegate(task1, wait=False)` → session a
|
|
87
|
+
2. `delegate(task2, wait=False)` → session b
|
|
88
|
+
3. `delegate(task3, wait=False)` → session c
|
|
89
|
+
4. `list_sessions()` → check `needs_attention` flags
|
|
90
|
+
5. `peek_session(a)` → quick status check
|
|
91
|
+
6. `check_session(b)` → full details when ready
|
|
92
|
+
7. `converse(a, "now do X", wait=False)` → continue without blocking
|
|
77
93
|
|
|
78
|
-
When in doubt, prefer sync
|
|
94
|
+
When in doubt, prefer sync. The overhead of waiting is small compared to an executor going off in the wrong direction unsupervised.
|
|
79
95
|
|
|
80
96
|
---
|
|
81
97
|
|
zwarm/sessions/__init__.py
CHANGED
zwarm/sessions/manager.py
CHANGED
|
@@ -225,9 +225,11 @@ class CodexSessionManager:
|
|
|
225
225
|
continue
|
|
226
226
|
session = self._load_session(session_dir.name)
|
|
227
227
|
if session:
|
|
228
|
-
# Update status if process died
|
|
229
|
-
|
|
230
|
-
|
|
228
|
+
# Update status if process died OR output indicates completion
|
|
229
|
+
# (output check is more reliable than PID check due to PID reuse)
|
|
230
|
+
if session.status == SessionStatus.RUNNING:
|
|
231
|
+
if self._is_output_complete(session.id, session.turn) or not session.is_running:
|
|
232
|
+
self._update_session_status(session)
|
|
231
233
|
|
|
232
234
|
if status is None or session.status == status:
|
|
233
235
|
sessions.append(session)
|
|
@@ -241,8 +243,9 @@ class CodexSessionManager:
|
|
|
241
243
|
# Try exact match first
|
|
242
244
|
session = self._load_session(session_id)
|
|
243
245
|
if session:
|
|
244
|
-
if session.status == SessionStatus.RUNNING
|
|
245
|
-
self.
|
|
246
|
+
if session.status == SessionStatus.RUNNING:
|
|
247
|
+
if self._is_output_complete(session.id, session.turn) or not session.is_running:
|
|
248
|
+
self._update_session_status(session)
|
|
246
249
|
return session
|
|
247
250
|
|
|
248
251
|
# Try partial match
|
|
@@ -250,12 +253,45 @@ class CodexSessionManager:
|
|
|
250
253
|
if session_dir.name.startswith(session_id):
|
|
251
254
|
session = self._load_session(session_dir.name)
|
|
252
255
|
if session:
|
|
253
|
-
if session.status == SessionStatus.RUNNING
|
|
254
|
-
self.
|
|
256
|
+
if session.status == SessionStatus.RUNNING:
|
|
257
|
+
if self._is_output_complete(session.id, session.turn) or not session.is_running:
|
|
258
|
+
self._update_session_status(session)
|
|
255
259
|
return session
|
|
256
260
|
|
|
257
261
|
return None
|
|
258
262
|
|
|
263
|
+
def _is_output_complete(self, session_id: str, turn: int) -> bool:
|
|
264
|
+
"""
|
|
265
|
+
Check if output file indicates the task completed.
|
|
266
|
+
|
|
267
|
+
Looks for completion markers like 'turn.completed' or 'task.completed'
|
|
268
|
+
in the JSONL output. This is more reliable than PID checking.
|
|
269
|
+
"""
|
|
270
|
+
output_path = self._output_path(session_id, turn)
|
|
271
|
+
if not output_path.exists():
|
|
272
|
+
return False
|
|
273
|
+
|
|
274
|
+
try:
|
|
275
|
+
content = output_path.read_text()
|
|
276
|
+
for line in content.strip().split("\n"):
|
|
277
|
+
if not line.strip():
|
|
278
|
+
continue
|
|
279
|
+
try:
|
|
280
|
+
event = json.loads(line)
|
|
281
|
+
event_type = event.get("type", "")
|
|
282
|
+
# Check for any completion marker
|
|
283
|
+
if event_type in ("turn.completed", "task.completed", "completed", "done"):
|
|
284
|
+
return True
|
|
285
|
+
# Also check for error as a form of completion
|
|
286
|
+
if event_type == "error":
|
|
287
|
+
return True
|
|
288
|
+
except json.JSONDecodeError:
|
|
289
|
+
continue
|
|
290
|
+
except Exception:
|
|
291
|
+
pass
|
|
292
|
+
|
|
293
|
+
return False
|
|
294
|
+
|
|
259
295
|
def _update_session_status(self, session: CodexSession) -> None:
|
|
260
296
|
"""Update session status after process completion."""
|
|
261
297
|
# Parse output to determine status
|
|
@@ -325,6 +361,8 @@ class CodexSessionManager:
|
|
|
325
361
|
cmd = [
|
|
326
362
|
"codex", "exec",
|
|
327
363
|
"--json",
|
|
364
|
+
"--full-auto",
|
|
365
|
+
"--skip-git-repo-check",
|
|
328
366
|
"--model", model,
|
|
329
367
|
"-C", str(working_dir.absolute()),
|
|
330
368
|
]
|
|
@@ -408,6 +446,8 @@ Continue from where you left off, addressing the user's new message."""
|
|
|
408
446
|
cmd = [
|
|
409
447
|
"codex", "exec",
|
|
410
448
|
"--json",
|
|
449
|
+
"--full-auto",
|
|
450
|
+
"--skip-git-repo-check",
|
|
411
451
|
"--model", session.model,
|
|
412
452
|
"-C", str(session.working_dir.absolute()),
|
|
413
453
|
"--", augmented_task,
|
|
@@ -431,10 +471,14 @@ Continue from where you left off, addressing the user's new message."""
|
|
|
431
471
|
|
|
432
472
|
return session
|
|
433
473
|
|
|
434
|
-
def kill_session(self, session_id: str) -> bool:
|
|
474
|
+
def kill_session(self, session_id: str, delete: bool = False) -> bool:
|
|
435
475
|
"""
|
|
436
476
|
Kill a running session.
|
|
437
477
|
|
|
478
|
+
Args:
|
|
479
|
+
session_id: Session to kill
|
|
480
|
+
delete: If True, also delete session data entirely
|
|
481
|
+
|
|
438
482
|
Returns True if killed, False if not found or not running.
|
|
439
483
|
"""
|
|
440
484
|
session = self.get_session(session_id)
|
|
@@ -453,11 +497,46 @@ Continue from where you left off, addressing the user's new message."""
|
|
|
453
497
|
except (OSError, ProcessLookupError):
|
|
454
498
|
pass
|
|
455
499
|
|
|
500
|
+
if delete:
|
|
501
|
+
return self.delete_session(session.id)
|
|
502
|
+
|
|
456
503
|
session.status = SessionStatus.KILLED
|
|
457
504
|
session.error = "Manually killed"
|
|
458
505
|
self._save_session(session)
|
|
459
506
|
return True
|
|
460
507
|
|
|
508
|
+
def delete_session(self, session_id: str) -> bool:
    """
    Remove a session's directory from disk, stopping its process first.

    The session is looked up with ``self.get_session``. If it has a PID and
    reports itself as running, its process group is sent SIGTERM; after a
    0.3 second pause SIGKILL is sent if it is still running. ``OSError`` and
    ``ProcessLookupError`` raised while signalling are ignored. Finally the
    directory given by ``self._session_dir`` is removed recursively; errors
    from that removal are not caught here.

    Args:
        session_id: Identifier passed to ``self.get_session``.

    Returns:
        True if the session directory existed and was removed. False if the
        session was not found or its directory does not exist.
    """
    import shutil

    target = self.get_session(session_id)
    if not target:
        return False

    # Stop the process group before touching its files.
    if target.pid and target.is_running:
        try:
            process_group = os.getpgid(target.pid)
            os.killpg(process_group, signal.SIGTERM)
            # Give the process a moment to exit before escalating.
            time.sleep(0.3)
            if target.is_running:
                process_group = os.getpgid(target.pid)
                os.killpg(process_group, signal.SIGKILL)
        except (OSError, ProcessLookupError):
            pass

    directory = self._session_dir(target.id)
    if not directory.exists():
        return False

    shutil.rmtree(directory)
    return True
|
|
539
|
+
|
|
461
540
|
def get_output(self, session_id: str, turn: int | None = None) -> str:
|
|
462
541
|
"""Get raw JSONL output for a session."""
|
|
463
542
|
session = self.get_session(session_id)
|