zwarm 2.3.5__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zwarm/cli/interactive.py +1065 -0
- zwarm/cli/main.py +525 -934
- zwarm/cli/pilot.py +1240 -0
- zwarm/core/__init__.py +20 -0
- zwarm/core/checkpoints.py +216 -0
- zwarm/core/config.py +26 -9
- zwarm/core/costs.py +71 -0
- zwarm/core/registry.py +329 -0
- zwarm/core/test_config.py +2 -3
- zwarm/orchestrator.py +17 -43
- zwarm/prompts/__init__.py +3 -0
- zwarm/prompts/orchestrator.py +36 -29
- zwarm/prompts/pilot.py +147 -0
- zwarm/sessions/__init__.py +48 -9
- zwarm/sessions/base.py +501 -0
- zwarm/sessions/claude.py +481 -0
- zwarm/sessions/manager.py +233 -486
- zwarm/tools/delegation.py +150 -187
- zwarm-3.6.0.dist-info/METADATA +445 -0
- zwarm-3.6.0.dist-info/RECORD +39 -0
- zwarm/adapters/__init__.py +0 -21
- zwarm/adapters/base.py +0 -109
- zwarm/adapters/claude_code.py +0 -357
- zwarm/adapters/codex_mcp.py +0 -1262
- zwarm/adapters/registry.py +0 -69
- zwarm/adapters/test_codex_mcp.py +0 -274
- zwarm/adapters/test_registry.py +0 -68
- zwarm-2.3.5.dist-info/METADATA +0 -309
- zwarm-2.3.5.dist-info/RECORD +0 -38
- {zwarm-2.3.5.dist-info → zwarm-3.6.0.dist-info}/WHEEL +0 -0
- {zwarm-2.3.5.dist-info → zwarm-3.6.0.dist-info}/entry_points.txt +0 -0
zwarm/tools/delegation.py
CHANGED
|
@@ -2,13 +2,17 @@
|
|
|
2
2
|
Delegation tools for the orchestrator.
|
|
3
3
|
|
|
4
4
|
These are the core tools that orchestrators use to delegate work to executors.
|
|
5
|
-
They use the
|
|
5
|
+
They use the same session managers that `zwarm interactive` uses - no special
|
|
6
6
|
MCP integration, no separate code path.
|
|
7
7
|
|
|
8
8
|
The orchestrator LLM has access to the exact same tools a human would use.
|
|
9
9
|
|
|
10
|
+
Supports multiple adapters:
|
|
11
|
+
- codex: OpenAI's Codex CLI (default)
|
|
12
|
+
- claude: Anthropic's Claude Code CLI
|
|
13
|
+
|
|
10
14
|
Tools:
|
|
11
|
-
- delegate: Start a new
|
|
15
|
+
- delegate: Start a new session (with adapter selection)
|
|
12
16
|
- converse: Continue a conversation (inject follow-up message)
|
|
13
17
|
- check_session: Check status of a session
|
|
14
18
|
- end_session: End/kill a session
|
|
@@ -19,60 +23,58 @@ from __future__ import annotations
|
|
|
19
23
|
|
|
20
24
|
import time
|
|
21
25
|
from pathlib import Path
|
|
22
|
-
from typing import TYPE_CHECKING, Any
|
|
26
|
+
from typing import TYPE_CHECKING, Any
|
|
23
27
|
|
|
24
28
|
from wbal.helper import weaveTool
|
|
25
29
|
|
|
26
30
|
if TYPE_CHECKING:
|
|
27
31
|
from zwarm.orchestrator import Orchestrator
|
|
28
32
|
|
|
33
|
+
# Available adapters
|
|
34
|
+
ADAPTERS = ["codex", "claude"]
|
|
35
|
+
|
|
29
36
|
|
|
30
37
|
def _get_session_manager(orchestrator: "Orchestrator"):
|
|
31
38
|
"""
|
|
32
|
-
Get the
|
|
33
|
-
|
|
34
|
-
Both `zwarm interactive` and `zwarm orchestrate` use the same session manager.
|
|
35
|
-
The orchestrator is just another user that happens to be an LLM.
|
|
39
|
+
Get the default session manager for list/get operations.
|
|
36
40
|
|
|
37
|
-
|
|
38
|
-
|
|
41
|
+
Uses CodexSessionManager as the default since all adapters share
|
|
42
|
+
the same .zwarm/sessions/ directory structure.
|
|
39
43
|
"""
|
|
40
|
-
# Should already exist from model_post_init, but create if not
|
|
41
44
|
if not hasattr(orchestrator, "_session_manager") or orchestrator._session_manager is None:
|
|
42
45
|
from zwarm.sessions import CodexSessionManager
|
|
43
46
|
orchestrator._session_manager = CodexSessionManager(orchestrator.working_dir / ".zwarm")
|
|
44
47
|
return orchestrator._session_manager
|
|
45
48
|
|
|
46
49
|
|
|
47
|
-
def
|
|
50
|
+
def _get_adapter_manager(orchestrator: "Orchestrator", adapter: str):
|
|
48
51
|
"""
|
|
49
|
-
|
|
52
|
+
Get the session manager for a specific adapter.
|
|
53
|
+
|
|
54
|
+
Each adapter has its own manager for start_session/inject_message,
|
|
55
|
+
but they all share the same .zwarm/sessions/ directory.
|
|
50
56
|
|
|
51
57
|
Args:
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
timeout: Max seconds to wait
|
|
55
|
-
poll_interval: Seconds between polls
|
|
58
|
+
orchestrator: The orchestrator instance
|
|
59
|
+
adapter: Adapter name ("codex" or "claude")
|
|
56
60
|
|
|
57
61
|
Returns:
|
|
58
|
-
|
|
62
|
+
Session manager for the specified adapter
|
|
59
63
|
"""
|
|
60
|
-
|
|
64
|
+
# Initialize adapter managers dict if needed
|
|
65
|
+
if not hasattr(orchestrator, "_adapter_managers"):
|
|
66
|
+
orchestrator._adapter_managers = {}
|
|
61
67
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
session = manager.get_session(session_id)
|
|
66
|
-
if not session:
|
|
67
|
-
return False
|
|
68
|
+
# Return cached manager if exists
|
|
69
|
+
if adapter in orchestrator._adapter_managers:
|
|
70
|
+
return orchestrator._adapter_managers[adapter]
|
|
68
71
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
+
# Create new manager for this adapter
|
|
73
|
+
from zwarm.sessions import get_session_manager
|
|
74
|
+
manager = get_session_manager(adapter, str(orchestrator.working_dir / ".zwarm"))
|
|
75
|
+
orchestrator._adapter_managers[adapter] = manager
|
|
72
76
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
return False
|
|
77
|
+
return manager
|
|
76
78
|
|
|
77
79
|
|
|
78
80
|
def _truncate(text: str, max_len: int = 200) -> str:
|
|
@@ -84,7 +86,8 @@ def _truncate(text: str, max_len: int = 200) -> str:
|
|
|
84
86
|
|
|
85
87
|
def _format_session_header(session) -> str:
|
|
86
88
|
"""Format a nice session header."""
|
|
87
|
-
|
|
89
|
+
adapter = getattr(session, "adapter", "codex")
|
|
90
|
+
return f"[{session.short_id}] {adapter} ({session.status.value})"
|
|
88
91
|
|
|
89
92
|
|
|
90
93
|
def _get_total_tokens(session) -> int:
|
|
@@ -158,36 +161,49 @@ def _validate_working_dir(
|
|
|
158
161
|
def delegate(
|
|
159
162
|
self: "Orchestrator",
|
|
160
163
|
task: str,
|
|
161
|
-
mode: Literal["sync", "async"] = "sync",
|
|
162
164
|
model: str | None = None,
|
|
163
165
|
working_dir: str | None = None,
|
|
166
|
+
adapter: str = "codex",
|
|
164
167
|
) -> dict[str, Any]:
|
|
165
168
|
"""
|
|
166
|
-
Delegate work to
|
|
169
|
+
Delegate work to an executor agent.
|
|
167
170
|
|
|
168
|
-
|
|
169
|
-
|
|
171
|
+
Supports multiple adapters:
|
|
172
|
+
- codex: OpenAI's Codex CLI (default, fast, good for code tasks)
|
|
173
|
+
- claude: Claude Code CLI (powerful, good for complex reasoning)
|
|
170
174
|
|
|
171
|
-
|
|
172
|
-
Best for: most tasks - you get the full response immediately.
|
|
175
|
+
All sessions run async - you get a session_id immediately and poll for results.
|
|
173
176
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
+
Workflow pattern:
|
|
178
|
+
1. delegate(task="Add logout button") -> session_id
|
|
179
|
+
2. sleep(30) -> give it time
|
|
180
|
+
3. peek_session(session_id) -> check if done
|
|
181
|
+
4. Repeat 2-3 if still running
|
|
182
|
+
5. check_session(session_id) -> get full results
|
|
177
183
|
|
|
178
184
|
Args:
|
|
179
185
|
task: Clear description of what to do. Be specific about requirements.
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
186
|
+
model: Model override (codex: gpt-5.1-codex-mini, claude: sonnet).
|
|
187
|
+
working_dir: Directory for executor to work in (default: orchestrator's dir).
|
|
188
|
+
adapter: Which executor to use - "codex" (default) or "claude".
|
|
183
189
|
|
|
184
190
|
Returns:
|
|
185
|
-
{session_id, status,
|
|
191
|
+
{success, session, session_id, status: "running", task, adapter, model, hint}
|
|
186
192
|
|
|
187
|
-
Example:
|
|
188
|
-
delegate(task="Add a logout button to the navbar"
|
|
189
|
-
|
|
193
|
+
Example with codex (default):
|
|
194
|
+
delegate(task="Add a logout button to the navbar")
|
|
195
|
+
|
|
196
|
+
Example with claude for complex tasks:
|
|
197
|
+
delegate(task="Refactor the auth system to use OAuth2", adapter="claude")
|
|
190
198
|
"""
|
|
199
|
+
# Validate adapter
|
|
200
|
+
if adapter not in ADAPTERS:
|
|
201
|
+
return {
|
|
202
|
+
"success": False,
|
|
203
|
+
"error": f"Unknown adapter: {adapter}. Available: {ADAPTERS}",
|
|
204
|
+
"hint": f"Use one of: {ADAPTERS}",
|
|
205
|
+
}
|
|
206
|
+
|
|
191
207
|
# Validate working directory
|
|
192
208
|
effective_dir, dir_error = _validate_working_dir(
|
|
193
209
|
working_dir,
|
|
@@ -202,94 +218,41 @@ def delegate(
|
|
|
202
218
|
"hint": "Use the default working directory or ask user to update allowed_dirs config",
|
|
203
219
|
}
|
|
204
220
|
|
|
205
|
-
# Get the session manager
|
|
206
|
-
manager =
|
|
221
|
+
# Get the session manager for this adapter
|
|
222
|
+
manager = _get_adapter_manager(self, adapter)
|
|
207
223
|
|
|
208
|
-
# Determine model
|
|
209
|
-
|
|
224
|
+
# Determine model (defaults vary by adapter)
|
|
225
|
+
if model:
|
|
226
|
+
effective_model = model
|
|
227
|
+
elif self.config.executor.model:
|
|
228
|
+
effective_model = self.config.executor.model
|
|
229
|
+
else:
|
|
230
|
+
# Use adapter-specific defaults
|
|
231
|
+
effective_model = manager.default_model
|
|
210
232
|
|
|
211
233
|
# Determine sandbox mode
|
|
212
234
|
sandbox = self.config.executor.sandbox or "workspace-write"
|
|
213
235
|
|
|
214
|
-
# Start the session
|
|
215
|
-
# This is the SAME method that `zwarm interactive` uses
|
|
236
|
+
# Start the session
|
|
216
237
|
session = manager.start_session(
|
|
217
238
|
task=task,
|
|
218
239
|
working_dir=effective_dir,
|
|
219
240
|
model=effective_model,
|
|
220
241
|
sandbox=sandbox,
|
|
221
242
|
source=f"orchestrator:{self.instance_id or 'default'}",
|
|
222
|
-
adapter="codex",
|
|
223
243
|
)
|
|
224
244
|
|
|
225
|
-
#
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
)
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
if not completed:
|
|
237
|
-
return {
|
|
238
|
-
"success": False,
|
|
239
|
-
"session_id": session.id,
|
|
240
|
-
"status": "timeout",
|
|
241
|
-
"error": "Session timed out waiting for codex to complete",
|
|
242
|
-
"hint": "Use check_session() to monitor progress, or use async mode for long tasks",
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
# Get the response from messages
|
|
246
|
-
response_text = ""
|
|
247
|
-
messages = manager.get_messages(session.id)
|
|
248
|
-
for msg in messages:
|
|
249
|
-
if msg.role == "assistant":
|
|
250
|
-
response_text = msg.content
|
|
251
|
-
break # Take first assistant message
|
|
252
|
-
|
|
253
|
-
# Build log path for debugging
|
|
254
|
-
log_path = str(manager._output_path(session.id, session.turn))
|
|
255
|
-
|
|
256
|
-
# Check if session failed
|
|
257
|
-
from zwarm.sessions import SessionStatus
|
|
258
|
-
if session.status == SessionStatus.FAILED:
|
|
259
|
-
return {
|
|
260
|
-
"success": False,
|
|
261
|
-
"session": _format_session_header(session),
|
|
262
|
-
"session_id": session.id,
|
|
263
|
-
"status": "failed",
|
|
264
|
-
"task": _truncate(task, 100),
|
|
265
|
-
"error": session.error or "Unknown error",
|
|
266
|
-
"response": response_text or "(no response captured)",
|
|
267
|
-
"tokens": _get_total_tokens(session),
|
|
268
|
-
"log_file": log_path,
|
|
269
|
-
"hint": "Check log_file for raw codex output. Use bash('cat <log_file>') to inspect.",
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
return {
|
|
273
|
-
"success": True,
|
|
274
|
-
"session": _format_session_header(session),
|
|
275
|
-
"session_id": session.id,
|
|
276
|
-
"status": session.status.value,
|
|
277
|
-
"task": _truncate(task, 100),
|
|
278
|
-
"response": response_text or "(no response captured)",
|
|
279
|
-
"tokens": _get_total_tokens(session),
|
|
280
|
-
"log_file": log_path,
|
|
281
|
-
"hint": "Use converse(session_id, message) to send follow-up messages",
|
|
282
|
-
}
|
|
283
|
-
else:
|
|
284
|
-
# Async mode - return immediately
|
|
285
|
-
return {
|
|
286
|
-
"success": True,
|
|
287
|
-
"session": _format_session_header(session),
|
|
288
|
-
"session_id": session.id,
|
|
289
|
-
"status": "running",
|
|
290
|
-
"task": _truncate(task, 100),
|
|
291
|
-
"hint": "Use check_session(session_id) to monitor progress",
|
|
292
|
-
}
|
|
245
|
+
# Return immediately - session runs in background
|
|
246
|
+
return {
|
|
247
|
+
"success": True,
|
|
248
|
+
"session": _format_session_header(session),
|
|
249
|
+
"session_id": session.id,
|
|
250
|
+
"status": "running",
|
|
251
|
+
"task": _truncate(task, 100),
|
|
252
|
+
"adapter": adapter,
|
|
253
|
+
"model": effective_model,
|
|
254
|
+
"hint": "Use sleep() then check_session(session_id) to monitor progress",
|
|
255
|
+
}
|
|
293
256
|
|
|
294
257
|
|
|
295
258
|
@weaveTool
|
|
@@ -297,41 +260,33 @@ def converse(
|
|
|
297
260
|
self: "Orchestrator",
|
|
298
261
|
session_id: str,
|
|
299
262
|
message: str,
|
|
300
|
-
wait: bool = True,
|
|
301
263
|
) -> dict[str, Any]:
|
|
302
264
|
"""
|
|
303
|
-
Continue a conversation with a
|
|
265
|
+
Continue a conversation with a session.
|
|
304
266
|
|
|
305
267
|
This injects a follow-up message into the session, providing the
|
|
306
268
|
conversation history as context. Like chatting with a developer.
|
|
269
|
+
Returns immediately - use sleep() + check_session() to poll for the response.
|
|
307
270
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
- **wait=False**: Fire-and-forget. Message sent, codex runs in background.
|
|
311
|
-
Use check_session() later to see the response.
|
|
271
|
+
Works with any adapter (codex or claude) - automatically uses the
|
|
272
|
+
correct adapter based on the session's original adapter.
|
|
312
273
|
|
|
313
274
|
Args:
|
|
314
275
|
session_id: The session to continue (from delegate() result).
|
|
315
|
-
message: Your next message
|
|
316
|
-
wait: If True, wait for response. If False, return immediately.
|
|
276
|
+
message: Your next message.
|
|
317
277
|
|
|
318
278
|
Returns:
|
|
319
|
-
{session_id,
|
|
279
|
+
{session_id, turn, status: "running"}
|
|
320
280
|
|
|
321
|
-
Example
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
#
|
|
325
|
-
|
|
326
|
-
Example (async - managing multiple sessions):
|
|
327
|
-
converse(session_id="abc123", message="Add tests", wait=False)
|
|
328
|
-
converse(session_id="def456", message="Fix bug", wait=False)
|
|
329
|
-
# Both running in parallel, check later with check_session()
|
|
281
|
+
Example:
|
|
282
|
+
converse(session_id="abc123", message="Add tests")
|
|
283
|
+
sleep(30)
|
|
284
|
+
check_session(session_id) # Get response
|
|
330
285
|
"""
|
|
331
|
-
|
|
286
|
+
# First get session to determine adapter
|
|
287
|
+
default_manager = _get_session_manager(self)
|
|
288
|
+
session = default_manager.get_session(session_id)
|
|
332
289
|
|
|
333
|
-
# Get current session
|
|
334
|
-
session = manager.get_session(session_id)
|
|
335
290
|
if not session:
|
|
336
291
|
return {
|
|
337
292
|
"success": False,
|
|
@@ -355,8 +310,12 @@ def converse(
|
|
|
355
310
|
"hint": "Start a new session with delegate()",
|
|
356
311
|
}
|
|
357
312
|
|
|
313
|
+
# Get the correct adapter manager for this session
|
|
314
|
+
adapter = getattr(session, "adapter", "codex")
|
|
315
|
+
manager = _get_adapter_manager(self, adapter)
|
|
316
|
+
|
|
358
317
|
# Inject the follow-up message
|
|
359
|
-
# This uses
|
|
318
|
+
# This uses the adapter's inject_message() which:
|
|
360
319
|
# 1. Builds context from previous messages
|
|
361
320
|
# 2. Starts a new turn with the context + new message (background process)
|
|
362
321
|
updated_session = manager.inject_message(session_id, message)
|
|
@@ -368,53 +327,16 @@ def converse(
|
|
|
368
327
|
"session_id": session_id,
|
|
369
328
|
}
|
|
370
329
|
|
|
371
|
-
|
|
372
|
-
# Async mode - return immediately
|
|
373
|
-
return {
|
|
374
|
-
"success": True,
|
|
375
|
-
"session": _format_session_header(updated_session),
|
|
376
|
-
"session_id": session_id,
|
|
377
|
-
"turn": updated_session.turn,
|
|
378
|
-
"status": "running",
|
|
379
|
-
"you_said": _truncate(message, 100),
|
|
380
|
-
"hint": "Use check_session(session_id) to see the response when ready",
|
|
381
|
-
}
|
|
382
|
-
|
|
383
|
-
# Sync mode - wait for completion
|
|
384
|
-
completed = _wait_for_completion(
|
|
385
|
-
manager,
|
|
386
|
-
session_id,
|
|
387
|
-
timeout=self.config.executor.timeout or 300.0,
|
|
388
|
-
)
|
|
389
|
-
|
|
390
|
-
# Refresh session
|
|
391
|
-
session = manager.get_session(session_id)
|
|
392
|
-
|
|
393
|
-
if not completed:
|
|
394
|
-
return {
|
|
395
|
-
"success": False,
|
|
396
|
-
"session_id": session_id,
|
|
397
|
-
"status": "timeout",
|
|
398
|
-
"error": "Session timed out waiting for response",
|
|
399
|
-
"hint": "Use check_session() to monitor progress",
|
|
400
|
-
}
|
|
401
|
-
|
|
402
|
-
# Get the response (last assistant message)
|
|
403
|
-
response_text = ""
|
|
404
|
-
messages = manager.get_messages(session_id)
|
|
405
|
-
for msg in reversed(messages):
|
|
406
|
-
if msg.role == "assistant":
|
|
407
|
-
response_text = msg.content
|
|
408
|
-
break
|
|
409
|
-
|
|
330
|
+
# Return immediately - session runs in background
|
|
410
331
|
return {
|
|
411
332
|
"success": True,
|
|
412
|
-
"session": _format_session_header(
|
|
333
|
+
"session": _format_session_header(updated_session),
|
|
413
334
|
"session_id": session_id,
|
|
414
|
-
"turn":
|
|
335
|
+
"turn": updated_session.turn,
|
|
336
|
+
"status": "running",
|
|
337
|
+
"adapter": adapter,
|
|
415
338
|
"you_said": _truncate(message, 100),
|
|
416
|
-
"
|
|
417
|
-
"tokens": _get_total_tokens(session),
|
|
339
|
+
"hint": "Use sleep() then check_session(session_id) to see the response",
|
|
418
340
|
}
|
|
419
341
|
|
|
420
342
|
|
|
@@ -782,3 +704,44 @@ def list_sessions(
|
|
|
782
704
|
"filter": status or "all",
|
|
783
705
|
"hint": "Sessions with needs_attention=True have new responses to review" if needs_attention_count else None,
|
|
784
706
|
}
|
|
707
|
+
|
|
708
|
+
|
|
709
|
+
@weaveTool
|
|
710
|
+
def sleep(self, seconds: float) -> dict[str, Any]:
|
|
711
|
+
"""
|
|
712
|
+
Sleep for a specified number of seconds.
|
|
713
|
+
|
|
714
|
+
Use this when you've started sessions with delegate() or converse() and want to
|
|
715
|
+
give them time to complete before checking their status. This lets you
|
|
716
|
+
manage your own polling loop:
|
|
717
|
+
|
|
718
|
+
1. delegate(task) -> start background work
|
|
719
|
+
2. sleep(10) -> wait a bit
|
|
720
|
+
3. peek_session(id) -> check if done
|
|
721
|
+
4. Repeat 2-3 if still running
|
|
722
|
+
|
|
723
|
+
Args:
|
|
724
|
+
seconds: Number of seconds to sleep (max 300 = 5 minutes)
|
|
725
|
+
|
|
726
|
+
Returns:
|
|
727
|
+
Dict with success status and actual sleep duration
|
|
728
|
+
"""
|
|
729
|
+
# Cap at 5 minutes to prevent accidental long hangs
|
|
730
|
+
max_sleep = 300.0
|
|
731
|
+
actual_seconds = min(float(seconds), max_sleep)
|
|
732
|
+
|
|
733
|
+
if actual_seconds <= 0:
|
|
734
|
+
return {
|
|
735
|
+
"success": False,
|
|
736
|
+
"error": "Sleep duration must be positive",
|
|
737
|
+
"requested": seconds,
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
time.sleep(actual_seconds)
|
|
741
|
+
|
|
742
|
+
return {
|
|
743
|
+
"success": True,
|
|
744
|
+
"slept_seconds": actual_seconds,
|
|
745
|
+
"capped": actual_seconds < seconds,
|
|
746
|
+
"max_allowed": max_sleep if actual_seconds < seconds else None,
|
|
747
|
+
}
|