zwarm 1.3.11__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/core/config.py CHANGED
@@ -36,6 +36,7 @@ class ExecutorConfig:
36
36
  model: str | None = None
37
37
  sandbox: str = "workspace-write" # read-only | workspace-write | danger-full-access
38
38
  timeout: int = 3600
39
+ reasoning_effort: str | None = "high" # low | medium | high (default to high for compatibility)
39
40
 
40
41
 
41
42
  @dataclass
@@ -164,6 +165,7 @@ class ZwarmConfig:
164
165
  "model": self.executor.model,
165
166
  "sandbox": self.executor.sandbox,
166
167
  "timeout": self.executor.timeout,
168
+ "reasoning_effort": self.executor.reasoning_effort,
167
169
  },
168
170
  "orchestrator": {
169
171
  "lm": self.orchestrator.lm,
zwarm/orchestrator.py CHANGED
@@ -20,6 +20,8 @@ import weave
20
20
  from pydantic import Field, PrivateAttr
21
21
  from wbal.agents.yaml_agent import YamlAgent
22
22
  from wbal.helper import TOOL_CALL_TYPE, format_openai_tool_response
23
+ from wbal.lm import LM as wbalLMGeneric
24
+ from wbal.lm import GPT5LargeVerbose
23
25
 
24
26
  from zwarm.adapters import ExecutorAdapter, get_adapter
25
27
  from zwarm.core.compact import compact_messages, should_compact
@@ -29,9 +31,9 @@ from zwarm.core.models import ConversationSession
29
31
  from zwarm.core.state import StateManager
30
32
  from zwarm.prompts import get_orchestrator_prompt
31
33
  from zwarm.watchers import (
32
- WatcherManager,
33
- WatcherContext,
34
34
  WatcherAction,
35
+ WatcherContext,
36
+ WatcherManager,
35
37
  build_watcher_manager,
36
38
  )
37
39
 
@@ -48,6 +50,9 @@ class Orchestrator(YamlAgent):
48
50
  - Weave integration
49
51
  """
50
52
 
53
+ # LM definition override:
54
+ lm: wbalLMGeneric = Field(default_factory=GPT5LargeVerbose)
55
+
51
56
  # Configuration
52
57
  config: ZwarmConfig = Field(default_factory=ZwarmConfig)
53
58
  working_dir: Path = Field(default_factory=Path.cwd)
@@ -71,11 +76,13 @@ class Orchestrator(YamlAgent):
71
76
  _watcher_manager: WatcherManager | None = PrivateAttr(default=None)
72
77
  _resumed: bool = PrivateAttr(default=False)
73
78
  _total_tokens: int = PrivateAttr(default=0) # Cumulative orchestrator tokens
74
- _executor_usage: dict[str, int] = PrivateAttr(default_factory=lambda: {
75
- "input_tokens": 0,
76
- "output_tokens": 0,
77
- "total_tokens": 0,
78
- })
79
+ _executor_usage: dict[str, int] = PrivateAttr(
80
+ default_factory=lambda: {
81
+ "input_tokens": 0,
82
+ "output_tokens": 0,
83
+ "total_tokens": 0,
84
+ }
85
+ )
79
86
 
80
87
  def model_post_init(self, __context: Any) -> None:
81
88
  """Initialize state and adapters after model creation."""
@@ -93,6 +100,7 @@ class Orchestrator(YamlAgent):
93
100
  # Register instance if using instance isolation
94
101
  if self.instance_id:
95
102
  from zwarm.core.state import register_instance
103
+
96
104
  register_instance(
97
105
  instance_id=self.instance_id,
98
106
  name=self.instance_name,
@@ -143,7 +151,15 @@ class Orchestrator(YamlAgent):
143
151
  if name not in self._adapters:
144
152
  # Get model from config (adapters have their own defaults if None)
145
153
  model = self.config.executor.model
146
- self._adapters[name] = get_adapter(name, model=model)
154
+
155
+ # Use isolated codex config if available
156
+ config_path = self.working_dir / self.config.state_dir / "codex.toml"
157
+ if not config_path.exists():
158
+ config_path = None # Fallback to adapter defaults
159
+
160
+ self._adapters[name] = get_adapter(
161
+ name, model=model, config_path=config_path
162
+ )
147
163
  return self._adapters[name]
148
164
 
149
165
  def get_executor_usage(self) -> dict[str, int]:
@@ -178,10 +194,43 @@ class Orchestrator(YamlAgent):
178
194
  """
179
195
  loaded_messages = self._state.load_orchestrator_messages()
180
196
  if loaded_messages:
181
- self.messages = loaded_messages
197
+ self.messages = self._sanitize_messages_for_resume(loaded_messages)
182
198
  self._resumed = True
183
199
  # If no messages were saved, don't set _resumed - start fresh
184
200
 
201
def _sanitize_messages_for_resume(self, messages: list[dict]) -> list[dict]:
    """
    Return a copy of persisted messages that is safe to send back to the API.

    Messages saved from OpenAI reasoning models carry response-only fields
    that cannot be replayed as input. Reasoning items themselves are kept;
    only the keys listed below are dropped. Note that the keys are removed
    from every dict at any nesting depth, not just from reasoning items:

    - status: item status (null, "in_progress", "completed")
    - encrypted_content: encrypted reasoning content

    Args:
        messages: Messages as loaded from disk.

    Returns:
        A new list built from freshly constructed dicts/lists; the input
        structure is not mutated.
    """
    # Keys that only ever appear in API responses and must not be echoed back
    dropped_keys = {"status", "encrypted_content"}

    def scrub(node: Any) -> Any:
        """Rebuild dicts and lists without the dropped keys; pass scalars through."""
        if isinstance(node, list):
            return [scrub(child) for child in node]
        if not isinstance(node, dict):
            return node

        cleaned = {}
        for key, value in node.items():
            if key in dropped_keys:
                continue
            cleaned[key] = scrub(value)
        return cleaned

    return list(map(scrub, messages))
233
+
185
234
  def _maybe_compact(self) -> bool:
186
235
  """
187
236
  Check if compaction is needed and compact if so.
@@ -214,15 +263,18 @@ class Orchestrator(YamlAgent):
214
263
 
215
264
  # Log compaction event
216
265
  from zwarm.core.models import Event
217
- self._state.log_event(Event(
218
- kind="context_compacted",
219
- payload={
220
- "step": self._step_count,
221
- "original_count": result.original_count,
222
- "new_count": len(result.messages),
223
- "removed_count": result.removed_count,
224
- },
225
- ))
266
+
267
+ self._state.log_event(
268
+ Event(
269
+ kind="context_compacted",
270
+ payload={
271
+ "step": self._step_count,
272
+ "original_count": result.original_count,
273
+ "new_count": len(result.messages),
274
+ "removed_count": result.removed_count,
275
+ },
276
+ )
277
+ )
226
278
 
227
279
  return True
228
280
 
@@ -338,17 +390,20 @@ Review what was accomplished in the previous session and delegate new tasks as n
338
390
 
339
391
  # Log watcher execution to events
340
392
  from zwarm.core.models import Event
393
+
341
394
  watcher_names = [w.name for w in self.config.watchers.watchers if w.enabled]
342
- self.state.log_event(Event(
343
- kind="watchers_run",
344
- payload={
345
- "step": self._step_count,
346
- "watchers": watcher_names,
347
- "action": result.action.value,
348
- "triggered_by": result.metadata.get("triggered_by"),
349
- "reason": result.metadata.get("reason"),
350
- },
351
- ))
395
+ self.state.log_event(
396
+ Event(
397
+ kind="watchers_run",
398
+ payload={
399
+ "step": self._step_count,
400
+ "watchers": watcher_names,
401
+ "action": result.action.value,
402
+ "triggered_by": result.metadata.get("triggered_by"),
403
+ "reason": result.metadata.get("reason"),
404
+ },
405
+ )
406
+ )
352
407
 
353
408
  # Handle watcher result
354
409
  if result.action == WatcherAction.NUDGE and result.guidance:
@@ -27,15 +27,17 @@ For everything else, make your best judgment and proceed. If you're unsure wheth
27
27
 
28
28
  Your primary tools are for delegation and verification:
29
29
 
30
- **delegate(task, mode, adapter, model)** - This is how you assign work to an executor. The `task` parameter should be a clear, specific description of what you want done. The `mode` parameter controls whether this is a conversational interaction ("sync") or a fire-and-forget background task ("async"). You can optionally specify which `adapter` (executor type) to use and which `model` to run.
30
+ **delegate(task, working_dir=None, model=None, wait=True)** - Start a new executor session. The `task` should be a clear, specific description of what you want done. Use `wait=True` (default) for interactive work where you'll iterate with the executor. Use `wait=False` to spawn background work and continue immediately. The `working_dir` parameter lets you run the executor in a specific directory.
31
31
 
32
- **converse(session_id, message)** - After starting a sync session with delegate(), use this to continue the conversation. This is how you provide feedback, ask for changes, or guide the executor through a complex task. The executor maintains full context of the conversation, so you can reference previous messages naturally.
32
+ **converse(session_id, message, wait=True)** - Continue an existing conversation. Use this to provide feedback, ask for changes, or guide the executor through complex work. The executor maintains full context. Use `wait=False` to send the message and continue without waiting for a response.
33
33
 
34
- **check_session(session_id)** - For async sessions, use this to poll for completion status. Also useful for sync sessions if you want to verify the current state.
34
+ **peek_session(session_id)** - Quick status check. Returns just the session status and latest message. Use this for fast polling when you have multiple sessions running.
35
35
 
36
- **end_session(session_id, verdict, summary)** - Call this to close out a session. The verdict should be "completed" if the work was successful, "failed" if it couldn't be salvaged, or "cancelled" if you're abandoning it for strategic reasons. Always provide a summary describing what was accomplished or why it failed.
36
+ **check_session(session_id)** - Full session details including all messages, token usage, runtime. Use this when you need the complete picture.
37
37
 
38
- **list_sessions(status)** - Shows all your active and completed sessions. Useful for tracking parallel work or reviewing what's been done.
38
+ **list_sessions(status=None)** - List all sessions. Returns a `needs_attention` flag for each session indicating if it recently completed or failed. Use this to monitor multiple parallel sessions and see which ones have new responses ready for review.
39
+
40
+ **end_session(session_id, reason=None, delete=False)** - Kill a running session or clean up a completed one. Use `delete=True` to remove the session entirely (won't show in list_sessions anymore).
39
41
 
40
42
  **bash(command)** - Run shell commands directly. Use this primarily for verification: running tests, type checkers, linters, build commands, or inspecting the filesystem. Do NOT use bash to write code yourself - that's what executors are for.
41
43
 
@@ -61,21 +63,35 @@ The watchers are on your side. They exist to help you succeed, not to criticize.
61
63
 
62
64
  ---
63
65
 
64
- # Sync vs Async: Choosing the Right Mode
66
+ # Sync vs Async: Choosing the Right Approach
67
+
68
+ The `wait` parameter controls whether you block waiting for a response or continue immediately.
65
69
 
66
- The mode you choose for delegation significantly affects how work proceeds.
70
+ **Sync (wait=True)** creates an interactive conversation with the executor. After your task description, you receive the executor's response immediately. You can then provide feedback via converse(), ask for changes, or confirm the work is acceptable. This back-and-forth continues until you're satisfied.
67
71
 
68
- **Sync mode** creates an interactive conversation with the executor. After your initial task description, the executor responds with either a clarifying question or their initial work. You can then provide feedback, ask for changes, or confirm the work is acceptable. This back-and-forth continues until you're satisfied, at which point you call end_session().
72
+ Use sync when the task involves ambiguity, when you expect to iterate, when you want to review results before proceeding, or for high-stakes work needing close supervision.
69
73
 
70
- Use sync mode when the task involves ambiguity that the executor might need to resolve, when you expect to iterate on the solution, when you want to review intermediate results before proceeding, or when the task requires exploration or research where the path isn't clear upfront. Sync mode is also appropriate for high-stakes work where you want close supervision.
74
+ Typical sync pattern:
75
+ 1. `delegate(task)` - get initial response
76
+ 2. Evaluate - does it meet requirements?
77
+ 3. `converse(id, "feedback...")` - if changes needed
78
+ 4. Repeat until satisfied
79
+ 5. `end_session(id)` or just move on
71
80
 
72
- The typical sync pattern is: delegate with your task description, receive the executor's initial response, evaluate whether it meets your requirements, use converse() to provide corrections or additional guidance if needed, repeat until satisfied, then end_session() with verdict="completed".
81
+ **Async (wait=False)** is fire-and-forget. You spawn the work and continue immediately without waiting. The executor works in the background.
73
82
 
74
- **Async mode** is fire-and-forget. You describe the task, the executor works on it in the background, and you can check on progress periodically or wait for completion. You don't have the opportunity for mid-task guidance.
83
+ Use async when tasks are well-defined and self-contained, when you're confident the executor can complete without guidance, or when you want to parallelize multiple independent pieces of work. Async is efficient for clear-cut tasks like "add tests for this function" or "fix this lint error".
75
84
 
76
- Use async mode when the task is well-defined and self-contained, when you're confident the executor can complete it without guidance, or when you want to parallelize multiple independent pieces of work. Async is efficient for clear-cut tasks like "add tests for this function" or "fix this specific lint error" where there's little ambiguity about what success looks like.
85
+ Async pattern for parallel work:
86
+ 1. `delegate(task1, wait=False)` → session a
87
+ 2. `delegate(task2, wait=False)` → session b
88
+ 3. `delegate(task3, wait=False)` → session c
89
+ 4. `list_sessions()` → check `needs_attention` flags
90
+ 5. `peek_session(a)` → quick status check
91
+ 6. `check_session(b)` → full details when ready
92
+ 7. `converse(a, "now do X", wait=False)` → continue without blocking
77
93
 
78
- When in doubt, prefer sync mode. The overhead of conversation is small compared to the cost of an executor going off in the wrong direction unsupervised.
94
+ When in doubt, prefer sync. The overhead of waiting is small compared to an executor going off in the wrong direction unsupervised.
79
95
 
80
96
  ---
81
97
 
@@ -14,11 +14,13 @@ Features:
14
14
  from zwarm.sessions.manager import (
15
15
  CodexSession,
16
16
  CodexSessionManager,
17
+ SessionMessage,
17
18
  SessionStatus,
18
19
  )
19
20
 
20
21
  __all__ = [
21
22
  "CodexSession",
22
23
  "CodexSessionManager",
24
+ "SessionMessage",
23
25
  "SessionStatus",
24
26
  ]
zwarm/sessions/manager.py CHANGED
@@ -225,9 +225,11 @@ class CodexSessionManager:
225
225
  continue
226
226
  session = self._load_session(session_dir.name)
227
227
  if session:
228
- # Update status if process died
229
- if session.status == SessionStatus.RUNNING and not session.is_running:
230
- self._update_session_status(session)
228
+ # Update status if process died OR output indicates completion
229
+ # (output check is more reliable than PID check due to PID reuse)
230
+ if session.status == SessionStatus.RUNNING:
231
+ if self._is_output_complete(session.id, session.turn) or not session.is_running:
232
+ self._update_session_status(session)
231
233
 
232
234
  if status is None or session.status == status:
233
235
  sessions.append(session)
@@ -241,8 +243,9 @@ class CodexSessionManager:
241
243
  # Try exact match first
242
244
  session = self._load_session(session_id)
243
245
  if session:
244
- if session.status == SessionStatus.RUNNING and not session.is_running:
245
- self._update_session_status(session)
246
+ if session.status == SessionStatus.RUNNING:
247
+ if self._is_output_complete(session.id, session.turn) or not session.is_running:
248
+ self._update_session_status(session)
246
249
  return session
247
250
 
248
251
  # Try partial match
@@ -250,12 +253,45 @@ class CodexSessionManager:
250
253
  if session_dir.name.startswith(session_id):
251
254
  session = self._load_session(session_dir.name)
252
255
  if session:
253
- if session.status == SessionStatus.RUNNING and not session.is_running:
254
- self._update_session_status(session)
256
+ if session.status == SessionStatus.RUNNING:
257
+ if self._is_output_complete(session.id, session.turn) or not session.is_running:
258
+ self._update_session_status(session)
255
259
  return session
256
260
 
257
261
  return None
258
262
 
263
def _is_output_complete(self, session_id: str, turn: int) -> bool:
    """
    Check whether the JSONL output file for a turn contains a completion marker.

    Each non-blank line of the output file is parsed as JSON. The turn is
    considered complete as soon as one event object has a ``type`` of
    'turn.completed', 'task.completed', 'completed', 'done', or 'error'
    (an error is treated as a form of completion).

    Lines that are not valid JSON, or that are valid JSON but not an object,
    are skipped so that a single odd line cannot hide a later completion
    marker.

    Args:
        session_id: Session whose output file should be inspected.
        turn: Turn number selecting the output file.

    Returns:
        True if a completion marker was found; False if none was found or
        the file is missing or unreadable.
    """
    # A tuple (not a set) so an unhashable "type" value cannot raise on lookup
    completion_types = (
        "turn.completed",
        "task.completed",
        "completed",
        "done",
        "error",
    )

    output_path = self._output_path(session_id, turn)

    try:
        # Stream the file: output can grow large and we can stop at the first marker
        with output_path.open() as stream:
            for raw_line in stream:
                if not raw_line.strip():
                    continue
                try:
                    event = json.loads(raw_line)
                except json.JSONDecodeError:
                    continue
                # Only JSON objects can carry a "type"; skip scalars and arrays
                if not isinstance(event, dict):
                    continue
                if event.get("type", "") in completion_types:
                    return True
    except (OSError, UnicodeDecodeError):
        # Missing or unreadable output is reported as "not complete" (best effort)
        return False

    return False
294
+
259
295
  def _update_session_status(self, session: CodexSession) -> None:
260
296
  """Update session status after process completion."""
261
297
  # Parse output to determine status
@@ -325,6 +361,8 @@ class CodexSessionManager:
325
361
  cmd = [
326
362
  "codex", "exec",
327
363
  "--json",
364
+ "--full-auto",
365
+ "--skip-git-repo-check",
328
366
  "--model", model,
329
367
  "-C", str(working_dir.absolute()),
330
368
  ]
@@ -408,6 +446,8 @@ Continue from where you left off, addressing the user's new message."""
408
446
  cmd = [
409
447
  "codex", "exec",
410
448
  "--json",
449
+ "--full-auto",
450
+ "--skip-git-repo-check",
411
451
  "--model", session.model,
412
452
  "-C", str(session.working_dir.absolute()),
413
453
  "--", augmented_task,
@@ -431,10 +471,14 @@ Continue from where you left off, addressing the user's new message."""
431
471
 
432
472
  return session
433
473
 
434
- def kill_session(self, session_id: str) -> bool:
474
+ def kill_session(self, session_id: str, delete: bool = False) -> bool:
435
475
  """
436
476
  Kill a running session.
437
477
 
478
+ Args:
479
+ session_id: Session to kill
480
+ delete: If True, also delete session data entirely
481
+
438
482
  Returns True if killed, False if not found or not running.
439
483
  """
440
484
  session = self.get_session(session_id)
@@ -453,11 +497,46 @@ Continue from where you left off, addressing the user's new message."""
453
497
  except (OSError, ProcessLookupError):
454
498
  pass
455
499
 
500
+ if delete:
501
+ return self.delete_session(session.id)
502
+
456
503
  session.status = SessionStatus.KILLED
457
504
  session.error = "Manually killed"
458
505
  self._save_session(session)
459
506
  return True
460
507
 
508
def delete_session(self, session_id: str) -> bool:
    """
    Remove a session and all of its on-disk data.

    If the session still has a live process, its process group is sent
    SIGTERM first, then SIGKILL if it is still running 0.3 seconds later.
    Errors raised while signalling are ignored.

    Args:
        session_id: Identifier passed to ``get_session`` to look the session up.

    Returns:
        True if the session directory existed and was removed; False if the
        session was not found or its directory does not exist.
    """
    import shutil

    target = self.get_session(session_id)
    if not target:
        return False

    # Stop the process group before its files are removed
    if target.pid and target.is_running:
        try:
            process_group = os.getpgid(target.pid)
            os.killpg(process_group, signal.SIGTERM)
            # Short grace period before escalating to SIGKILL
            time.sleep(0.3)
            if target.is_running:
                os.killpg(os.getpgid(target.pid), signal.SIGKILL)
        except OSError:
            # ProcessLookupError is an OSError; either way, carry on to cleanup
            pass

    data_dir = self._session_dir(target.id)
    if not data_dir.exists():
        return False

    shutil.rmtree(data_dir)
    return True
539
+
461
540
  def get_output(self, session_id: str, turn: int | None = None) -> str:
462
541
  """Get raw JSONL output for a session."""
463
542
  session = self.get_session(session_id)