PyPI - zwarm - Versions diffs - 1.3.11__py3-none-any.whl → 2.0.1__py3-none-any.whl - Mend

zwarm 1.3.11__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

zwarm/adapters/codex_mcp.py +475 -227
zwarm/cli/main.py +485 -143
zwarm/core/config.py +2 -0
zwarm/orchestrator.py +83 -28
zwarm/prompts/orchestrator.py +29 -13
zwarm/sessions/__init__.py +2 -0
zwarm/sessions/manager.py +87 -8
zwarm/tools/delegation.py +358 -323
zwarm-2.0.1.dist-info/METADATA +309 -0
{zwarm-1.3.11.dist-info → zwarm-2.0.1.dist-info}/RECORD +12 -12
zwarm-1.3.11.dist-info/METADATA +0 -525
{zwarm-1.3.11.dist-info → zwarm-2.0.1.dist-info}/WHEEL +0 -0
{zwarm-1.3.11.dist-info → zwarm-2.0.1.dist-info}/entry_points.txt +0 -0

zwarm/core/config.py CHANGED Viewed

@@ -36,6 +36,7 @@ class ExecutorConfig:
     model: str | None = None
     sandbox: str = "workspace-write"  # read-only | workspace-write | danger-full-access
     timeout: int = 3600
+    reasoning_effort: str | None = "high"  # low | medium | high (default to high for compatibility)
 @dataclass
@@ -164,6 +165,7 @@ class ZwarmConfig:
                 "model": self.executor.model,
                 "sandbox": self.executor.sandbox,
                 "timeout": self.executor.timeout,
+                "reasoning_effort": self.executor.reasoning_effort,
             },
             "orchestrator": {
                 "lm": self.orchestrator.lm,

zwarm/orchestrator.py CHANGED Viewed

@@ -20,6 +20,8 @@ import weave
 from pydantic import Field, PrivateAttr
 from wbal.agents.yaml_agent import YamlAgent
 from wbal.helper import TOOL_CALL_TYPE, format_openai_tool_response
+from wbal.lm import LM as wbalLMGeneric
+from wbal.lm import GPT5LargeVerbose
 from zwarm.adapters import ExecutorAdapter, get_adapter
 from zwarm.core.compact import compact_messages, should_compact
@@ -29,9 +31,9 @@ from zwarm.core.models import ConversationSession
 from zwarm.core.state import StateManager
 from zwarm.prompts import get_orchestrator_prompt
 from zwarm.watchers import (
-    WatcherManager,
-    WatcherContext,
     WatcherAction,
+    WatcherContext,
+    WatcherManager,
     build_watcher_manager,
 )
@@ -48,6 +50,9 @@ class Orchestrator(YamlAgent):
     - Weave integration
     """
+    # LM definition override:
+    lm: wbalLMGeneric = Field(default_factory=GPT5LargeVerbose)
     # Configuration
     config: ZwarmConfig = Field(default_factory=ZwarmConfig)
     working_dir: Path = Field(default_factory=Path.cwd)
@@ -71,11 +76,13 @@ class Orchestrator(YamlAgent):
     _watcher_manager: WatcherManager | None = PrivateAttr(default=None)
     _resumed: bool = PrivateAttr(default=False)
     _total_tokens: int = PrivateAttr(default=0)  # Cumulative orchestrator tokens
-    _executor_usage: dict[str, int] = PrivateAttr(default_factory=lambda: {
-        "input_tokens": 0,
-        "output_tokens": 0,
-        "total_tokens": 0,
-    })
+    _executor_usage: dict[str, int] = PrivateAttr(
+        default_factory=lambda: {
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "total_tokens": 0,
+        }
+    )
     def model_post_init(self, __context: Any) -> None:
         """Initialize state and adapters after model creation."""
@@ -93,6 +100,7 @@ class Orchestrator(YamlAgent):
         # Register instance if using instance isolation
         if self.instance_id:
             from zwarm.core.state import register_instance
             register_instance(
                 instance_id=self.instance_id,
                 name=self.instance_name,
@@ -143,7 +151,15 @@ class Orchestrator(YamlAgent):
         if name not in self._adapters:
             # Get model from config (adapters have their own defaults if None)
             model = self.config.executor.model
-            self._adapters[name] = get_adapter(name, model=model)
+            # Use isolated codex config if available
+            config_path = self.working_dir / self.config.state_dir / "codex.toml"
+            if not config_path.exists():
+                config_path = None  # Fallback to adapter defaults
+            self._adapters[name] = get_adapter(
+                name, model=model, config_path=config_path
+            )
         return self._adapters[name]
     def get_executor_usage(self) -> dict[str, int]:
@@ -178,10 +194,43 @@ class Orchestrator(YamlAgent):
         """
         loaded_messages = self._state.load_orchestrator_messages()
         if loaded_messages:
-            self.messages = loaded_messages
+            self.messages = self._sanitize_messages_for_resume(loaded_messages)
             self._resumed = True
         # If no messages were saved, don't set _resumed - start fresh
+    def _sanitize_messages_for_resume(self, messages: list[dict]) -> list[dict]:
+        """
+        Sanitize messages loaded from disk for sending back to the API.
+        OpenAI's reasoning models include response-only fields (status, encrypted_content)
+        in reasoning blocks that can't be sent back as input. We keep the reasoning
+        items but strip the response-only fields.
+        Response-only fields that must be removed:
+        - status: reasoning item status (null, "in_progress", "completed")
+        - encrypted_content: encrypted reasoning content
+        """
+        # Fields that are response-only and must be stripped for input
+        RESPONSE_ONLY_FIELDS = {
+            "status",
+            "encrypted_content",
+        }
+        def clean_item(item: Any) -> Any:
+            """Recursively clean an item, removing response-only fields."""
+            if isinstance(item, dict):
+                return {
+                    k: clean_item(v)
+                    for k, v in item.items()
+                    if k not in RESPONSE_ONLY_FIELDS
+                }
+            elif isinstance(item, list):
+                return [clean_item(x) for x in item]
+            else:
+                return item
+        return [clean_item(msg) for msg in messages]
     def _maybe_compact(self) -> bool:
         """
         Check if compaction is needed and compact if so.
@@ -214,15 +263,18 @@ class Orchestrator(YamlAgent):
             # Log compaction event
             from zwarm.core.models import Event
-            self._state.log_event(Event(
-                kind="context_compacted",
-                payload={
-                    "step": self._step_count,
-                    "original_count": result.original_count,
-                    "new_count": len(result.messages),
-                    "removed_count": result.removed_count,
-                },
-            ))
+            self._state.log_event(
+                Event(
+                    kind="context_compacted",
+                    payload={
+                        "step": self._step_count,
+                        "original_count": result.original_count,
+                        "new_count": len(result.messages),
+                        "removed_count": result.removed_count,
+                    },
+                )
+            )
             return True
@@ -338,17 +390,20 @@ Review what was accomplished in the previous session and delegate new tasks as n
         # Log watcher execution to events
         from zwarm.core.models import Event
         watcher_names = [w.name for w in self.config.watchers.watchers if w.enabled]
-        self.state.log_event(Event(
-            kind="watchers_run",
-            payload={
-                "step": self._step_count,
-                "watchers": watcher_names,
-                "action": result.action.value,
-                "triggered_by": result.metadata.get("triggered_by"),
-                "reason": result.metadata.get("reason"),
-            },
-        ))
+        self.state.log_event(
+            Event(
+                kind="watchers_run",
+                payload={
+                    "step": self._step_count,
+                    "watchers": watcher_names,
+                    "action": result.action.value,
+                    "triggered_by": result.metadata.get("triggered_by"),
+                    "reason": result.metadata.get("reason"),
+                },
+            )
+        )
         # Handle watcher result
         if result.action == WatcherAction.NUDGE and result.guidance:

zwarm/prompts/orchestrator.py CHANGED Viewed

@@ -27,15 +27,17 @@ For everything else, make your best judgment and proceed. If you're unsure wheth
 Your primary tools are for delegation and verification:
-**delegate(task, mode, adapter, model)** - This is how you assign work to an executor. The `task` parameter should be a clear, specific description of what you want done. The `mode` parameter controls whether this is a conversational interaction ("sync") or a fire-and-forget background task ("async"). You can optionally specify which `adapter` (executor type) to use and which `model` to run.
+**delegate(task, working_dir=None, model=None, wait=True)** - Start a new executor session. The `task` should be a clear, specific description of what you want done. Use `wait=True` (default) for interactive work where you'll iterate with the executor. Use `wait=False` to spawn background work and continue immediately. The `working_dir` parameter lets you run the executor in a specific directory.
-**converse(session_id, message)** - After starting a sync session with delegate(), use this to continue the conversation. This is how you provide feedback, ask for changes, or guide the executor through a complex task. The executor maintains full context of the conversation, so you can reference previous messages naturally.
+**converse(session_id, message, wait=True)** - Continue an existing conversation. Use this to provide feedback, ask for changes, or guide the executor through complex work. The executor maintains full context. Use `wait=False` to send the message and continue without waiting for a response.
-**check_session(session_id)** - For async sessions, use this to poll for completion status. Also useful for sync sessions if you want to verify the current state.
+**peek_session(session_id)** - Quick status check. Returns just the session status and latest message. Use this for fast polling when you have multiple sessions running.
-**end_session(session_id, verdict, summary)** - Call this to close out a session. The verdict should be "completed" if the work was successful, "failed" if it couldn't be salvaged, or "cancelled" if you're abandoning it for strategic reasons. Always provide a summary describing what was accomplished or why it failed.
+**check_session(session_id)** - Full session details including all messages, token usage, runtime. Use this when you need the complete picture.
-**list_sessions(status)** - Shows all your active and completed sessions. Useful for tracking parallel work or reviewing what's been done.
+**list_sessions(status=None)** - List all sessions. Returns a `needs_attention` flag for each session indicating if it recently completed or failed. Use this to monitor multiple parallel sessions and see which ones have new responses ready for review.
+**end_session(session_id, reason=None, delete=False)** - Kill a running session or clean up a completed one. Use `delete=True` to remove the session entirely (won't show in list_sessions anymore).
 **bash(command)** - Run shell commands directly. Use this primarily for verification: running tests, type checkers, linters, build commands, or inspecting the filesystem. Do NOT use bash to write code yourself - that's what executors are for.
@@ -61,21 +63,35 @@ The watchers are on your side. They exist to help you succeed, not to criticize.
 ---
-# Sync vs Async: Choosing the Right Mode
+# Sync vs Async: Choosing the Right Approach
+The `wait` parameter controls whether you block waiting for a response or continue immediately.
-The mode you choose for delegation significantly affects how work proceeds.
+**Sync (wait=True)** creates an interactive conversation with the executor. After your task description, you receive the executor's response immediately. You can then provide feedback via converse(), ask for changes, or confirm the work is acceptable. This back-and-forth continues until you're satisfied.
-**Sync mode** creates an interactive conversation with the executor. After your initial task description, the executor responds with either a clarifying question or their initial work. You can then provide feedback, ask for changes, or confirm the work is acceptable. This back-and-forth continues until you're satisfied, at which point you call end_session().
+Use sync when the task involves ambiguity, when you expect to iterate, when you want to review results before proceeding, or for high-stakes work needing close supervision.
-Use sync mode when the task involves ambiguity that the executor might need to resolve, when you expect to iterate on the solution, when you want to review intermediate results before proceeding, or when the task requires exploration or research where the path isn't clear upfront. Sync mode is also appropriate for high-stakes work where you want close supervision.
+Typical sync pattern:
+1. `delegate(task)` - get initial response
+2. Evaluate - does it meet requirements?
+3. `converse(id, "feedback...")` - if changes needed
+4. Repeat until satisfied
+5. `end_session(id)` or just move on
-The typical sync pattern is: delegate with your task description, receive the executor's initial response, evaluate whether it meets your requirements, use converse() to provide corrections or additional guidance if needed, repeat until satisfied, then end_session() with verdict="completed".
+**Async (wait=False)** is fire-and-forget. You spawn the work and continue immediately without waiting. The executor works in the background.
-**Async mode** is fire-and-forget. You describe the task, the executor works on it in the background, and you can check on progress periodically or wait for completion. You don't have the opportunity for mid-task guidance.
+Use async when tasks are well-defined and self-contained, when you're confident the executor can complete without guidance, or when you want to parallelize multiple independent pieces of work. Async is efficient for clear-cut tasks like "add tests for this function" or "fix this lint error".
-Use async mode when the task is well-defined and self-contained, when you're confident the executor can complete it without guidance, or when you want to parallelize multiple independent pieces of work. Async is efficient for clear-cut tasks like "add tests for this function" or "fix this specific lint error" where there's little ambiguity about what success looks like.
+Async pattern for parallel work:
+1. `delegate(task1, wait=False)` → session a
+2. `delegate(task2, wait=False)` → session b
+3. `delegate(task3, wait=False)` → session c
+4. `list_sessions()` → check `needs_attention` flags
+5. `peek_session(a)` → quick status check
+6. `check_session(b)` → full details when ready
+7. `converse(a, "now do X", wait=False)` → continue without blocking
-When in doubt, prefer sync mode. The overhead of conversation is small compared to the cost of an executor going off in the wrong direction unsupervised.
+When in doubt, prefer sync. The overhead of waiting is small compared to an executor going off in the wrong direction unsupervised.
 ---

zwarm/sessions/__init__.py CHANGED Viewed

@@ -14,11 +14,13 @@ Features:
 from zwarm.sessions.manager import (
     CodexSession,
     CodexSessionManager,
+    SessionMessage,
     SessionStatus,
 )
 __all__ = [
     "CodexSession",
     "CodexSessionManager",
+    "SessionMessage",
     "SessionStatus",
 ]

zwarm/sessions/manager.py CHANGED Viewed

@@ -225,9 +225,11 @@ class CodexSessionManager:
                 continue
             session = self._load_session(session_dir.name)
             if session:
-                # Update status if process died
-                if session.status == SessionStatus.RUNNING and not session.is_running:
-                    self._update_session_status(session)
+                # Update status if process died OR output indicates completion
+                # (output check is more reliable than PID check due to PID reuse)
+                if session.status == SessionStatus.RUNNING:
+                    if self._is_output_complete(session.id, session.turn) or not session.is_running:
+                        self._update_session_status(session)
                 if status is None or session.status == status:
                     sessions.append(session)
@@ -241,8 +243,9 @@ class CodexSessionManager:
         # Try exact match first
         session = self._load_session(session_id)
         if session:
-            if session.status == SessionStatus.RUNNING and not session.is_running:
-                self._update_session_status(session)
+            if session.status == SessionStatus.RUNNING:
+                if self._is_output_complete(session.id, session.turn) or not session.is_running:
+                    self._update_session_status(session)
             return session
         # Try partial match
@@ -250,12 +253,45 @@ class CodexSessionManager:
             if session_dir.name.startswith(session_id):
                 session = self._load_session(session_dir.name)
                 if session:
-                    if session.status == SessionStatus.RUNNING and not session.is_running:
-                        self._update_session_status(session)
+                    if session.status == SessionStatus.RUNNING:
+                        if self._is_output_complete(session.id, session.turn) or not session.is_running:
+                            self._update_session_status(session)
                     return session
         return None
+    def _is_output_complete(self, session_id: str, turn: int) -> bool:
+        """
+        Check if output file indicates the task completed.
+        Looks for completion markers like 'turn.completed' or 'task.completed'
+        in the JSONL output. This is more reliable than PID checking.
+        """
+        output_path = self._output_path(session_id, turn)
+        if not output_path.exists():
+            return False
+        try:
+            content = output_path.read_text()
+            for line in content.strip().split("\n"):
+                if not line.strip():
+                    continue
+                try:
+                    event = json.loads(line)
+                    event_type = event.get("type", "")
+                    # Check for any completion marker
+                    if event_type in ("turn.completed", "task.completed", "completed", "done"):
+                        return True
+                    # Also check for error as a form of completion
+                    if event_type == "error":
+                        return True
+                except json.JSONDecodeError:
+                    continue
+        except Exception:
+            pass
+        return False
     def _update_session_status(self, session: CodexSession) -> None:
         """Update session status after process completion."""
         # Parse output to determine status
@@ -325,6 +361,8 @@ class CodexSessionManager:
         cmd = [
             "codex", "exec",
             "--json",
+            "--full-auto",
+            "--skip-git-repo-check",
             "--model", model,
             "-C", str(working_dir.absolute()),
         ]
@@ -408,6 +446,8 @@ Continue from where you left off, addressing the user's new message."""
         cmd = [
             "codex", "exec",
             "--json",
+            "--full-auto",
+            "--skip-git-repo-check",
             "--model", session.model,
             "-C", str(session.working_dir.absolute()),
             "--", augmented_task,
@@ -431,10 +471,14 @@ Continue from where you left off, addressing the user's new message."""
         return session
-    def kill_session(self, session_id: str) -> bool:
+    def kill_session(self, session_id: str, delete: bool = False) -> bool:
         """
         Kill a running session.
+        Args:
+            session_id: Session to kill
+            delete: If True, also delete session data entirely
         Returns True if killed, False if not found or not running.
         """
         session = self.get_session(session_id)
@@ -453,11 +497,46 @@ Continue from where you left off, addressing the user's new message."""
             except (OSError, ProcessLookupError):
                 pass
+        if delete:
+            return self.delete_session(session.id)
         session.status = SessionStatus.KILLED
         session.error = "Manually killed"
         self._save_session(session)
         return True
+    def delete_session(self, session_id: str) -> bool:
+        """
+        Delete a session entirely (removes from disk).
+        Kills the process first if still running.
+        Returns True if deleted, False if not found.
+        """
+        import shutil
+        session = self.get_session(session_id)
+        if not session:
+            return False
+        # Kill if running
+        if session.pid and session.is_running:
+            try:
+                os.killpg(os.getpgid(session.pid), signal.SIGTERM)
+                time.sleep(0.3)
+                if session.is_running:
+                    os.killpg(os.getpgid(session.pid), signal.SIGKILL)
+            except (OSError, ProcessLookupError):
+                pass
+        # Remove session directory
+        session_dir = self._session_dir(session.id)
+        if session_dir.exists():
+            shutil.rmtree(session_dir)
+            return True
+        return False
     def get_output(self, session_id: str, turn: int | None = None) -> str:
         """Get raw JSONL output for a session."""
         session = self.get_session(session_id)

zwarm 1.3.11__py3-none-any.whl → 2.0.1__py3-none-any.whl

zwarm 1.3.11__py3-none-any.whl → 2.0.1__py3-none-any.whl