PyPI - zwarm - Versions diffs - 0.1.0__py3-none-any.whl → 1.0.1__py3-none-any.whl - Mend

zwarm 0.1.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) [hide | show]

zwarm/adapters/claude_code.py +55 -3
zwarm/adapters/codex_mcp.py +433 -122
zwarm/adapters/test_codex_mcp.py +26 -26
zwarm/cli/main.py +464 -3
zwarm/core/compact.py +329 -0
zwarm/core/config.py +51 -9
zwarm/core/environment.py +104 -33
zwarm/core/models.py +16 -0
zwarm/core/test_compact.py +312 -0
zwarm/orchestrator.py +222 -39
zwarm/prompts/orchestrator.py +128 -146
zwarm/test_orchestrator_watchers.py +23 -0
zwarm/tools/delegation.py +23 -4
zwarm/watchers/builtin.py +90 -4
zwarm/watchers/manager.py +46 -8
zwarm/watchers/test_watchers.py +42 -0
{zwarm-0.1.0.dist-info → zwarm-1.0.1.dist-info}/METADATA +162 -36
zwarm-1.0.1.dist-info/RECORD +33 -0
zwarm-0.1.0.dist-info/RECORD +0 -30
{zwarm-0.1.0.dist-info → zwarm-1.0.1.dist-info}/WHEEL +0 -0
{zwarm-0.1.0.dist-info → zwarm-1.0.1.dist-info}/entry_points.txt +0 -0

zwarm/adapters/codex_mcp.py CHANGED Viewed

@@ -8,9 +8,12 @@ Uses codex mcp-server for true iterative conversations:
 from __future__ import annotations
-import asyncio
 import json
+import logging
+import queue
 import subprocess
+import threading
+import time
 from pathlib import Path
 from typing import Any, Literal
@@ -23,113 +26,270 @@ from zwarm.core.models import (
     SessionStatus,
 )
+logger = logging.getLogger(__name__)
 class MCPClient:
-    """Minimal MCP client for communicating with codex mcp-server."""
+    """
+    Robust MCP client for communicating with codex mcp-server.
+    Uses subprocess.Popen (NOT asyncio.subprocess) to avoid being tied to
+    any specific event loop. This allows the MCP server to stay alive across
+    multiple asyncio.run() calls, preserving conversation state.
+    Uses dedicated reader threads that queue lines, avoiding the race condition
+    of spawning new reader threads on timeout.
+    """
-    def __init__(self, proc: subprocess.Popen):
-        self.proc = proc
+    def __init__(self):
+        self._proc: subprocess.Popen | None = None
         self._request_id = 0
         self._initialized = False
+        self._stderr_thread: threading.Thread | None = None
+        self._stdout_thread: threading.Thread | None = None
+        self._stderr_lines: list[str] = []
+        self._stdout_queue: queue.Queue[str | None] = queue.Queue()
+        self._lock = threading.Lock()  # Protect writes only
+    def start(self) -> None:
+        """Start the MCP server process."""
+        with self._lock:
+            if self._proc is not None and self._proc.poll() is None:
+                return  # Already running
+            logger.info("Starting codex mcp-server...")
+            self._proc = subprocess.Popen(
+                ["codex", "mcp-server"],
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=False,  # Binary mode for explicit encoding control
+            )
+            self._initialized = False
+            self._stderr_lines = []
+            self._stdout_queue = queue.Queue()  # Fresh queue
+            # Start background thread to read stderr
+            self._stderr_thread = threading.Thread(
+                target=self._read_stderr_loop,
+                daemon=True,
+                name="mcp-stderr-reader",
+            )
+            self._stderr_thread.start()
+            # Start background thread to read stdout into queue
+            self._stdout_thread = threading.Thread(
+                target=self._read_stdout_loop,
+                daemon=True,
+                name="mcp-stdout-reader",
+            )
+            self._stdout_thread.start()
+            logger.info(f"MCP server started (pid={self._proc.pid})")
+    def _read_stderr_loop(self) -> None:
+        """Background thread to read stderr and log errors."""
+        if not self._proc or not self._proc.stderr:
+            return
+        try:
+            while True:
+                line = self._proc.stderr.readline()
+                if not line:
+                    break
+                decoded = line.decode().strip()
+                if decoded:
+                    self._stderr_lines.append(decoded)
+                    # Keep only last 100 lines
+                    if len(self._stderr_lines) > 100:
+                        self._stderr_lines = self._stderr_lines[-100:]
+                    # Log errors prominently
+                    if "error" in decoded.lower() or "ERROR" in decoded:
+                        logger.error(f"[MCP stderr] {decoded}")
+                    else:
+                        logger.debug(f"[MCP stderr] {decoded}")
+        except Exception as e:
+            logger.warning(f"stderr reader stopped: {e}")
+    def _read_stdout_loop(self) -> None:
+        """Background thread to read stdout and queue lines."""
+        if not self._proc or not self._proc.stdout:
+            return
+        try:
+            while True:
+                line = self._proc.stdout.readline()
+                if not line:
+                    # EOF - signal end
+                    self._stdout_queue.put(None)
+                    break
+                decoded = line.decode()
+                self._stdout_queue.put(decoded)
+        except Exception as e:
+            logger.warning(f"stdout reader stopped: {e}")
+            self._stdout_queue.put(None)  # Signal error
     def _next_id(self) -> int:
         self._request_id += 1
         return self._request_id
-    async def _read_line(self) -> str:
-        """Read a line from stdout asynchronously."""
-        loop = asyncio.get_event_loop()
-        return await loop.run_in_executor(None, self.proc.stdout.readline)
+    def _write(self, data: str) -> None:
+        """Write to stdin with error handling."""
+        if not self._proc or not self._proc.stdin:
+            raise RuntimeError("MCP server not running")
+        if self._proc.poll() is not None:
+            raise RuntimeError(f"MCP server died (exit code {self._proc.returncode})")
-    async def send_request(self, method: str, params: dict | None = None) -> dict:
-        """Send JSON-RPC request and wait for response."""
-        request: dict[str, Any] = {
-            "jsonrpc": "2.0",
-            "id": self._next_id(),
-            "method": method,
-        }
-        if params:
-            request["params"] = params
+        self._proc.stdin.write(data.encode())
+        self._proc.stdin.flush()
-        request_line = json.dumps(request) + "\n"
+    def _read_line(self, timeout: float = 120.0) -> str:
+        """
+        Read a line from the stdout queue with timeout.
-        # Write request
-        self.proc.stdin.write(request_line)
-        self.proc.stdin.flush()
+        Uses a dedicated reader thread that queues lines, so we never
+        lose data on timeout - we just haven't received it yet.
+        """
+        if not self._proc:
+            raise RuntimeError("MCP server not running")
+        try:
+            line = self._stdout_queue.get(timeout=timeout)
+        except queue.Empty:
+            # Timeout - check process health
+            if self._proc.poll() is not None:
+                stderr_context = "\n".join(self._stderr_lines[-10:]) if self._stderr_lines else "(no stderr)"
+                raise RuntimeError(
+                    f"MCP server died (exit code {self._proc.returncode}).\n"
+                    f"Recent stderr:\n{stderr_context}"
+                )
+            # Process still alive, just slow - return empty to let caller decide
+            return ""
+        if line is None:
+            # EOF or error from reader thread
+            stderr_context = "\n".join(self._stderr_lines[-10:]) if self._stderr_lines else "(no stderr)"
+            if self._proc.poll() is not None:
+                raise RuntimeError(
+                    f"MCP server exited (code {self._proc.returncode}).\n"
+                    f"Recent stderr:\n{stderr_context}"
+                )
+            raise RuntimeError(f"MCP stdout closed unexpectedly.\nRecent stderr:\n{stderr_context}")
+        return line
+    def _check_alive(self) -> None:
+        """Check if the MCP server is still alive, raise if not."""
+        if not self._proc:
+            raise RuntimeError("MCP server not started")
+        if self._proc.poll() is not None:
+            stderr_context = "\n".join(self._stderr_lines[-10:]) if self._stderr_lines else "(no stderr)"
+            raise RuntimeError(
+                f"MCP server died (exit code {self._proc.returncode}).\n"
+                f"Recent stderr:\n{stderr_context}"
+            )
-        # Read response
-        response_line = await self._read_line()
+    def initialize(self) -> dict:
+        """Initialize MCP connection."""
+        self._check_alive()
+        request = {
+            "jsonrpc": "2.0",
+            "id": self._next_id(),
+            "method": "initialize",
+            "params": {
+                "protocolVersion": "2024-11-05",
+                "capabilities": {},
+                "clientInfo": {"name": "zwarm", "version": "0.1.0"},
+            },
+        }
+        with self._lock:
+            self._write(json.dumps(request) + "\n")
+        response_line = self._read_line(timeout=30.0)
         if not response_line:
-            raise RuntimeError("No response from MCP server")
+            raise RuntimeError("No response from MCP server during init")
         response = json.loads(response_line)
-        # Check for error
         if "error" in response:
-            error = response["error"]
-            raise RuntimeError(f"MCP error: {error.get('message', error)}")
-        return response
-    async def initialize(self) -> dict:
-        """Initialize MCP connection."""
-        result = await self.send_request("initialize", {
-            "protocolVersion": "2024-11-05",
-            "capabilities": {},
-            "clientInfo": {"name": "zwarm", "version": "0.1.0"},
-        })
+            raise RuntimeError(f"MCP init error: {response['error']}")
         # Send initialized notification
-        notif = json.dumps({
-            "jsonrpc": "2.0",
-            "method": "notifications/initialized",
-        }) + "\n"
-        self.proc.stdin.write(notif)
-        self.proc.stdin.flush()
+        notif = {"jsonrpc": "2.0", "method": "notifications/initialized"}
+        with self._lock:
+            self._write(json.dumps(notif) + "\n")
         self._initialized = True
-        return result
+        logger.info("MCP connection initialized")
+        return response
-    async def call_tool(self, name: str, arguments: dict) -> dict:
+    def call_tool(self, name: str, arguments: dict, timeout: float = 300.0) -> dict:
         """
         Call an MCP tool and collect streaming events.
-        Codex MCP uses streaming events, so we read multiple responses
-        until we get the final result.
+        Args:
+            name: Tool name (codex, codex-reply)
+            arguments: Tool arguments
+            timeout: Overall timeout for the call (default 5 min)
         """
+        self._check_alive()
         if not self._initialized:
-            await self.initialize()
+            self.initialize()
         request_id = self._next_id()
         request = {
             "jsonrpc": "2.0",
             "id": request_id,
             "method": "tools/call",
-            "params": {
-                "name": name,
-                "arguments": arguments,
-            },
+            "params": {"name": name, "arguments": arguments},
         }
-        self.proc.stdin.write(json.dumps(request) + "\n")
-        self.proc.stdin.flush()
+        logger.debug(f"Calling MCP tool: {name} with args: {list(arguments.keys())}")
+        with self._lock:
+            self._write(json.dumps(request) + "\n")
         # Collect streaming events until final result
+        # Reader thread queues lines, we pull from queue with timeout
         session_id = None
         agent_messages: list[str] = []
+        streaming_text: list[str] = []  # Accumulate streaming delta text
         final_result = None
+        token_usage: dict[str, Any] = {}  # Track token usage
+        start_time = time.time()
+        for event_count in range(1000):  # Safety limit on events
+            self._check_alive()
+            # Check overall timeout
+            elapsed = time.time() - start_time
+            if elapsed > timeout:
+                raise RuntimeError(f"MCP call timed out after {timeout}s ({event_count} events received)")
+            # Read from queue with per-event timeout
+            # Empty string = timeout (process still alive, just waiting)
+            # None sentinel is handled inside _read_line (raises RuntimeError)
+            line = self._read_line(timeout=30.0)
-        for _ in range(500):  # Safety limit on events
-            line = await self._read_line()
             if not line:
-                break
+                # Timeout waiting for event - process is still alive, just slow
+                # This is normal during long codex operations
+                logger.debug(f"Waiting for MCP event... (elapsed: {elapsed:.0f}s, events: {event_count})")
+                continue
-            event = json.loads(line)
+            try:
+                event = json.loads(line)
+            except json.JSONDecodeError as e:
+                logger.warning(f"Invalid JSON from MCP: {line[:100]}... - {e}")
+                continue
             # Check for final result (has matching id)
-            if event.get("id") == request_id and "result" in event:
-                final_result = event.get("result", {})
-                break
+            if event.get("id") == request_id:
+                if "result" in event:
+                    final_result = event["result"]
+                    logger.debug(f"Got final result after {event_count} events")
+                    break
+                elif "error" in event:
+                    error = event["error"]
+                    raise RuntimeError(f"MCP tool error: {error.get('message', error)}")
             # Process streaming events
             if event.get("method") == "codex/event":
@@ -137,35 +297,157 @@ class MCPClient:
                 msg = params.get("msg", {})
                 msg_type = msg.get("type")
+                # Log ALL event types to help debug missing messages
+                logger.debug(f"MCP event: type={msg_type}, keys={list(msg.keys())}")
                 if msg_type == "session_configured":
                     session_id = msg.get("session_id")
+                    logger.debug(f"Session configured: {session_id}")
+                elif msg_type == "item_completed":
+                    item = msg.get("item", {})
+                    item_type = item.get("type")
+                    # Agent text responses - codex uses "AgentMessage" type
+                    if item_type == "AgentMessage":
+                        content = item.get("content", [])
+                        for block in content:
+                            if isinstance(block, dict) and block.get("text"):
+                                agent_messages.append(block["text"])
+                            elif isinstance(block, str):
+                                agent_messages.append(block)
+                    # Legacy format check
+                    elif item_type == "message" and item.get("role") == "assistant":
+                        content = item.get("content", [])
+                        for block in content:
+                            if isinstance(block, dict) and block.get("text"):
+                                agent_messages.append(block["text"])
+                            elif isinstance(block, str):
+                                agent_messages.append(block)
+                    # Function call outputs (for context)
+                    elif item_type == "function_call_output":
+                        output = item.get("output", "")
+                        if output and len(output) < 1000:
+                            agent_messages.append(f"[Tool output]: {output[:500]}")
+                    # Log other item types we're not handling
+                    elif item_type not in ("function_call", "tool_call", "UserMessage"):
+                        logger.debug(f"Unhandled item_completed type: {item_type}, keys: {list(item.keys())}")
                 elif msg_type == "agent_message":
-                    agent_messages.append(msg.get("message", ""))
-                elif msg_type == "task_completed":
-                    # Task is done, break
+                    # Direct agent message event
+                    message = msg.get("message", "")
+                    if message:
+                        agent_messages.append(message)
+                elif msg_type in ("task_complete", "task_completed"):
+                    # Task is done - capture last_agent_message as fallback
+                    last_msg = msg.get("last_agent_message")
+                    if last_msg and last_msg not in agent_messages:
+                        agent_messages.append(last_msg)
+                    logger.debug(f"Task complete after {event_count} events")
                     break
-                elif msg_type == "error":
-                    raise RuntimeError(f"Codex error: {msg.get('error', msg)}")
-        # Build result from collected events
+                elif msg_type == "token_count":
+                    # Capture token usage for cost tracking
+                    info = msg.get("info") or {}
+                    if info:
+                        usage = info.get("total_token_usage", {})
+                        if usage:
+                            token_usage = {
+                                "input_tokens": usage.get("input_tokens", 0),
+                                "output_tokens": usage.get("output_tokens", 0),
+                                "cached_input_tokens": usage.get("cached_input_tokens", 0),
+                                "reasoning_tokens": usage.get("reasoning_output_tokens", 0),
+                                "total_tokens": usage.get("total_tokens", 0),
+                            }
+                            logger.debug(f"Token usage: {token_usage}")
+                elif msg_type == "error":
+                    error_msg = msg.get("error", msg.get("message", str(msg)))
+                    raise RuntimeError(f"Codex error: {error_msg}")
+                # Handle streaming text events (various formats)
+                elif msg_type in ("text_delta", "content_block_delta", "message_delta"):
+                    delta = msg.get("delta", {})
+                    text = delta.get("text", "") or msg.get("text", "")
+                    if text:
+                        streaming_text.append(text)
+                elif msg_type == "text":
+                    text = msg.get("text", "")
+                    if text:
+                        streaming_text.append(text)
+                elif msg_type == "response":
+                    # Some versions send the full response this way
+                    response_text = msg.get("response", "") or msg.get("text", "")
+                    if response_text:
+                        agent_messages.append(response_text)
+                elif msg_type == "message":
+                    # Direct message event
+                    text = msg.get("text", "") or msg.get("content", "")
+                    if text:
+                        agent_messages.append(text)
+                else:
+                    # Log unknown event types at debug level to help diagnose
+                    if msg_type and msg_type not in ("session_started", "thinking", "tool_call", "function_call"):
+                        logger.debug(f"Unhandled MCP event type: {msg_type}, msg keys: {list(msg.keys())}")
+        # Merge streaming text into messages if we got any
+        if streaming_text:
+            full_streaming = "".join(streaming_text)
+            if full_streaming.strip():
+                agent_messages.append(full_streaming)
+                logger.debug(f"Captured {len(streaming_text)} streaming chunks ({len(full_streaming)} chars)")
+        # Build result
         result = {
             "conversationId": session_id,
             "messages": agent_messages,
             "output": "\n".join(agent_messages) if agent_messages else "",
+            "usage": token_usage,  # Token usage for cost tracking
         }
+        # Merge final result and try to extract content if no messages
         if final_result:
             result.update(final_result)
+            if not agent_messages and "content" in final_result:
+                content = final_result["content"]
+                if isinstance(content, list):
+                    for block in content:
+                        if isinstance(block, dict) and block.get("text"):
+                            agent_messages.append(block["text"])
+                    if agent_messages:
+                        result["messages"] = agent_messages
+                        result["output"] = "\n".join(agent_messages)
+        logger.debug(f"MCP call complete: {len(agent_messages)} messages, session={session_id}")
         return result
     def close(self) -> None:
-        """Close the MCP connection."""
-        if self.proc and self.proc.poll() is None:
-            self.proc.terminate()
+        """Close the MCP connection gracefully."""
+        if self._proc and self._proc.poll() is None:
+            logger.info("Terminating MCP server...")
+            self._proc.terminate()
             try:
-                self.proc.wait(timeout=5)
+                self._proc.wait(timeout=5)
             except subprocess.TimeoutExpired:
-                self.proc.kill()
+                logger.warning("MCP server didn't terminate, killing...")
+                self._proc.kill()
+                self._proc.wait()
+        self._proc = None
+        self._initialized = False
+    @property
+    def is_alive(self) -> bool:
+        """Check if the MCP server is running."""
+        return self._proc is not None and self._proc.poll() is None
 class CodexMCPAdapter(ExecutorAdapter):
@@ -173,40 +455,50 @@ class CodexMCPAdapter(ExecutorAdapter):
     Codex adapter using MCP server for sync conversations.
     This is the recommended way to have iterative conversations with Codex.
+    The MCP client uses subprocess.Popen (not asyncio) so it persists across
+    multiple asyncio.run() calls, preserving conversation state.
     """
     name = "codex_mcp"
+    DEFAULT_MODEL = "gpt-5.1-codex-mini"  # Default codex model
-    def __init__(self):
+    def __init__(self, model: str | None = None):
+        self._model = model or self.DEFAULT_MODEL
         self._mcp_client: MCPClient | None = None
-        self._mcp_proc: subprocess.Popen | None = None
         self._sessions: dict[str, str] = {}  # session_id -> conversationId
+        # Cumulative token usage for cost tracking
+        self._total_usage: dict[str, int] = {
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "cached_input_tokens": 0,
+            "reasoning_tokens": 0,
+            "total_tokens": 0,
+        }
+    def _accumulate_usage(self, usage: dict[str, Any]) -> None:
+        """Add usage to cumulative totals."""
+        if not usage:
+            return
+        for key in self._total_usage:
+            self._total_usage[key] += usage.get(key, 0)
+    @property
+    def total_usage(self) -> dict[str, int]:
+        """Get cumulative token usage across all calls."""
+        return self._total_usage.copy()
+    def _ensure_client(self) -> MCPClient:
+        """Ensure MCP client is running and return it."""
+        if self._mcp_client is None:
+            self._mcp_client = MCPClient()
+        if not self._mcp_client.is_alive:
+            self._mcp_client.start()
-    async def _ensure_server(self) -> MCPClient:
-        """Ensure MCP server is running and return client."""
-        if self._mcp_client is not None:
-            # Check if process is still alive
-            if self._mcp_proc and self._mcp_proc.poll() is None:
-                return self._mcp_client
-            # Process died, restart
-            self._mcp_client = None
-            self._mcp_proc = None
-        # Start codex mcp-server
-        self._mcp_proc = subprocess.Popen(
-            ["codex", "mcp-server"],
-            stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            bufsize=1,
-        )
-        self._mcp_client = MCPClient(self._mcp_proc)
-        await self._mcp_client.initialize()
         return self._mcp_client
     @weave.op()
-    async def _call_codex(
+    def _call_codex(
         self,
         task: str,
         cwd: str,
@@ -216,10 +508,10 @@ class CodexMCPAdapter(ExecutorAdapter):
         """
         Call codex MCP tool - traced by Weave.
-        This wraps the actual codex call so it appears in Weave traces
-        with full input/output visibility.
+        This is synchronous (uses subprocess.Popen, not asyncio) so the MCP
+        server persists across calls.
         """
-        client = await self._ensure_server()
+        client = self._ensure_client()
         args: dict[str, Any] = {
             "prompt": task,
@@ -229,17 +521,22 @@ class CodexMCPAdapter(ExecutorAdapter):
         if model:
             args["model"] = model
-        result = await client.call_tool("codex", args)
+        result = client.call_tool("codex", args)
+        # Track usage
+        usage = result.get("usage", {})
+        self._accumulate_usage(usage)
-        # Return structured result for Weave
         return {
             "conversation_id": result.get("conversationId"),
             "response": self._extract_response(result),
             "raw_messages": result.get("messages", []),
+            "usage": usage,
+            "total_usage": self.total_usage,
         }
     @weave.op()
-    async def _call_codex_reply(
+    def _call_codex_reply(
         self,
         conversation_id: str,
         message: str,
@@ -247,19 +544,25 @@ class CodexMCPAdapter(ExecutorAdapter):
         """
         Call codex-reply MCP tool - traced by Weave.
-        This wraps the reply call so it appears in Weave traces
-        with full input/output visibility.
+        This is synchronous (uses subprocess.Popen, not asyncio) so the MCP
+        server persists across calls.
         """
-        client = await self._ensure_server()
+        client = self._ensure_client()
-        result = await client.call_tool("codex-reply", {
+        result = client.call_tool("codex-reply", {
             "conversationId": conversation_id,
             "prompt": message,
         })
+        # Track usage
+        usage = result.get("usage", {})
+        self._accumulate_usage(usage)
         return {
             "response": self._extract_response(result),
             "raw_messages": result.get("messages", []),
+            "usage": usage,
+            "total_usage": self.total_usage,
         }
     async def start_session(
@@ -272,30 +575,35 @@ class CodexMCPAdapter(ExecutorAdapter):
         **kwargs,
     ) -> ConversationSession:
         """Start a Codex session."""
+        effective_model = model or self._model
         session = ConversationSession(
             adapter=self.name,
             mode=SessionMode(mode),
             working_dir=working_dir,
             task_description=task,
-            model=model,
+            model=effective_model,
         )
         if mode == "sync":
-            # Use traced codex call
-            result = await self._call_codex(
+            # Use traced codex call (synchronous - MCP client persists across calls)
+            result = self._call_codex(
                 task=task,
                 cwd=str(working_dir.absolute()),
                 sandbox=sandbox,
-                model=model,
+                model=effective_model,
             )
             # Extract conversation ID and response
             session.conversation_id = result["conversation_id"]
-            self._sessions[session.id] = session.conversation_id
+            if session.conversation_id:
+                self._sessions[session.id] = session.conversation_id
             session.add_message("user", task)
             session.add_message("assistant", result["response"])
+            # Track token usage on the session
+            session.add_usage(result.get("usage", {}))
         else:
             # Async mode: use codex exec (fire-and-forget)
             # This runs in a subprocess without MCP
@@ -304,9 +612,8 @@ class CodexMCPAdapter(ExecutorAdapter):
                 "--dangerously-bypass-approvals-and-sandbox",
                 "--skip-git-repo-check",
                 "--json",
+                "--model", effective_model,
             ]
-            if model:
-                cmd.extend(["--model", model])
             cmd.extend(["--", task])
             proc = subprocess.Popen(
@@ -334,8 +641,8 @@ class CodexMCPAdapter(ExecutorAdapter):
         if not session.conversation_id:
             raise ValueError("Session has no conversation ID")
-        # Use traced codex-reply call
-        result = await self._call_codex_reply(
+        # Use traced codex-reply call (synchronous - MCP client persists across calls)
+        result = self._call_codex_reply(
             conversation_id=session.conversation_id,
             message=message,
         )
@@ -344,6 +651,9 @@ class CodexMCPAdapter(ExecutorAdapter):
         session.add_message("user", message)
         session.add_message("assistant", response_text)
+        # Track token usage on the session
+        session.add_usage(result.get("usage", {}))
         return response_text
     async def check_status(
@@ -376,6 +686,8 @@ class CodexMCPAdapter(ExecutorAdapter):
         session: ConversationSession,
     ) -> None:
         """Stop a session."""
+        import subprocess
         if session.process and session.process.poll() is None:
             session.process.terminate()
             try:
@@ -394,30 +706,29 @@ class CodexMCPAdapter(ExecutorAdapter):
         if self._mcp_client:
             self._mcp_client.close()
             self._mcp_client = None
-            self._mcp_proc = None
     def _extract_response(self, result: dict) -> str:
         """Extract response text from MCP result."""
         # First check for our collected output
-        if "output" in result and result["output"]:
+        if result.get("output"):
             return result["output"]
         # Check for messages list
-        if "messages" in result and result["messages"]:
+        if result.get("messages"):
             return "\n".join(result["messages"])
         # Result may have different structures depending on codex version
         if "content" in result:
             content = result["content"]
             if isinstance(content, list):
-                # Extract text from content blocks
                 texts = []
                 for block in content:
                     if isinstance(block, dict) and "text" in block:
                         texts.append(block["text"])
                     elif isinstance(block, str):
                         texts.append(block)
-                return "\n".join(texts)
+                if texts:
+                    return "\n".join(texts)
             elif isinstance(content, str):
                 return content

zwarm 0.1.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

zwarm 0.1.0__py3-none-any.whl → 1.0.1__py3-none-any.whl