PyPI - zwarm - Version diff - 1.3.3.tar.gz → 1.3.5.tar.gz - Mend

zwarm 1.3.3.tar.gz → 1.3.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

{zwarm-1.3.3 → zwarm-1.3.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: zwarm
-Version: 1.3.3
+Version: 1.3.5
 Summary: Multi-Agent CLI Orchestration Research Platform
 Requires-Python: <3.14,>=3.13
 Requires-Dist: python-dotenv>=1.0.0

{zwarm-1.3.3 → zwarm-1.3.5}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "zwarm"
-version = "1.3.3"
+version = "1.3.5"
 description = "Multi-Agent CLI Orchestration Research Platform"
 readme = "README.md"
 requires-python = ">=3.13,<3.14"

{zwarm-1.3.3 → zwarm-1.3.5}/src/zwarm/adapters/codex_mcp.py RENAMED Viewed

@@ -44,6 +44,7 @@ class MCPClient:
     def __init__(self):
         self._proc: subprocess.Popen | None = None
+        self._proc_pid: int | None = None  # Track PID to detect restarts
         self._request_id = 0
         self._initialized = False
         self._stderr_thread: threading.Thread | None = None
@@ -51,14 +52,24 @@ class MCPClient:
         self._stderr_lines: list[str] = []
         self._stdout_queue: queue.Queue[str | None] = queue.Queue()
         self._lock = threading.Lock()  # Protect writes only
+        self._start_count = 0  # Track how many times we've started
     def start(self) -> None:
         """Start the MCP server process."""
         with self._lock:
             if self._proc is not None and self._proc.poll() is None:
+                logger.debug(f"MCP server already running (pid={self._proc.pid}, start_count={self._start_count})")
                 return  # Already running
-            logger.info("Starting codex mcp-server...")
+            # Check if this is a restart (previous server died)
+            if self._proc_pid is not None:
+                logger.warning(
+                    f"MCP server restart detected! Previous pid={self._proc_pid}, "
+                    f"start_count={self._start_count}. All conversation state will be lost."
+                )
+            self._start_count += 1
+            logger.info(f"Starting codex mcp-server... (start_count={self._start_count})")
             self._proc = subprocess.Popen(
                 ["codex", "mcp-server"],
                 stdin=subprocess.PIPE,
@@ -66,6 +77,7 @@ class MCPClient:
                 stderr=subprocess.PIPE,
                 text=False,  # Binary mode for explicit encoding control
             )
+            self._proc_pid = self._proc.pid
             self._initialized = False
             self._stderr_lines = []
             self._stdout_queue = queue.Queue()  # Fresh queue
@@ -251,11 +263,13 @@ class MCPClient:
         # Collect streaming events until final result
         # Reader thread queues lines, we pull from queue with timeout
         session_id = None
+        conversation_id = None  # Track conversation ID separately
         agent_messages: list[str] = []
         streaming_text: list[str] = []  # Accumulate streaming delta text
         final_result = None
         token_usage: dict[str, Any] = {}  # Track token usage
         start_time = time.time()
+        all_events: list[dict] = []  # Keep ALL events for debugging
         for event_count in range(1000):  # Safety limit on events
             self._check_alive()
@@ -278,6 +292,7 @@ class MCPClient:
             try:
                 event = json.loads(line)
+                all_events.append(event)  # Keep for debugging
             except json.JSONDecodeError as e:
                 logger.warning(f"Invalid JSON from MCP: {line[:100]}... - {e}")
                 continue
@@ -286,7 +301,10 @@ class MCPClient:
             if event.get("id") == request_id:
                 if "result" in event:
                     final_result = event["result"]
-                    logger.debug(f"Got final result after {event_count} events")
+                    # Extract conversation ID from final result
+                    if isinstance(final_result, dict):
+                        conversation_id = final_result.get("conversationId") or final_result.get("conversation_id")
+                    logger.debug(f"Got final result after {event_count} events, conversation_id={conversation_id}")
                     break
                 elif "error" in event:
                     error = event["error"]
@@ -309,6 +327,9 @@ class MCPClient:
                     item = msg.get("item", {})
                     item_type = item.get("type")
+                    # Log ALL item_completed events to help debug
+                    logger.debug(f"item_completed: type={item_type}, keys={list(item.keys())}")
                     # Agent text responses - codex uses "AgentMessage" type
                     if item_type == "AgentMessage":
                         content = item.get("content", [])
@@ -318,6 +339,19 @@ class MCPClient:
                             elif isinstance(block, str):
                                 agent_messages.append(block)
+                    # Also check for "agent_message" (lowercase) variant
+                    elif item_type == "agent_message":
+                        text = item.get("text", "") or item.get("message", "")
+                        if text:
+                            agent_messages.append(text)
+                        # Also check content array
+                        content = item.get("content", [])
+                        for block in content:
+                            if isinstance(block, dict) and block.get("text"):
+                                agent_messages.append(block["text"])
+                            elif isinstance(block, str):
+                                agent_messages.append(block)
                     # Legacy format check
                     elif item_type == "message" and item.get("role") == "assistant":
                         content = item.get("content", [])
@@ -327,6 +361,21 @@ class MCPClient:
                             elif isinstance(block, str):
                                 agent_messages.append(block)
+                    # Generic message type - check for text/content
+                    elif item_type == "message":
+                        text = item.get("text", "")
+                        if text:
+                            agent_messages.append(text)
+                        content = item.get("content", [])
+                        if isinstance(content, str):
+                            agent_messages.append(content)
+                        elif isinstance(content, list):
+                            for block in content:
+                                if isinstance(block, dict) and block.get("text"):
+                                    agent_messages.append(block["text"])
+                                elif isinstance(block, str):
+                                    agent_messages.append(block)
                     # Function call outputs (for context)
                     elif item_type == "function_call_output":
                         output = item.get("output", "")
@@ -334,8 +383,8 @@ class MCPClient:
                             agent_messages.append(f"[Tool output]: {output[:500]}")
                     # Log other item types we're not handling
-                    elif item_type not in ("function_call", "tool_call", "UserMessage"):
-                        logger.debug(f"Unhandled item_completed type: {item_type}, keys: {list(item.keys())}")
+                    elif item_type not in ("function_call", "tool_call", "UserMessage", "user_message"):
+                        logger.debug(f"Unhandled item_completed type: {item_type}, item={item}")
                 elif msg_type == "agent_message":
                     # Direct agent message event
@@ -406,28 +455,50 @@ class MCPClient:
                 agent_messages.append(full_streaming)
                 logger.debug(f"Captured {len(streaming_text)} streaming chunks ({len(full_streaming)} chars)")
-        # Build result
-        result = {
-            "conversationId": session_id,
-            "messages": agent_messages,
-            "output": "\n".join(agent_messages) if agent_messages else "",
-            "usage": token_usage,  # Token usage for cost tracking
-        }
-        # Merge final result and try to extract content if no messages
-        if final_result:
-            result.update(final_result)
-            if not agent_messages and "content" in final_result:
+        # Try to extract content from final_result if we have no messages
+        if final_result and not agent_messages:
+            if "content" in final_result:
                 content = final_result["content"]
                 if isinstance(content, list):
                     for block in content:
                         if isinstance(block, dict) and block.get("text"):
                             agent_messages.append(block["text"])
-                    if agent_messages:
-                        result["messages"] = agent_messages
-                        result["output"] = "\n".join(agent_messages)
+                        elif isinstance(block, str):
+                            agent_messages.append(block)
+                elif isinstance(content, str):
+                    agent_messages.append(content)
+            # Also check for text field
+            if not agent_messages and "text" in final_result:
+                agent_messages.append(final_result["text"])
+        # Build result - prefer conversation_id from final result, fallback to session_id from events
+        effective_conversation_id = conversation_id or session_id
+        result = {
+            "conversationId": effective_conversation_id,
+            "messages": agent_messages,
+            "output": "\n".join(agent_messages) if agent_messages else "",
+            "usage": token_usage,  # Token usage for cost tracking
+        }
-        logger.debug(f"MCP call complete: {len(agent_messages)} messages, session={session_id}")
+        # Log detailed debug info if we didn't capture any messages
+        if not agent_messages:
+            event_types = [e.get("method") or f"id:{e.get('id')}" for e in all_events[:20]]
+            logger.warning(
+                f"MCP call returned no messages. "
+                f"conversation_id={effective_conversation_id}, "
+                f"session_id={session_id}, "
+                f"event_count={len(all_events)}, "
+                f"event_types={event_types}, "
+                f"final_result_keys={list(final_result.keys()) if final_result else 'None'}"
+            )
+            # Log codex/event details for debugging
+            codex_events = [e for e in all_events if e.get("method") == "codex/event"]
+            if codex_events:
+                for ce in codex_events[-5:]:  # Last 5 codex events
+                    msg = ce.get("params", {}).get("msg", {})
+                    logger.debug(f"  codex/event: type={msg.get('type')}, keys={list(msg.keys())}")
+        logger.debug(f"MCP call complete: {len(agent_messages)} messages, conversation_id={effective_conversation_id}")
         return result
     def close(self) -> None:
@@ -521,14 +592,35 @@ class CodexMCPAdapter(ExecutorAdapter):
         if model:
             args["model"] = model
+        logger.info(f"Calling codex with task_len={len(task)}, cwd={cwd}, model={model or 'default'}")
         result = client.call_tool("codex", args)
-        # Track usage
+        # Log the result structure
+        conversation_id = result.get("conversationId")
+        messages_count = len(result.get("messages", []))
+        output_len = len(result.get("output", ""))
         usage = result.get("usage", {})
+        logger.info(
+            f"codex result: conversation_id={conversation_id}, "
+            f"messages_count={messages_count}, output_len={output_len}, "
+            f"usage={usage.get('total_tokens', 0)} tokens"
+        )
+        # Warn if we got a conversation ID but no messages (agent did work but we lost output)
+        if conversation_id and not messages_count and not output_len:
+            logger.warning(
+                f"codex returned conversation_id={conversation_id} but NO messages/output! "
+                f"The agent processed {usage.get('total_tokens', 0)} tokens but we didn't capture the response. "
+                f"This may indicate an issue with event parsing."
+            )
+        # Track usage
         self._accumulate_usage(usage)
         return {
-            "conversation_id": result.get("conversationId"),
+            "conversation_id": conversation_id,
             "response": self._extract_response(result),
             "raw_messages": result.get("messages", []),
             "usage": usage,
@@ -549,13 +641,22 @@ class CodexMCPAdapter(ExecutorAdapter):
         """
         client = self._ensure_client()
-        logger.debug(f"Calling codex-reply with conversation_id={conversation_id}")
+        logger.info(f"Calling codex-reply with conversation_id={conversation_id}, message_len={len(message)}")
+        logger.debug(f"MCP client alive: {client.is_alive}, initialized: {client._initialized}")
         result = client.call_tool("codex-reply", {
             "conversationId": conversation_id,
             "prompt": message,
         })
+        # Log the full result structure for debugging
+        logger.info(
+            f"codex-reply result: conversationId={result.get('conversationId')}, "
+            f"messages_count={len(result.get('messages', []))}, "
+            f"output_len={len(result.get('output', ''))}, "
+            f"usage={result.get('usage', {}).get('total_tokens', 0)} tokens"
+        )
         # Check for conversation loss - MCP returns empty result when session not found
         if not result.get("messages") and not result.get("output"):
             logger.error(