zwarm 3.2.0__py3-none-any.whl → 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zwarm/cli/interactive.py +2 -2
- zwarm/cli/main.py +75 -77
- zwarm/cli/pilot.py +3 -1
- zwarm/core/config.py +24 -9
- zwarm/core/test_config.py +2 -3
- zwarm/orchestrator.py +8 -44
- zwarm/sessions/manager.py +210 -90
- zwarm/tools/delegation.py +6 -1
- zwarm-3.3.0.dist-info/METADATA +396 -0
- {zwarm-3.2.0.dist-info → zwarm-3.3.0.dist-info}/RECORD +12 -19
- zwarm/adapters/__init__.py +0 -21
- zwarm/adapters/base.py +0 -109
- zwarm/adapters/claude_code.py +0 -357
- zwarm/adapters/codex_mcp.py +0 -1262
- zwarm/adapters/registry.py +0 -69
- zwarm/adapters/test_codex_mcp.py +0 -274
- zwarm/adapters/test_registry.py +0 -68
- zwarm-3.2.0.dist-info/METADATA +0 -310
- {zwarm-3.2.0.dist-info → zwarm-3.3.0.dist-info}/WHEEL +0 -0
- {zwarm-3.2.0.dist-info → zwarm-3.3.0.dist-info}/entry_points.txt +0 -0
zwarm/adapters/codex_mcp.py
DELETED
|
@@ -1,1262 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Codex MCP adapter for sync conversations.
|
|
3
|
-
|
|
4
|
-
Uses codex mcp-server for true iterative conversations:
|
|
5
|
-
- codex() to start a session with conversationId
|
|
6
|
-
- codex-reply() to continue the conversation
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
from __future__ import annotations
|
|
10
|
-
|
|
11
|
-
import hashlib
|
|
12
|
-
import json
|
|
13
|
-
import logging
|
|
14
|
-
import queue
|
|
15
|
-
import subprocess
|
|
16
|
-
import threading
|
|
17
|
-
import time
|
|
18
|
-
from dataclasses import dataclass, field
|
|
19
|
-
from pathlib import Path
|
|
20
|
-
from typing import Any, Literal
|
|
21
|
-
|
|
22
|
-
import weave
|
|
23
|
-
|
|
24
|
-
from zwarm.adapters.base import ExecutorAdapter
|
|
25
|
-
from zwarm.adapters.registry import register_adapter
|
|
26
|
-
from zwarm.core.models import (
|
|
27
|
-
ConversationSession,
|
|
28
|
-
SessionMode,
|
|
29
|
-
SessionStatus,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
logger = logging.getLogger(__name__)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# =============================================================================
|
|
36
|
-
# MessageCollector: Robust event collection with deduplication
|
|
37
|
-
# =============================================================================
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@dataclass
|
|
41
|
-
class MessageSegment:
|
|
42
|
-
"""A segment within an assistant turn (for future segment-aware rendering)."""
|
|
43
|
-
id: str
|
|
44
|
-
kind: Literal["assistant_text", "progress", "tool_call", "tool_result", "error"]
|
|
45
|
-
text: str
|
|
46
|
-
status: Literal["open", "closed"] = "open"
|
|
47
|
-
source_event_ids: set[str] = field(default_factory=set)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
class MessageCollector:
|
|
51
|
-
"""
|
|
52
|
-
Collects and deduplicates messages from MCP event stream.
|
|
53
|
-
|
|
54
|
-
Solves the transcript rendering bugs by:
|
|
55
|
-
1. Deduplicating events by ID
|
|
56
|
-
2. Using priority-based message selection (item_completed > task_complete > streaming)
|
|
57
|
-
3. Tracking message sources for debugging
|
|
58
|
-
4. Never mixing streaming deltas with finalized messages
|
|
59
|
-
|
|
60
|
-
Priority order (highest to lowest):
|
|
61
|
-
- item_completed with AgentMessage/agent_message → DEFINITIVE
|
|
62
|
-
- task_complete.last_agent_message → FALLBACK ONLY
|
|
63
|
-
- streaming deltas → ONLY IF NO DEFINITIVE SOURCE
|
|
64
|
-
"""
|
|
65
|
-
|
|
66
|
-
def __init__(self):
|
|
67
|
-
# Deduplication
|
|
68
|
-
self._seen_event_ids: set[str] = set()
|
|
69
|
-
self._seen_content_hashes: set[str] = set() # Content-based dedup
|
|
70
|
-
|
|
71
|
-
# Message collection (priority-ordered)
|
|
72
|
-
self._definitive_messages: list[str] = [] # From item_completed
|
|
73
|
-
self._fallback_message: str | None = None # From task_complete
|
|
74
|
-
self._streaming_buffer: list[str] = [] # Streaming deltas
|
|
75
|
-
|
|
76
|
-
# Metadata
|
|
77
|
-
self._conversation_id: str | None = None
|
|
78
|
-
self._session_id: str | None = None
|
|
79
|
-
self._token_usage: dict[str, Any] = {}
|
|
80
|
-
self._is_complete: bool = False
|
|
81
|
-
|
|
82
|
-
# Debug tracking
|
|
83
|
-
self._message_sources: list[tuple[str, str]] = [] # (source, text_preview)
|
|
84
|
-
|
|
85
|
-
def _extract_event_id(self, event: dict) -> str | None:
|
|
86
|
-
"""Extract a unique event ID for deduplication."""
|
|
87
|
-
# Try various ID fields that MCP events might have
|
|
88
|
-
for key in ("id", "event_id", "item_id", "message_id"):
|
|
89
|
-
if key in event:
|
|
90
|
-
return str(event[key])
|
|
91
|
-
|
|
92
|
-
# For nested events, try params
|
|
93
|
-
params = event.get("params", {})
|
|
94
|
-
msg = params.get("msg", {})
|
|
95
|
-
for key in ("id", "event_id", "item_id"):
|
|
96
|
-
if key in msg:
|
|
97
|
-
return str(msg[key])
|
|
98
|
-
|
|
99
|
-
return None
|
|
100
|
-
|
|
101
|
-
def _content_hash(self, text: str) -> str:
|
|
102
|
-
"""Create a hash of content for deduplication."""
|
|
103
|
-
# Normalize whitespace for comparison
|
|
104
|
-
normalized = " ".join(text.split())
|
|
105
|
-
return hashlib.md5(normalized.encode()).hexdigest()[:16]
|
|
106
|
-
|
|
107
|
-
def _is_duplicate_content(self, text: str) -> bool:
|
|
108
|
-
"""Check if this content was already collected."""
|
|
109
|
-
if not text or not text.strip():
|
|
110
|
-
return True # Empty is "duplicate" (skip it)
|
|
111
|
-
|
|
112
|
-
content_hash = self._content_hash(text)
|
|
113
|
-
if content_hash in self._seen_content_hashes:
|
|
114
|
-
return True
|
|
115
|
-
|
|
116
|
-
self._seen_content_hashes.add(content_hash)
|
|
117
|
-
return False
|
|
118
|
-
|
|
119
|
-
def _add_definitive_message(self, text: str, source: str) -> None:
|
|
120
|
-
"""Add a definitive message (from item_completed)."""
|
|
121
|
-
if not text or not text.strip():
|
|
122
|
-
return
|
|
123
|
-
|
|
124
|
-
if self._is_duplicate_content(text):
|
|
125
|
-
logger.debug(f"Skipping duplicate message from {source}: {text[:50]}...")
|
|
126
|
-
return
|
|
127
|
-
|
|
128
|
-
self._definitive_messages.append(text)
|
|
129
|
-
self._message_sources.append((source, text[:50]))
|
|
130
|
-
logger.debug(f"Added definitive message from {source}: {text[:50]}...")
|
|
131
|
-
|
|
132
|
-
def _set_fallback_message(self, text: str, source: str) -> None:
|
|
133
|
-
"""Set fallback message (from task_complete). Only used if no definitive."""
|
|
134
|
-
if not text or not text.strip():
|
|
135
|
-
return
|
|
136
|
-
|
|
137
|
-
# Only set if we don't have definitive messages
|
|
138
|
-
if self._definitive_messages:
|
|
139
|
-
logger.debug(f"Ignoring fallback from {source}: have definitive messages")
|
|
140
|
-
return
|
|
141
|
-
|
|
142
|
-
if self._is_duplicate_content(text):
|
|
143
|
-
logger.debug(f"Skipping duplicate fallback from {source}")
|
|
144
|
-
return
|
|
145
|
-
|
|
146
|
-
self._fallback_message = text
|
|
147
|
-
self._message_sources.append((source, text[:50]))
|
|
148
|
-
|
|
149
|
-
def _add_streaming_delta(self, text: str) -> None:
|
|
150
|
-
"""Add streaming delta. Only used if no definitive messages at end."""
|
|
151
|
-
if text:
|
|
152
|
-
self._streaming_buffer.append(text)
|
|
153
|
-
|
|
154
|
-
def process_event(self, event: dict) -> bool:
|
|
155
|
-
"""
|
|
156
|
-
Process a single MCP event.
|
|
157
|
-
|
|
158
|
-
Returns True if processing should continue, False if complete.
|
|
159
|
-
"""
|
|
160
|
-
# 1. Check for event ID and dedupe
|
|
161
|
-
event_id = self._extract_event_id(event)
|
|
162
|
-
if event_id and event_id in self._seen_event_ids:
|
|
163
|
-
logger.debug(f"Skipping duplicate event: {event_id}")
|
|
164
|
-
return True
|
|
165
|
-
if event_id:
|
|
166
|
-
self._seen_event_ids.add(event_id)
|
|
167
|
-
|
|
168
|
-
# 2. Handle codex/event notifications
|
|
169
|
-
if event.get("method") == "codex/event":
|
|
170
|
-
params = event.get("params", {})
|
|
171
|
-
msg = params.get("msg", {})
|
|
172
|
-
msg_type = msg.get("type")
|
|
173
|
-
|
|
174
|
-
self._handle_codex_event(msg, msg_type)
|
|
175
|
-
|
|
176
|
-
# Check for completion events
|
|
177
|
-
if msg_type in ("task_complete", "task_completed"):
|
|
178
|
-
self._is_complete = True
|
|
179
|
-
return False
|
|
180
|
-
|
|
181
|
-
return True
|
|
182
|
-
|
|
183
|
-
def _handle_codex_event(self, msg: dict, msg_type: str | None) -> None:
|
|
184
|
-
"""Handle a codex/event notification."""
|
|
185
|
-
if not msg_type:
|
|
186
|
-
return
|
|
187
|
-
|
|
188
|
-
# Session configuration
|
|
189
|
-
if msg_type == "session_configured":
|
|
190
|
-
self._session_id = msg.get("session_id")
|
|
191
|
-
logger.debug(f"Session configured: {self._session_id}")
|
|
192
|
-
|
|
193
|
-
# Item completed - DEFINITIVE SOURCE
|
|
194
|
-
elif msg_type == "item_completed":
|
|
195
|
-
self._handle_item_completed(msg)
|
|
196
|
-
|
|
197
|
-
# Direct agent message - DEFINITIVE SOURCE
|
|
198
|
-
elif msg_type == "agent_message":
|
|
199
|
-
text = msg.get("message", "") or msg.get("text", "") or msg.get("content", "")
|
|
200
|
-
self._add_definitive_message(text, "agent_message_event")
|
|
201
|
-
|
|
202
|
-
# Task complete - FALLBACK SOURCE
|
|
203
|
-
elif msg_type in ("task_complete", "task_completed"):
|
|
204
|
-
last_msg = msg.get("last_agent_message")
|
|
205
|
-
if last_msg:
|
|
206
|
-
self._set_fallback_message(last_msg, "task_complete")
|
|
207
|
-
|
|
208
|
-
# Token usage
|
|
209
|
-
elif msg_type == "token_count":
|
|
210
|
-
info = msg.get("info") or {}
|
|
211
|
-
if info:
|
|
212
|
-
usage = info.get("total_token_usage", {})
|
|
213
|
-
if usage:
|
|
214
|
-
self._token_usage = {
|
|
215
|
-
"input_tokens": usage.get("input_tokens", 0),
|
|
216
|
-
"output_tokens": usage.get("output_tokens", 0),
|
|
217
|
-
"cached_input_tokens": usage.get("cached_input_tokens", 0),
|
|
218
|
-
"reasoning_tokens": usage.get("reasoning_output_tokens", 0),
|
|
219
|
-
"total_tokens": usage.get("total_tokens", 0),
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
# Streaming deltas - LOWEST PRIORITY
|
|
223
|
-
elif msg_type in ("text_delta", "content_block_delta", "message_delta", "text"):
|
|
224
|
-
delta = msg.get("delta", {})
|
|
225
|
-
text = delta.get("text", "") or msg.get("text", "")
|
|
226
|
-
self._add_streaming_delta(text)
|
|
227
|
-
|
|
228
|
-
# Response event - MEDIUM PRIORITY (treat as definitive)
|
|
229
|
-
elif msg_type == "response":
|
|
230
|
-
text = msg.get("response", "") or msg.get("text", "")
|
|
231
|
-
self._add_definitive_message(text, "response_event")
|
|
232
|
-
|
|
233
|
-
# Message event - check role
|
|
234
|
-
elif msg_type == "message":
|
|
235
|
-
role = msg.get("role", "").lower()
|
|
236
|
-
if role in ("assistant", "agent", ""):
|
|
237
|
-
text = msg.get("text", "") or msg.get("content", "")
|
|
238
|
-
if text and role != "user":
|
|
239
|
-
self._add_definitive_message(text, "message_event")
|
|
240
|
-
|
|
241
|
-
# Output event
|
|
242
|
-
elif msg_type == "output":
|
|
243
|
-
text = msg.get("output", "") or msg.get("text", "") or msg.get("content", "")
|
|
244
|
-
self._add_definitive_message(text, "output_event")
|
|
245
|
-
|
|
246
|
-
# Completion variants
|
|
247
|
-
elif msg_type in ("item.completed", "response.completed"):
|
|
248
|
-
item = msg.get("item", {})
|
|
249
|
-
if item.get("type") == "agent_message":
|
|
250
|
-
text = item.get("text", "")
|
|
251
|
-
self._add_definitive_message(text, f"{msg_type}_event")
|
|
252
|
-
elif "text" in msg:
|
|
253
|
-
self._add_definitive_message(msg["text"], f"{msg_type}_direct")
|
|
254
|
-
|
|
255
|
-
# Error
|
|
256
|
-
elif msg_type == "error":
|
|
257
|
-
error_msg = msg.get("error", msg.get("message", str(msg)))
|
|
258
|
-
raise RuntimeError(f"Codex error: {error_msg}")
|
|
259
|
-
|
|
260
|
-
def _handle_item_completed(self, msg: dict) -> None:
|
|
261
|
-
"""Handle item_completed event - the primary source of messages."""
|
|
262
|
-
item = msg.get("item", {})
|
|
263
|
-
item_type = item.get("type")
|
|
264
|
-
|
|
265
|
-
# AgentMessage - primary format
|
|
266
|
-
if item_type == "AgentMessage":
|
|
267
|
-
content = item.get("content", [])
|
|
268
|
-
for block in content:
|
|
269
|
-
if isinstance(block, dict) and block.get("text"):
|
|
270
|
-
self._add_definitive_message(block["text"], "AgentMessage")
|
|
271
|
-
elif isinstance(block, str):
|
|
272
|
-
self._add_definitive_message(block, "AgentMessage_str")
|
|
273
|
-
|
|
274
|
-
# agent_message - variant spelling
|
|
275
|
-
elif item_type == "agent_message":
|
|
276
|
-
text = item.get("text", "") or item.get("message", "")
|
|
277
|
-
if text:
|
|
278
|
-
self._add_definitive_message(text, "agent_message")
|
|
279
|
-
content = item.get("content", [])
|
|
280
|
-
for block in content:
|
|
281
|
-
if isinstance(block, dict) and block.get("text"):
|
|
282
|
-
self._add_definitive_message(block["text"], "agent_message_content")
|
|
283
|
-
elif isinstance(block, str):
|
|
284
|
-
self._add_definitive_message(block, "agent_message_content_str")
|
|
285
|
-
|
|
286
|
-
# Generic message with assistant role
|
|
287
|
-
elif item_type == "message":
|
|
288
|
-
role = item.get("role", "")
|
|
289
|
-
if role == "assistant":
|
|
290
|
-
content = item.get("content", [])
|
|
291
|
-
for block in content:
|
|
292
|
-
if isinstance(block, dict) and block.get("text"):
|
|
293
|
-
self._add_definitive_message(block["text"], "message_assistant")
|
|
294
|
-
elif isinstance(block, str):
|
|
295
|
-
self._add_definitive_message(block, "message_assistant_str")
|
|
296
|
-
# Also check text field directly
|
|
297
|
-
text = item.get("text", "")
|
|
298
|
-
if text:
|
|
299
|
-
self._add_definitive_message(text, "message_text")
|
|
300
|
-
|
|
301
|
-
# Function call output (for context, truncated)
|
|
302
|
-
elif item_type == "function_call_output":
|
|
303
|
-
output = item.get("output", "")
|
|
304
|
-
if output and len(output) < 1000:
|
|
305
|
-
# Don't add to messages, just log
|
|
306
|
-
logger.debug(f"Tool output: {output[:100]}...")
|
|
307
|
-
|
|
308
|
-
def set_conversation_id(self, conv_id: str | None) -> None:
|
|
309
|
-
"""Set conversation ID from final result."""
|
|
310
|
-
if conv_id:
|
|
311
|
-
self._conversation_id = conv_id
|
|
312
|
-
|
|
313
|
-
@property
|
|
314
|
-
def conversation_id(self) -> str | None:
|
|
315
|
-
"""Get the conversation ID."""
|
|
316
|
-
return self._conversation_id or self._session_id
|
|
317
|
-
|
|
318
|
-
@property
|
|
319
|
-
def token_usage(self) -> dict[str, Any]:
|
|
320
|
-
"""Get token usage stats."""
|
|
321
|
-
return self._token_usage
|
|
322
|
-
|
|
323
|
-
@property
|
|
324
|
-
def is_complete(self) -> bool:
|
|
325
|
-
"""Check if collection is complete."""
|
|
326
|
-
return self._is_complete
|
|
327
|
-
|
|
328
|
-
def get_messages(self) -> list[str]:
|
|
329
|
-
"""
|
|
330
|
-
Get the final deduplicated message list.
|
|
331
|
-
|
|
332
|
-
Priority:
|
|
333
|
-
1. Definitive messages (from item_completed)
|
|
334
|
-
2. Fallback message (from task_complete)
|
|
335
|
-
3. Streaming buffer (only if no definitive or fallback)
|
|
336
|
-
"""
|
|
337
|
-
# Prefer definitive messages
|
|
338
|
-
if self._definitive_messages:
|
|
339
|
-
logger.debug(f"Returning {len(self._definitive_messages)} definitive messages")
|
|
340
|
-
return self._definitive_messages
|
|
341
|
-
|
|
342
|
-
# Fall back to task_complete message
|
|
343
|
-
if self._fallback_message:
|
|
344
|
-
logger.debug("Returning fallback message from task_complete")
|
|
345
|
-
return [self._fallback_message]
|
|
346
|
-
|
|
347
|
-
# Last resort: streaming buffer
|
|
348
|
-
if self._streaming_buffer:
|
|
349
|
-
full_text = "".join(self._streaming_buffer)
|
|
350
|
-
if full_text.strip():
|
|
351
|
-
logger.debug(f"Returning streaming buffer ({len(self._streaming_buffer)} chunks)")
|
|
352
|
-
return [full_text]
|
|
353
|
-
|
|
354
|
-
return []
|
|
355
|
-
|
|
356
|
-
def get_response(self) -> str:
|
|
357
|
-
"""Get the final response as a single string."""
|
|
358
|
-
messages = self.get_messages()
|
|
359
|
-
return "\n".join(messages) if messages else ""
|
|
360
|
-
|
|
361
|
-
def get_debug_info(self) -> dict:
|
|
362
|
-
"""Get debug information about message collection."""
|
|
363
|
-
return {
|
|
364
|
-
"seen_event_ids": len(self._seen_event_ids),
|
|
365
|
-
"seen_content_hashes": len(self._seen_content_hashes),
|
|
366
|
-
"definitive_messages": len(self._definitive_messages),
|
|
367
|
-
"has_fallback": self._fallback_message is not None,
|
|
368
|
-
"streaming_chunks": len(self._streaming_buffer),
|
|
369
|
-
"message_sources": self._message_sources,
|
|
370
|
-
}
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
class MCPClient:
|
|
374
|
-
"""
|
|
375
|
-
Robust MCP client for communicating with codex mcp-server.
|
|
376
|
-
|
|
377
|
-
Uses subprocess.Popen (NOT asyncio.subprocess) to avoid being tied to
|
|
378
|
-
any specific event loop. This allows the MCP server to stay alive across
|
|
379
|
-
multiple asyncio.run() calls, preserving conversation state.
|
|
380
|
-
|
|
381
|
-
Uses dedicated reader threads that queue lines, avoiding the race condition
|
|
382
|
-
of spawning new reader threads on timeout.
|
|
383
|
-
"""
|
|
384
|
-
|
|
385
|
-
# Default config overrides for zwarm-managed codex sessions
|
|
386
|
-
# These override ~/.codex/config.toml to ensure consistent behavior
|
|
387
|
-
# Only used as fallback if no config_path is provided
|
|
388
|
-
DEFAULT_CONFIG_OVERRIDES: dict[str, str] = {
|
|
389
|
-
"model_reasoning_effort": "high", # Use 'high' for compatibility with all models
|
|
390
|
-
}
|
|
391
|
-
|
|
392
|
-
def __init__(
|
|
393
|
-
self,
|
|
394
|
-
config_path: Path | None = None,
|
|
395
|
-
config_overrides: dict[str, str] | None = None,
|
|
396
|
-
):
|
|
397
|
-
self._proc: subprocess.Popen | None = None
|
|
398
|
-
self._proc_pid: int | None = None # Track PID to detect restarts
|
|
399
|
-
self._request_id = 0
|
|
400
|
-
self._initialized = False
|
|
401
|
-
self._stderr_thread: threading.Thread | None = None
|
|
402
|
-
self._stdout_thread: threading.Thread | None = None
|
|
403
|
-
self._stderr_lines: list[str] = []
|
|
404
|
-
self._stdout_queue: queue.Queue[str | None] = queue.Queue()
|
|
405
|
-
self._lock = threading.Lock() # Protect writes only
|
|
406
|
-
self._start_count = 0 # Track how many times we've started
|
|
407
|
-
# Config path for full isolation (preferred)
|
|
408
|
-
self._config_path = config_path
|
|
409
|
-
# Fallback: merge default overrides with any custom ones (used if no config_path)
|
|
410
|
-
self._config_overrides = {**self.DEFAULT_CONFIG_OVERRIDES, **(config_overrides or {})}
|
|
411
|
-
|
|
412
|
-
def start(self) -> None:
|
|
413
|
-
"""Start the MCP server process."""
|
|
414
|
-
with self._lock:
|
|
415
|
-
if self._proc is not None and self._proc.poll() is None:
|
|
416
|
-
logger.debug(f"MCP server already running (pid={self._proc.pid}, start_count={self._start_count})")
|
|
417
|
-
return # Already running
|
|
418
|
-
|
|
419
|
-
# Check if this is a restart (previous server died)
|
|
420
|
-
if self._proc_pid is not None:
|
|
421
|
-
logger.warning(
|
|
422
|
-
f"MCP server restart detected! Previous pid={self._proc_pid}, "
|
|
423
|
-
f"start_count={self._start_count}. All conversation state will be lost."
|
|
424
|
-
)
|
|
425
|
-
|
|
426
|
-
self._start_count += 1
|
|
427
|
-
|
|
428
|
-
# Build command - prefer config file for full isolation, fallback to overrides
|
|
429
|
-
cmd = ["codex", "mcp-server"]
|
|
430
|
-
if self._config_path and self._config_path.exists():
|
|
431
|
-
cmd.extend(["--config", str(self._config_path)])
|
|
432
|
-
logger.info(f"Starting codex mcp-server with config: {self._config_path} (start_count={self._start_count})")
|
|
433
|
-
else:
|
|
434
|
-
# Fallback to individual overrides
|
|
435
|
-
for key, value in self._config_overrides.items():
|
|
436
|
-
cmd.extend(["-c", f'{key}="{value}"'])
|
|
437
|
-
logger.info(f"Starting codex mcp-server with overrides: {self._config_overrides} (start_count={self._start_count})")
|
|
438
|
-
self._proc = subprocess.Popen(
|
|
439
|
-
cmd,
|
|
440
|
-
stdin=subprocess.PIPE,
|
|
441
|
-
stdout=subprocess.PIPE,
|
|
442
|
-
stderr=subprocess.PIPE,
|
|
443
|
-
text=False, # Binary mode for explicit encoding control
|
|
444
|
-
)
|
|
445
|
-
self._proc_pid = self._proc.pid
|
|
446
|
-
self._initialized = False
|
|
447
|
-
self._stderr_lines = []
|
|
448
|
-
self._stdout_queue = queue.Queue() # Fresh queue
|
|
449
|
-
|
|
450
|
-
# Start background thread to read stderr
|
|
451
|
-
self._stderr_thread = threading.Thread(
|
|
452
|
-
target=self._read_stderr_loop,
|
|
453
|
-
daemon=True,
|
|
454
|
-
name="mcp-stderr-reader",
|
|
455
|
-
)
|
|
456
|
-
self._stderr_thread.start()
|
|
457
|
-
|
|
458
|
-
# Start background thread to read stdout into queue
|
|
459
|
-
self._stdout_thread = threading.Thread(
|
|
460
|
-
target=self._read_stdout_loop,
|
|
461
|
-
daemon=True,
|
|
462
|
-
name="mcp-stdout-reader",
|
|
463
|
-
)
|
|
464
|
-
self._stdout_thread.start()
|
|
465
|
-
|
|
466
|
-
logger.info(f"MCP server started (pid={self._proc.pid})")
|
|
467
|
-
|
|
468
|
-
def _read_stderr_loop(self) -> None:
|
|
469
|
-
"""Background thread to read stderr and log errors."""
|
|
470
|
-
if not self._proc or not self._proc.stderr:
|
|
471
|
-
return
|
|
472
|
-
try:
|
|
473
|
-
while True:
|
|
474
|
-
line = self._proc.stderr.readline()
|
|
475
|
-
if not line:
|
|
476
|
-
break
|
|
477
|
-
decoded = line.decode().strip()
|
|
478
|
-
if decoded:
|
|
479
|
-
self._stderr_lines.append(decoded)
|
|
480
|
-
# Keep only last 100 lines
|
|
481
|
-
if len(self._stderr_lines) > 100:
|
|
482
|
-
self._stderr_lines = self._stderr_lines[-100:]
|
|
483
|
-
# Log errors prominently
|
|
484
|
-
if "error" in decoded.lower() or "ERROR" in decoded:
|
|
485
|
-
logger.error(f"[MCP stderr] {decoded}")
|
|
486
|
-
else:
|
|
487
|
-
logger.debug(f"[MCP stderr] {decoded}")
|
|
488
|
-
except Exception as e:
|
|
489
|
-
logger.warning(f"stderr reader stopped: {e}")
|
|
490
|
-
|
|
491
|
-
def _read_stdout_loop(self) -> None:
|
|
492
|
-
"""Background thread to read stdout and queue lines."""
|
|
493
|
-
if not self._proc or not self._proc.stdout:
|
|
494
|
-
return
|
|
495
|
-
try:
|
|
496
|
-
while True:
|
|
497
|
-
line = self._proc.stdout.readline()
|
|
498
|
-
if not line:
|
|
499
|
-
# EOF - signal end
|
|
500
|
-
self._stdout_queue.put(None)
|
|
501
|
-
break
|
|
502
|
-
decoded = line.decode()
|
|
503
|
-
self._stdout_queue.put(decoded)
|
|
504
|
-
except Exception as e:
|
|
505
|
-
logger.warning(f"stdout reader stopped: {e}")
|
|
506
|
-
self._stdout_queue.put(None) # Signal error
|
|
507
|
-
|
|
508
|
-
def _next_id(self) -> int:
|
|
509
|
-
self._request_id += 1
|
|
510
|
-
return self._request_id
|
|
511
|
-
|
|
512
|
-
def _write(self, data: str) -> None:
|
|
513
|
-
"""Write to stdin with error handling."""
|
|
514
|
-
if not self._proc or not self._proc.stdin:
|
|
515
|
-
raise RuntimeError("MCP server not running")
|
|
516
|
-
if self._proc.poll() is not None:
|
|
517
|
-
raise RuntimeError(f"MCP server died (exit code {self._proc.returncode})")
|
|
518
|
-
|
|
519
|
-
self._proc.stdin.write(data.encode())
|
|
520
|
-
self._proc.stdin.flush()
|
|
521
|
-
|
|
522
|
-
def _read_line(self, timeout: float = 120.0) -> str:
|
|
523
|
-
"""
|
|
524
|
-
Read a line from the stdout queue with timeout.
|
|
525
|
-
|
|
526
|
-
Uses a dedicated reader thread that queues lines, so we never
|
|
527
|
-
lose data on timeout - we just haven't received it yet.
|
|
528
|
-
"""
|
|
529
|
-
if not self._proc:
|
|
530
|
-
raise RuntimeError("MCP server not running")
|
|
531
|
-
|
|
532
|
-
try:
|
|
533
|
-
line = self._stdout_queue.get(timeout=timeout)
|
|
534
|
-
except queue.Empty:
|
|
535
|
-
# Timeout - check process health
|
|
536
|
-
if self._proc.poll() is not None:
|
|
537
|
-
stderr_context = "\n".join(self._stderr_lines[-10:]) if self._stderr_lines else "(no stderr)"
|
|
538
|
-
raise RuntimeError(
|
|
539
|
-
f"MCP server died (exit code {self._proc.returncode}).\n"
|
|
540
|
-
f"Recent stderr:\n{stderr_context}"
|
|
541
|
-
)
|
|
542
|
-
# Process still alive, just slow - return empty to let caller decide
|
|
543
|
-
return ""
|
|
544
|
-
|
|
545
|
-
if line is None:
|
|
546
|
-
# EOF or error from reader thread
|
|
547
|
-
stderr_context = "\n".join(self._stderr_lines[-10:]) if self._stderr_lines else "(no stderr)"
|
|
548
|
-
if self._proc.poll() is not None:
|
|
549
|
-
raise RuntimeError(
|
|
550
|
-
f"MCP server exited (code {self._proc.returncode}).\n"
|
|
551
|
-
f"Recent stderr:\n{stderr_context}"
|
|
552
|
-
)
|
|
553
|
-
raise RuntimeError(f"MCP stdout closed unexpectedly.\nRecent stderr:\n{stderr_context}")
|
|
554
|
-
|
|
555
|
-
return line
|
|
556
|
-
|
|
557
|
-
def _check_alive(self) -> None:
|
|
558
|
-
"""Check if the MCP server is still alive, raise if not."""
|
|
559
|
-
if not self._proc:
|
|
560
|
-
raise RuntimeError("MCP server not started")
|
|
561
|
-
if self._proc.poll() is not None:
|
|
562
|
-
stderr_context = "\n".join(self._stderr_lines[-10:]) if self._stderr_lines else "(no stderr)"
|
|
563
|
-
raise RuntimeError(
|
|
564
|
-
f"MCP server died (exit code {self._proc.returncode}).\n"
|
|
565
|
-
f"Recent stderr:\n{stderr_context}"
|
|
566
|
-
)
|
|
567
|
-
|
|
568
|
-
def initialize(self) -> dict:
|
|
569
|
-
"""Initialize MCP connection."""
|
|
570
|
-
self._check_alive()
|
|
571
|
-
|
|
572
|
-
request = {
|
|
573
|
-
"jsonrpc": "2.0",
|
|
574
|
-
"id": self._next_id(),
|
|
575
|
-
"method": "initialize",
|
|
576
|
-
"params": {
|
|
577
|
-
"protocolVersion": "2024-11-05",
|
|
578
|
-
"capabilities": {},
|
|
579
|
-
"clientInfo": {"name": "zwarm", "version": "0.1.0"},
|
|
580
|
-
},
|
|
581
|
-
}
|
|
582
|
-
with self._lock:
|
|
583
|
-
self._write(json.dumps(request) + "\n")
|
|
584
|
-
|
|
585
|
-
response_line = self._read_line(timeout=30.0)
|
|
586
|
-
if not response_line:
|
|
587
|
-
raise RuntimeError("No response from MCP server during init")
|
|
588
|
-
|
|
589
|
-
response = json.loads(response_line)
|
|
590
|
-
if "error" in response:
|
|
591
|
-
raise RuntimeError(f"MCP init error: {response['error']}")
|
|
592
|
-
|
|
593
|
-
# Send initialized notification
|
|
594
|
-
notif = {"jsonrpc": "2.0", "method": "notifications/initialized"}
|
|
595
|
-
with self._lock:
|
|
596
|
-
self._write(json.dumps(notif) + "\n")
|
|
597
|
-
|
|
598
|
-
self._initialized = True
|
|
599
|
-
logger.info("MCP connection initialized")
|
|
600
|
-
return response
|
|
601
|
-
|
|
602
|
-
def call_tool(self, name: str, arguments: dict, timeout: float = 300.0) -> dict:
|
|
603
|
-
"""
|
|
604
|
-
Call an MCP tool and collect streaming events.
|
|
605
|
-
|
|
606
|
-
Uses MessageCollector for robust deduplication and priority-based
|
|
607
|
-
message selection. This prevents the transcript rendering bugs:
|
|
608
|
-
- Message duplication
|
|
609
|
-
- Role contamination
|
|
610
|
-
- Turn mis-association
|
|
611
|
-
|
|
612
|
-
Args:
|
|
613
|
-
name: Tool name (codex, codex-reply)
|
|
614
|
-
arguments: Tool arguments
|
|
615
|
-
timeout: Overall timeout for the call (default 5 min)
|
|
616
|
-
"""
|
|
617
|
-
self._check_alive()
|
|
618
|
-
|
|
619
|
-
if not self._initialized:
|
|
620
|
-
self.initialize()
|
|
621
|
-
|
|
622
|
-
request_id = self._next_id()
|
|
623
|
-
request = {
|
|
624
|
-
"jsonrpc": "2.0",
|
|
625
|
-
"id": request_id,
|
|
626
|
-
"method": "tools/call",
|
|
627
|
-
"params": {"name": name, "arguments": arguments},
|
|
628
|
-
}
|
|
629
|
-
|
|
630
|
-
logger.debug(f"Calling MCP tool: {name} with args: {list(arguments.keys())}")
|
|
631
|
-
with self._lock:
|
|
632
|
-
self._write(json.dumps(request) + "\n")
|
|
633
|
-
|
|
634
|
-
# Use MessageCollector for robust event handling
|
|
635
|
-
collector = MessageCollector()
|
|
636
|
-
final_result = None
|
|
637
|
-
start_time = time.time()
|
|
638
|
-
all_events: list[dict] = [] # Keep ALL events for debugging
|
|
639
|
-
|
|
640
|
-
for event_count in range(1000): # Safety limit on events
|
|
641
|
-
self._check_alive()
|
|
642
|
-
|
|
643
|
-
# Check overall timeout
|
|
644
|
-
elapsed = time.time() - start_time
|
|
645
|
-
if elapsed > timeout:
|
|
646
|
-
raise RuntimeError(f"MCP call timed out after {timeout}s ({event_count} events received)")
|
|
647
|
-
|
|
648
|
-
# Read from queue with per-event timeout
|
|
649
|
-
line = self._read_line(timeout=30.0)
|
|
650
|
-
|
|
651
|
-
if not line:
|
|
652
|
-
# Timeout waiting for event - process is still alive, just slow
|
|
653
|
-
logger.debug(f"Waiting for MCP event... (elapsed: {elapsed:.0f}s, events: {event_count})")
|
|
654
|
-
continue
|
|
655
|
-
|
|
656
|
-
try:
|
|
657
|
-
event = json.loads(line)
|
|
658
|
-
all_events.append(event) # Keep for debugging
|
|
659
|
-
except json.JSONDecodeError as e:
|
|
660
|
-
logger.warning(f"Invalid JSON from MCP: {line[:100]}... - {e}")
|
|
661
|
-
continue
|
|
662
|
-
|
|
663
|
-
# Check for final result (has matching id)
|
|
664
|
-
if event.get("id") == request_id:
|
|
665
|
-
if "result" in event:
|
|
666
|
-
final_result = event["result"]
|
|
667
|
-
# Extract conversation ID from final result
|
|
668
|
-
if isinstance(final_result, dict):
|
|
669
|
-
conv_id = final_result.get("conversationId") or final_result.get("conversation_id")
|
|
670
|
-
collector.set_conversation_id(conv_id)
|
|
671
|
-
logger.debug(f"Got final result after {event_count} events")
|
|
672
|
-
break
|
|
673
|
-
elif "error" in event:
|
|
674
|
-
error = event["error"]
|
|
675
|
-
raise RuntimeError(f"MCP tool error: {error.get('message', error)}")
|
|
676
|
-
|
|
677
|
-
# Process event through collector
|
|
678
|
-
try:
|
|
679
|
-
should_continue = collector.process_event(event)
|
|
680
|
-
if not should_continue:
|
|
681
|
-
logger.debug(f"Collector signaled completion after {event_count} events")
|
|
682
|
-
break
|
|
683
|
-
except RuntimeError as e:
|
|
684
|
-
# Collector raises RuntimeError for codex errors
|
|
685
|
-
raise
|
|
686
|
-
|
|
687
|
-
# Try to extract content from final_result if collector has no messages
|
|
688
|
-
messages = collector.get_messages()
|
|
689
|
-
if final_result and not messages:
|
|
690
|
-
if "content" in final_result:
|
|
691
|
-
content = final_result["content"]
|
|
692
|
-
if isinstance(content, list):
|
|
693
|
-
for block in content:
|
|
694
|
-
if isinstance(block, dict) and block.get("text"):
|
|
695
|
-
messages.append(block["text"])
|
|
696
|
-
elif isinstance(block, str):
|
|
697
|
-
messages.append(block)
|
|
698
|
-
elif isinstance(content, str):
|
|
699
|
-
messages.append(content)
|
|
700
|
-
if not messages and "text" in final_result:
|
|
701
|
-
messages.append(final_result["text"])
|
|
702
|
-
|
|
703
|
-
# Build result
|
|
704
|
-
result = {
|
|
705
|
-
"conversationId": collector.conversation_id,
|
|
706
|
-
"messages": messages,
|
|
707
|
-
"output": "\n".join(messages) if messages else "",
|
|
708
|
-
"usage": collector.token_usage,
|
|
709
|
-
}
|
|
710
|
-
|
|
711
|
-
# Log detailed debug info if we didn't capture any messages
|
|
712
|
-
if not messages:
|
|
713
|
-
debug_info = collector.get_debug_info()
|
|
714
|
-
event_types = [e.get("method") or f"id:{e.get('id')}" for e in all_events[:20]]
|
|
715
|
-
logger.warning(
|
|
716
|
-
f"MCP call returned no messages. "
|
|
717
|
-
f"conversation_id={collector.conversation_id}, "
|
|
718
|
-
f"event_count={len(all_events)}, "
|
|
719
|
-
f"event_types={event_types}, "
|
|
720
|
-
f"collector_debug={debug_info}, "
|
|
721
|
-
f"final_result_keys={list(final_result.keys()) if final_result else 'None'}"
|
|
722
|
-
)
|
|
723
|
-
# Log codex/event details for debugging
|
|
724
|
-
codex_events = [e for e in all_events if e.get("method") == "codex/event"]
|
|
725
|
-
if codex_events:
|
|
726
|
-
for ce in codex_events[-5:]: # Last 5 codex events
|
|
727
|
-
msg = ce.get("params", {}).get("msg", {})
|
|
728
|
-
logger.debug(f" codex/event: type={msg.get('type')}, keys={list(msg.keys())}")
|
|
729
|
-
|
|
730
|
-
logger.debug(f"MCP call complete: {len(messages)} messages, conversation_id={collector.conversation_id}")
|
|
731
|
-
return result
|
|
732
|
-
|
|
733
|
-
def close(self) -> None:
|
|
734
|
-
"""Close the MCP connection gracefully."""
|
|
735
|
-
if self._proc and self._proc.poll() is None:
|
|
736
|
-
logger.info("Terminating MCP server...")
|
|
737
|
-
self._proc.terminate()
|
|
738
|
-
try:
|
|
739
|
-
self._proc.wait(timeout=5)
|
|
740
|
-
except subprocess.TimeoutExpired:
|
|
741
|
-
logger.warning("MCP server didn't terminate, killing...")
|
|
742
|
-
self._proc.kill()
|
|
743
|
-
self._proc.wait()
|
|
744
|
-
|
|
745
|
-
self._proc = None
|
|
746
|
-
self._initialized = False
|
|
747
|
-
|
|
748
|
-
@property
|
|
749
|
-
def is_alive(self) -> bool:
|
|
750
|
-
"""Check if the MCP server is running."""
|
|
751
|
-
return self._proc is not None and self._proc.poll() is None
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
@register_adapter("codex_mcp")
class CodexMCPAdapter(ExecutorAdapter):
    """
    Codex adapter using MCP server for sync conversations.

    This is the recommended way to have iterative conversations with Codex.
    The MCP client uses subprocess.Popen (not asyncio) so it persists across
    multiple asyncio.run() calls, preserving conversation state.

    Config isolation: Pass config_path to use a local codex.toml instead of
    the user's global ~/.codex/config.toml. This is the preferred approach.
    Falls back to config_overrides if no config_path is provided.
    """

    DEFAULT_MODEL = "gpt-5.1-codex-mini"  # Default codex model

    def __init__(
        self,
        model: str | None = None,
        config_path: Path | None = None,
        config_overrides: dict[str, str] | None = None,
    ):
        # Effective model for new sessions unless overridden per-call.
        self._model = model or self.DEFAULT_MODEL
        self._config_path = config_path  # Path to local codex.toml for isolation
        self._config_overrides = config_overrides or {}
        # Lazily created by _ensure_client(); kept alive across calls so the
        # MCP server retains conversation state.
        self._mcp_client: MCPClient | None = None
        self._sessions: dict[str, str] = {}  # session_id -> conversationId
        # Cumulative token usage for cost tracking
        self._total_usage: dict[str, int] = {
            "input_tokens": 0,
            "output_tokens": 0,
            "cached_input_tokens": 0,
            "reasoning_tokens": 0,
            "total_tokens": 0,
        }
|
|
788
|
-
|
|
789
|
-
def _accumulate_usage(self, usage: dict[str, Any]) -> None:
|
|
790
|
-
"""Add usage to cumulative totals."""
|
|
791
|
-
if not usage:
|
|
792
|
-
return
|
|
793
|
-
for key in self._total_usage:
|
|
794
|
-
self._total_usage[key] += usage.get(key, 0)
|
|
795
|
-
|
|
796
|
-
@property
|
|
797
|
-
def total_usage(self) -> dict[str, int]:
|
|
798
|
-
"""Get cumulative token usage across all calls."""
|
|
799
|
-
return self._total_usage.copy()
|
|
800
|
-
|
|
801
|
-
def _ensure_client(self) -> MCPClient:
|
|
802
|
-
"""Ensure MCP client is running and return it."""
|
|
803
|
-
if self._mcp_client is None:
|
|
804
|
-
self._mcp_client = MCPClient(
|
|
805
|
-
config_path=self._config_path,
|
|
806
|
-
config_overrides=self._config_overrides,
|
|
807
|
-
)
|
|
808
|
-
|
|
809
|
-
if not self._mcp_client.is_alive:
|
|
810
|
-
self._mcp_client.start()
|
|
811
|
-
|
|
812
|
-
return self._mcp_client
|
|
813
|
-
|
|
814
|
-
    @weave.op()
    def _call_codex(
        self,
        task: str,
        cwd: str,
        sandbox: str,
        model: str | None = None,
        reasoning_effort: str | None = None,
    ) -> dict[str, Any]:
        """
        Call codex MCP tool - traced by Weave.

        This is synchronous (uses subprocess.Popen, not asyncio) so the MCP
        server persists across calls.

        Args:
            task: Prompt text for the new Codex conversation.
            cwd: Working directory the agent should operate in.
            sandbox: Codex sandbox policy string (e.g. "workspace-write").
            model: Optional model override; server default when None.
            reasoning_effort: Optional override for the codex
                "model_reasoning_effort" config key.

        Returns:
            Dict with "conversation_id", extracted "response" text,
            "raw_messages", this call's "usage", and cumulative
            "total_usage".
        """
        client = self._ensure_client()

        args: dict[str, Any] = {
            "prompt": task,
            "cwd": cwd,
            "sandbox": sandbox,
        }
        if model:
            args["model"] = model

        # Pass reasoning_effort to override codex config defaults
        # The config key is "model_reasoning_effort"
        if reasoning_effort:
            args["model_reasoning_effort"] = reasoning_effort

        logger.info(f"Calling codex with task_len={len(task)}, cwd={cwd}, model={model or 'default'}, reasoning_effort={reasoning_effort or 'default'}")
        logger.debug(f"Full codex args: {args}")

        result = client.call_tool("codex", args)

        # Log the result structure
        conversation_id = result.get("conversationId")
        messages_count = len(result.get("messages", []))
        output_len = len(result.get("output", ""))
        usage = result.get("usage", {})

        logger.info(
            f"codex result: conversation_id={conversation_id}, "
            f"messages_count={messages_count}, output_len={output_len}, "
            f"usage={usage.get('total_tokens', 0)} tokens"
        )

        # Warn if we got a conversation ID but no messages (agent did work but we lost output)
        if conversation_id and not messages_count and not output_len:
            logger.warning(
                f"codex returned conversation_id={conversation_id} but NO messages/output! "
                f"The agent processed {usage.get('total_tokens', 0)} tokens but we didn't capture the response. "
                f"This may indicate an issue with event parsing."
            )

        # Track usage
        self._accumulate_usage(usage)

        return {
            "conversation_id": conversation_id,
            "response": self._extract_response(result),
            "raw_messages": result.get("messages", []),
            "usage": usage,
            "total_usage": self.total_usage,
        }
|
|
879
|
-
|
|
880
|
-
    @weave.op()
    def _call_codex_reply(
        self,
        conversation_id: str,
        message: str,
    ) -> dict[str, Any]:
        """
        Call codex-reply MCP tool - traced by Weave.

        This is synchronous (uses subprocess.Popen, not asyncio) so the MCP
        server persists across calls.

        Returns:
            Dict with the extracted "response", the echo-filtered
            "raw_messages", this call's "usage", cumulative "total_usage",
            and a "conversation_lost" flag (True when the server returned
            neither messages nor output, which happens when the MCP server
            no longer knows this conversation).
        """
        client = self._ensure_client()

        logger.info(f"Calling codex-reply with conversation_id={conversation_id}, message_len={len(message)}")
        logger.debug(f"MCP client alive: {client.is_alive}, initialized: {client._initialized}")

        result = client.call_tool("codex-reply", {
            "conversationId": conversation_id,
            "prompt": message,
        })

        # Log the full result structure for debugging
        logger.info(
            f"codex-reply result: conversationId={result.get('conversationId')}, "
            f"messages_count={len(result.get('messages', []))}, "
            f"output_len={len(result.get('output', ''))}, "
            f"usage={result.get('usage', {}).get('total_tokens', 0)} tokens"
        )

        # Check for conversation loss - MCP returns empty result when session not found
        if not result.get("messages") and not result.get("output"):
            logger.error(
                f"codex-reply returned empty result for conversation_id={conversation_id}. "
                f"The MCP server may have lost the conversation state. Result: {result}"
            )

        # Track usage
        usage = result.get("usage", {})
        self._accumulate_usage(usage)

        # Filter out the sent message from the response using content hashing
        # The MCP may echo our prompt back, but we use robust content comparison
        raw_messages = result.get("messages", [])

        # Create hash of user message for comparison (normalized)
        def normalize_for_comparison(text: str) -> str:
            """Normalize text for comparison (lowercase, collapsed whitespace)."""
            return " ".join(text.lower().split())

        user_msg_normalized = normalize_for_comparison(message)
        user_msg_hash = hashlib.md5(user_msg_normalized.encode()).hexdigest()

        def is_user_message_echo(text: str) -> bool:
            """Check if text is just an echo of the user message."""
            if not text:
                return True  # Empty is effectively an echo (skip it)

            text_normalized = normalize_for_comparison(text)
            text_hash = hashlib.md5(text_normalized.encode()).hexdigest()

            # Exact match (case-insensitive, whitespace-normalized)
            if text_hash == user_msg_hash:
                return True

            # Check if text IS the user message (not just starts with it)
            # This avoids the bug where "Fix bug by X" gets filtered when user said "Fix bug"
            if text_normalized == user_msg_normalized:
                return True

            return False

        filtered_messages = [m for m in raw_messages if not is_user_message_echo(m)]

        # Build filtered result for extraction
        filtered_result = {
            **result,
            "messages": filtered_messages,
            "output": "\n".join(filtered_messages) if filtered_messages else result.get("output", ""),
        }

        response = self._extract_response(filtered_result)
        filtered_count = len(raw_messages) - len(filtered_messages)
        if filtered_count > 0:
            logger.debug(f"Filtered {filtered_count} user echo messages from response")
        logger.debug(f"codex-reply response length: {len(response)} chars")

        return {
            "response": response,
            "raw_messages": filtered_messages,  # Return filtered messages
            "usage": usage,
            "total_usage": self.total_usage,
            "conversation_lost": not result.get("messages") and not result.get("output"),
        }
|
|
974
|
-
|
|
975
|
-
    @weave.op()
    async def start_session(
        self,
        task: str,
        working_dir: Path,
        mode: Literal["sync", "async"] = "sync",
        model: str | None = None,
        sandbox: str = "workspace-write",
        **kwargs,
    ) -> ConversationSession:
        """Start a Codex session (sync or async mode).

        Sync mode drives the persistent MCP server (codex tool), records the
        first user/assistant exchange and token usage on the session, and
        maps session.id -> conversationId for later codex-reply calls.
        Async mode launches a fire-and-forget `codex exec` subprocess whose
        JSONL output is collected later by check_status().

        kwargs: "reasoning_effort" is forwarded to the sync codex call.
        """
        effective_model = model or self._model
        session = ConversationSession(
            adapter=self.name,
            mode=SessionMode(mode),
            working_dir=working_dir,
            task_description=task,
            model=effective_model,
        )

        if mode == "sync":
            # Use traced codex call (synchronous - MCP client persists across calls)
            result = self._call_codex(
                task=task,
                cwd=str(working_dir.absolute()),
                sandbox=sandbox,
                model=effective_model,
                reasoning_effort=kwargs.get("reasoning_effort"),
            )

            # Extract conversation ID and response
            session.conversation_id = result["conversation_id"]
            if session.conversation_id:
                self._sessions[session.id] = session.conversation_id
                logger.debug(f"Session {session.id[:8]} mapped to conversation {session.conversation_id}")
            else:
                # This is bad - we won't be able to continue this conversation
                logger.warning(
                    f"Session {session.id[:8]} started but MCP didn't return a conversation ID. "
                    "Further converse() calls will fail."
                )

            session.add_message("user", task)
            session.add_message("assistant", result["response"])

            # Track token usage on the session
            session.add_usage(result.get("usage", {}))

        else:
            # Async mode: use codex exec (fire-and-forget)
            # This runs in a subprocess without MCP, outputs JSONL events
            cmd = [
                "codex", "exec",
                "--dangerously-bypass-approvals-and-sandbox",
                "--skip-git-repo-check",
                "--json",
                "--model", effective_model,
                "-C", str(working_dir.absolute()),  # Explicit working directory
                "--", task,
            ]

            logger.info(f"Starting async codex: {' '.join(cmd[:8])}...")

            proc = subprocess.Popen(
                cmd,
                cwd=working_dir,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            # Keep the handle so check_status()/stop() can poll or kill it.
            session.process = proc
            session.add_message("user", task)

        return session
|
|
1049
|
-
|
|
1050
|
-
    async def send_message(
        self,
        session: ConversationSession,
        message: str,
    ) -> str:
        """Send a message to continue a sync conversation.

        Records the exchange and token usage on the session. If the MCP
        server has lost the conversation, the stale conversation_id is
        cleared so the orchestrator can re-delegate.

        Raises:
            ValueError: if the session is async, not active, or has no
                conversation ID to continue.
        """
        if session.mode != SessionMode.SYNC:
            raise ValueError("Cannot send message to async session")
        if session.status != SessionStatus.ACTIVE:
            raise ValueError(f"Session is not active: {session.status}")
        if not session.conversation_id:
            raise ValueError("Session has no conversation ID")

        # Use traced codex-reply call (synchronous - MCP client persists across calls)
        result = self._call_codex_reply(
            conversation_id=session.conversation_id,
            message=message,
        )

        response_text = result["response"]

        # Check if conversation was lost
        if result.get("conversation_lost"):
            logger.warning(
                f"Conversation {session.conversation_id} was lost. "
                f"Session {session.id} will be marked as needing re-delegation."
            )
            # Mark the session as having a lost conversation so orchestrator can handle it
            session.conversation_id = None  # Clear the stale ID

        session.add_message("user", message)
        session.add_message("assistant", response_text)

        # Track token usage on the session
        session.add_usage(result.get("usage", {}))

        return response_text
|
|
1087
|
-
|
|
1088
|
-
@weave.op()
|
|
1089
|
-
def _parse_jsonl_output(self, stdout: str) -> dict[str, Any]:
|
|
1090
|
-
"""
|
|
1091
|
-
Parse JSONL output from codex exec --json.
|
|
1092
|
-
|
|
1093
|
-
Returns dict with:
|
|
1094
|
-
- response: The agent's message text
|
|
1095
|
-
- usage: Token usage stats
|
|
1096
|
-
- thread_id: The conversation thread ID
|
|
1097
|
-
- events: All parsed events (for debugging)
|
|
1098
|
-
"""
|
|
1099
|
-
response_parts = []
|
|
1100
|
-
usage = {}
|
|
1101
|
-
thread_id = None
|
|
1102
|
-
events = []
|
|
1103
|
-
|
|
1104
|
-
for line in stdout.strip().split("\n"):
|
|
1105
|
-
if not line.strip():
|
|
1106
|
-
continue
|
|
1107
|
-
try:
|
|
1108
|
-
event = json.loads(line)
|
|
1109
|
-
events.append(event)
|
|
1110
|
-
|
|
1111
|
-
event_type = event.get("type", "")
|
|
1112
|
-
|
|
1113
|
-
if event_type == "thread.started":
|
|
1114
|
-
thread_id = event.get("thread_id")
|
|
1115
|
-
|
|
1116
|
-
elif event_type == "item.completed":
|
|
1117
|
-
item = event.get("item", {})
|
|
1118
|
-
if item.get("type") == "agent_message":
|
|
1119
|
-
response_parts.append(item.get("text", ""))
|
|
1120
|
-
|
|
1121
|
-
elif event_type == "turn.completed":
|
|
1122
|
-
usage = event.get("usage", {})
|
|
1123
|
-
|
|
1124
|
-
except json.JSONDecodeError:
|
|
1125
|
-
logger.warning(f"Failed to parse JSONL line: {line[:100]}")
|
|
1126
|
-
continue
|
|
1127
|
-
|
|
1128
|
-
return {
|
|
1129
|
-
"response": "\n".join(response_parts),
|
|
1130
|
-
"usage": usage,
|
|
1131
|
-
"thread_id": thread_id,
|
|
1132
|
-
"events": events,
|
|
1133
|
-
}
|
|
1134
|
-
|
|
1135
|
-
    @weave.op()
    async def check_status(
        self,
        session: ConversationSession,
    ) -> dict:
        """Check status of an async session.

        Polls the `codex exec` subprocess; while running returns
        {"status": "running"}. Once finished, parses the JSONL output,
        records the response and token usage on the session, and marks it
        completed (exit code 0) or failed (non-zero).
        """
        if session.mode != SessionMode.ASYNC:
            # Sync sessions have no subprocess; report their stored status.
            return {"status": session.status.value}

        if session.process is None:
            return {"status": "unknown", "error": "No process handle"}

        # Check if process is still running
        poll = session.process.poll()
        if poll is None:
            return {"status": "running"}

        # Process finished - parse the JSONL output
        stdout, stderr = session.process.communicate()

        if poll == 0:
            # Parse JSONL to extract actual response
            parsed = self._parse_jsonl_output(stdout)
            response_text = parsed["response"] or "(no response captured)"

            # Add the response as a message
            session.add_message("assistant", response_text)

            # Track token usage
            if parsed["usage"]:
                session.add_usage({
                    "input_tokens": parsed["usage"].get("input_tokens", 0),
                    "output_tokens": parsed["usage"].get("output_tokens", 0),
                    "total_tokens": (
                        parsed["usage"].get("input_tokens", 0) +
                        parsed["usage"].get("output_tokens", 0)
                    ),
                })

            session.complete(response_text[:500])
            return {
                "status": "completed",
                "response": response_text,
                "usage": parsed["usage"],
                "thread_id": parsed["thread_id"],
            }
        else:
            # Try to parse stderr or stdout for error info
            error_msg = stderr.strip() if stderr else f"Exit code: {poll}"

            # Sometimes errors come through stdout as JSONL too
            if stdout and not stderr:
                try:
                    parsed = self._parse_jsonl_output(stdout)
                    if not parsed["response"]:
                        error_msg = f"Process failed with no response. Exit code: {poll}"
                except Exception:
                    error_msg = stdout[:500] if stdout else f"Exit code: {poll}"

            session.fail(error_msg[:500])
            return {"status": "failed", "error": error_msg, "exit_code": poll}
|
|
1196
|
-
|
|
1197
|
-
async def stop(
|
|
1198
|
-
self,
|
|
1199
|
-
session: ConversationSession,
|
|
1200
|
-
) -> None:
|
|
1201
|
-
"""Stop a session."""
|
|
1202
|
-
import subprocess
|
|
1203
|
-
|
|
1204
|
-
if session.process and session.process.poll() is None:
|
|
1205
|
-
session.process.terminate()
|
|
1206
|
-
try:
|
|
1207
|
-
session.process.wait(timeout=5)
|
|
1208
|
-
except subprocess.TimeoutExpired:
|
|
1209
|
-
session.process.kill()
|
|
1210
|
-
|
|
1211
|
-
session.fail("Stopped by user")
|
|
1212
|
-
|
|
1213
|
-
# Remove from tracking
|
|
1214
|
-
if session.id in self._sessions:
|
|
1215
|
-
del self._sessions[session.id]
|
|
1216
|
-
|
|
1217
|
-
async def cleanup(self) -> None:
|
|
1218
|
-
"""Clean up MCP server."""
|
|
1219
|
-
if self._mcp_client:
|
|
1220
|
-
self._mcp_client.close()
|
|
1221
|
-
self._mcp_client = None
|
|
1222
|
-
|
|
1223
|
-
def _extract_response(self, result: dict) -> str:
|
|
1224
|
-
"""Extract response text from MCP result."""
|
|
1225
|
-
# Check for error indicators - empty result suggests lost conversation
|
|
1226
|
-
if (
|
|
1227
|
-
result.get("conversationId") is None
|
|
1228
|
-
and not result.get("messages")
|
|
1229
|
-
and not result.get("output")
|
|
1230
|
-
):
|
|
1231
|
-
logger.warning(f"MCP returned empty result - conversation may be lost: {result}")
|
|
1232
|
-
return "[ERROR] Conversation lost - the MCP server no longer has this session. Please re-delegate the task."
|
|
1233
|
-
|
|
1234
|
-
# First check for our collected output
|
|
1235
|
-
if result.get("output"):
|
|
1236
|
-
return result["output"]
|
|
1237
|
-
|
|
1238
|
-
# Check for messages list
|
|
1239
|
-
if result.get("messages"):
|
|
1240
|
-
return "\n".join(result["messages"])
|
|
1241
|
-
|
|
1242
|
-
# Result may have different structures depending on codex version
|
|
1243
|
-
if "content" in result:
|
|
1244
|
-
content = result["content"]
|
|
1245
|
-
if isinstance(content, list):
|
|
1246
|
-
texts = []
|
|
1247
|
-
for block in content:
|
|
1248
|
-
if isinstance(block, dict) and "text" in block:
|
|
1249
|
-
texts.append(block["text"])
|
|
1250
|
-
elif isinstance(block, str):
|
|
1251
|
-
texts.append(block)
|
|
1252
|
-
if texts:
|
|
1253
|
-
return "\n".join(texts)
|
|
1254
|
-
elif isinstance(content, str):
|
|
1255
|
-
return content
|
|
1256
|
-
|
|
1257
|
-
if "text" in result:
|
|
1258
|
-
return result["text"]
|
|
1259
|
-
|
|
1260
|
-
# Fallback: stringify the result (but log it as unexpected)
|
|
1261
|
-
logger.warning(f"Unexpected MCP result format, returning raw: {list(result.keys())}")
|
|
1262
|
-
return json.dumps(result, indent=2)
|