zwarm 1.3.3__tar.gz → 1.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {zwarm-1.3.3 → zwarm-1.3.8}/PKG-INFO +1 -1
  2. {zwarm-1.3.3 → zwarm-1.3.8}/pyproject.toml +1 -1
  3. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/adapters/codex_mcp.py +124 -23
  4. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/cli/main.py +735 -310
  5. zwarm-1.3.8/src/zwarm/sessions/__init__.py +24 -0
  6. zwarm-1.3.8/src/zwarm/sessions/manager.py +589 -0
  7. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/tools/delegation.py +143 -1
  8. {zwarm-1.3.3 → zwarm-1.3.8}/.gitignore +0 -0
  9. {zwarm-1.3.3 → zwarm-1.3.8}/README.md +0 -0
  10. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/__init__.py +0 -0
  11. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/adapters/__init__.py +0 -0
  12. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/adapters/base.py +0 -0
  13. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/adapters/claude_code.py +0 -0
  14. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/adapters/registry.py +0 -0
  15. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/adapters/test_codex_mcp.py +0 -0
  16. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/adapters/test_registry.py +0 -0
  17. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/cli/__init__.py +0 -0
  18. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/core/__init__.py +0 -0
  19. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/core/compact.py +0 -0
  20. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/core/config.py +0 -0
  21. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/core/environment.py +0 -0
  22. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/core/models.py +0 -0
  23. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/core/state.py +0 -0
  24. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/core/test_compact.py +0 -0
  25. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/core/test_config.py +0 -0
  26. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/core/test_models.py +0 -0
  27. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/orchestrator.py +0 -0
  28. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/prompts/__init__.py +0 -0
  29. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/prompts/orchestrator.py +0 -0
  30. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/test_orchestrator_watchers.py +0 -0
  31. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/tools/__init__.py +0 -0
  32. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/watchers/__init__.py +0 -0
  33. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/watchers/base.py +0 -0
  34. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/watchers/builtin.py +0 -0
  35. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/watchers/manager.py +0 -0
  36. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/watchers/registry.py +0 -0
  37. {zwarm-1.3.3 → zwarm-1.3.8}/src/zwarm/watchers/test_watchers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zwarm
3
- Version: 1.3.3
3
+ Version: 1.3.8
4
4
  Summary: Multi-Agent CLI Orchestration Research Platform
5
5
  Requires-Python: <3.14,>=3.13
6
6
  Requires-Dist: python-dotenv>=1.0.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "zwarm"
3
- version = "1.3.3"
3
+ version = "1.3.8"
4
4
  description = "Multi-Agent CLI Orchestration Research Platform"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13,<3.14"
@@ -44,6 +44,7 @@ class MCPClient:
44
44
 
45
45
  def __init__(self):
46
46
  self._proc: subprocess.Popen | None = None
47
+ self._proc_pid: int | None = None # Track PID to detect restarts
47
48
  self._request_id = 0
48
49
  self._initialized = False
49
50
  self._stderr_thread: threading.Thread | None = None
@@ -51,14 +52,24 @@ class MCPClient:
51
52
  self._stderr_lines: list[str] = []
52
53
  self._stdout_queue: queue.Queue[str | None] = queue.Queue()
53
54
  self._lock = threading.Lock() # Protect writes only
55
+ self._start_count = 0 # Track how many times we've started
54
56
 
55
57
  def start(self) -> None:
56
58
  """Start the MCP server process."""
57
59
  with self._lock:
58
60
  if self._proc is not None and self._proc.poll() is None:
61
+ logger.debug(f"MCP server already running (pid={self._proc.pid}, start_count={self._start_count})")
59
62
  return # Already running
60
63
 
61
- logger.info("Starting codex mcp-server...")
64
+ # Check if this is a restart (previous server died)
65
+ if self._proc_pid is not None:
66
+ logger.warning(
67
+ f"MCP server restart detected! Previous pid={self._proc_pid}, "
68
+ f"start_count={self._start_count}. All conversation state will be lost."
69
+ )
70
+
71
+ self._start_count += 1
72
+ logger.info(f"Starting codex mcp-server... (start_count={self._start_count})")
62
73
  self._proc = subprocess.Popen(
63
74
  ["codex", "mcp-server"],
64
75
  stdin=subprocess.PIPE,
@@ -66,6 +77,7 @@ class MCPClient:
66
77
  stderr=subprocess.PIPE,
67
78
  text=False, # Binary mode for explicit encoding control
68
79
  )
80
+ self._proc_pid = self._proc.pid
69
81
  self._initialized = False
70
82
  self._stderr_lines = []
71
83
  self._stdout_queue = queue.Queue() # Fresh queue
@@ -251,11 +263,13 @@ class MCPClient:
251
263
  # Collect streaming events until final result
252
264
  # Reader thread queues lines, we pull from queue with timeout
253
265
  session_id = None
266
+ conversation_id = None # Track conversation ID separately
254
267
  agent_messages: list[str] = []
255
268
  streaming_text: list[str] = [] # Accumulate streaming delta text
256
269
  final_result = None
257
270
  token_usage: dict[str, Any] = {} # Track token usage
258
271
  start_time = time.time()
272
+ all_events: list[dict] = [] # Keep ALL events for debugging
259
273
 
260
274
  for event_count in range(1000): # Safety limit on events
261
275
  self._check_alive()
@@ -278,6 +292,7 @@ class MCPClient:
278
292
 
279
293
  try:
280
294
  event = json.loads(line)
295
+ all_events.append(event) # Keep for debugging
281
296
  except json.JSONDecodeError as e:
282
297
  logger.warning(f"Invalid JSON from MCP: {line[:100]}... - {e}")
283
298
  continue
@@ -286,7 +301,10 @@ class MCPClient:
286
301
  if event.get("id") == request_id:
287
302
  if "result" in event:
288
303
  final_result = event["result"]
289
- logger.debug(f"Got final result after {event_count} events")
304
+ # Extract conversation ID from final result
305
+ if isinstance(final_result, dict):
306
+ conversation_id = final_result.get("conversationId") or final_result.get("conversation_id")
307
+ logger.debug(f"Got final result after {event_count} events, conversation_id={conversation_id}")
290
308
  break
291
309
  elif "error" in event:
292
310
  error = event["error"]
@@ -309,6 +327,9 @@ class MCPClient:
309
327
  item = msg.get("item", {})
310
328
  item_type = item.get("type")
311
329
 
330
+ # Log ALL item_completed events to help debug
331
+ logger.debug(f"item_completed: type={item_type}, keys={list(item.keys())}")
332
+
312
333
  # Agent text responses - codex uses "AgentMessage" type
313
334
  if item_type == "AgentMessage":
314
335
  content = item.get("content", [])
@@ -318,6 +339,19 @@ class MCPClient:
318
339
  elif isinstance(block, str):
319
340
  agent_messages.append(block)
320
341
 
342
+ # Also check for "agent_message" (lowercase) variant
343
+ elif item_type == "agent_message":
344
+ text = item.get("text", "") or item.get("message", "")
345
+ if text:
346
+ agent_messages.append(text)
347
+ # Also check content array
348
+ content = item.get("content", [])
349
+ for block in content:
350
+ if isinstance(block, dict) and block.get("text"):
351
+ agent_messages.append(block["text"])
352
+ elif isinstance(block, str):
353
+ agent_messages.append(block)
354
+
321
355
  # Legacy format check
322
356
  elif item_type == "message" and item.get("role") == "assistant":
323
357
  content = item.get("content", [])
@@ -327,6 +361,21 @@ class MCPClient:
327
361
  elif isinstance(block, str):
328
362
  agent_messages.append(block)
329
363
 
364
+ # Generic message type - check for text/content
365
+ elif item_type == "message":
366
+ text = item.get("text", "")
367
+ if text:
368
+ agent_messages.append(text)
369
+ content = item.get("content", [])
370
+ if isinstance(content, str):
371
+ agent_messages.append(content)
372
+ elif isinstance(content, list):
373
+ for block in content:
374
+ if isinstance(block, dict) and block.get("text"):
375
+ agent_messages.append(block["text"])
376
+ elif isinstance(block, str):
377
+ agent_messages.append(block)
378
+
330
379
  # Function call outputs (for context)
331
380
  elif item_type == "function_call_output":
332
381
  output = item.get("output", "")
@@ -334,8 +383,8 @@ class MCPClient:
334
383
  agent_messages.append(f"[Tool output]: {output[:500]}")
335
384
 
336
385
  # Log other item types we're not handling
337
- elif item_type not in ("function_call", "tool_call", "UserMessage"):
338
- logger.debug(f"Unhandled item_completed type: {item_type}, keys: {list(item.keys())}")
386
+ elif item_type not in ("function_call", "tool_call", "UserMessage", "user_message"):
387
+ logger.debug(f"Unhandled item_completed type: {item_type}, item={item}")
339
388
 
340
389
  elif msg_type == "agent_message":
341
390
  # Direct agent message event
@@ -406,28 +455,50 @@ class MCPClient:
406
455
  agent_messages.append(full_streaming)
407
456
  logger.debug(f"Captured {len(streaming_text)} streaming chunks ({len(full_streaming)} chars)")
408
457
 
409
- # Build result
410
- result = {
411
- "conversationId": session_id,
412
- "messages": agent_messages,
413
- "output": "\n".join(agent_messages) if agent_messages else "",
414
- "usage": token_usage, # Token usage for cost tracking
415
- }
416
-
417
- # Merge final result and try to extract content if no messages
418
- if final_result:
419
- result.update(final_result)
420
- if not agent_messages and "content" in final_result:
458
+ # Try to extract content from final_result if we have no messages
459
+ if final_result and not agent_messages:
460
+ if "content" in final_result:
421
461
  content = final_result["content"]
422
462
  if isinstance(content, list):
423
463
  for block in content:
424
464
  if isinstance(block, dict) and block.get("text"):
425
465
  agent_messages.append(block["text"])
426
- if agent_messages:
427
- result["messages"] = agent_messages
428
- result["output"] = "\n".join(agent_messages)
466
+ elif isinstance(block, str):
467
+ agent_messages.append(block)
468
+ elif isinstance(content, str):
469
+ agent_messages.append(content)
470
+ # Also check for text field
471
+ if not agent_messages and "text" in final_result:
472
+ agent_messages.append(final_result["text"])
473
+
474
+ # Build result - prefer conversation_id from final result, fallback to session_id from events
475
+ effective_conversation_id = conversation_id or session_id
476
+ result = {
477
+ "conversationId": effective_conversation_id,
478
+ "messages": agent_messages,
479
+ "output": "\n".join(agent_messages) if agent_messages else "",
480
+ "usage": token_usage, # Token usage for cost tracking
481
+ }
429
482
 
430
- logger.debug(f"MCP call complete: {len(agent_messages)} messages, session={session_id}")
483
+ # Log detailed debug info if we didn't capture any messages
484
+ if not agent_messages:
485
+ event_types = [e.get("method") or f"id:{e.get('id')}" for e in all_events[:20]]
486
+ logger.warning(
487
+ f"MCP call returned no messages. "
488
+ f"conversation_id={effective_conversation_id}, "
489
+ f"session_id={session_id}, "
490
+ f"event_count={len(all_events)}, "
491
+ f"event_types={event_types}, "
492
+ f"final_result_keys={list(final_result.keys()) if final_result else 'None'}"
493
+ )
494
+ # Log codex/event details for debugging
495
+ codex_events = [e for e in all_events if e.get("method") == "codex/event"]
496
+ if codex_events:
497
+ for ce in codex_events[-5:]: # Last 5 codex events
498
+ msg = ce.get("params", {}).get("msg", {})
499
+ logger.debug(f" codex/event: type={msg.get('type')}, keys={list(msg.keys())}")
500
+
501
+ logger.debug(f"MCP call complete: {len(agent_messages)} messages, conversation_id={effective_conversation_id}")
431
502
  return result
432
503
 
433
504
  def close(self) -> None:
@@ -521,14 +592,35 @@ class CodexMCPAdapter(ExecutorAdapter):
521
592
  if model:
522
593
  args["model"] = model
523
594
 
595
+ logger.info(f"Calling codex with task_len={len(task)}, cwd={cwd}, model={model or 'default'}")
596
+
524
597
  result = client.call_tool("codex", args)
525
598
 
526
- # Track usage
599
+ # Log the result structure
600
+ conversation_id = result.get("conversationId")
601
+ messages_count = len(result.get("messages", []))
602
+ output_len = len(result.get("output", ""))
527
603
  usage = result.get("usage", {})
604
+
605
+ logger.info(
606
+ f"codex result: conversation_id={conversation_id}, "
607
+ f"messages_count={messages_count}, output_len={output_len}, "
608
+ f"usage={usage.get('total_tokens', 0)} tokens"
609
+ )
610
+
611
+ # Warn if we got a conversation ID but no messages (agent did work but we lost output)
612
+ if conversation_id and not messages_count and not output_len:
613
+ logger.warning(
614
+ f"codex returned conversation_id={conversation_id} but NO messages/output! "
615
+ f"The agent processed {usage.get('total_tokens', 0)} tokens but we didn't capture the response. "
616
+ f"This may indicate an issue with event parsing."
617
+ )
618
+
619
+ # Track usage
528
620
  self._accumulate_usage(usage)
529
621
 
530
622
  return {
531
- "conversation_id": result.get("conversationId"),
623
+ "conversation_id": conversation_id,
532
624
  "response": self._extract_response(result),
533
625
  "raw_messages": result.get("messages", []),
534
626
  "usage": usage,
@@ -549,13 +641,22 @@ class CodexMCPAdapter(ExecutorAdapter):
549
641
  """
550
642
  client = self._ensure_client()
551
643
 
552
- logger.debug(f"Calling codex-reply with conversation_id={conversation_id}")
644
+ logger.info(f"Calling codex-reply with conversation_id={conversation_id}, message_len={len(message)}")
645
+ logger.debug(f"MCP client alive: {client.is_alive}, initialized: {client._initialized}")
553
646
 
554
647
  result = client.call_tool("codex-reply", {
555
648
  "conversationId": conversation_id,
556
649
  "prompt": message,
557
650
  })
558
651
 
652
+ # Log the full result structure for debugging
653
+ logger.info(
654
+ f"codex-reply result: conversationId={result.get('conversationId')}, "
655
+ f"messages_count={len(result.get('messages', []))}, "
656
+ f"output_len={len(result.get('output', ''))}, "
657
+ f"usage={result.get('usage', {}).get('total_tokens', 0)} tokens"
658
+ )
659
+
559
660
  # Check for conversation loss - MCP returns empty result when session not found
560
661
  if not result.get("messages") and not result.get("output"):
561
662
  logger.error(