zwarm 1.1.1__py3-none-any.whl → 1.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ """
2
+ Adapters: Executor wrappers for CLI coding agents.
3
+
4
+ Adapters provide a unified interface to different coding CLIs (Codex, Claude Code).
5
+ Use the registry to discover and instantiate adapters by name.
6
+ """
7
+
8
+ from zwarm.adapters.base import ExecutorAdapter
9
+ from zwarm.adapters.registry import register_adapter, get_adapter, list_adapters, adapter_exists
10
+
11
+ # Import built-in adapters to register them
12
+ from zwarm.adapters import codex_mcp as _codex_mcp # noqa: F401
13
+ from zwarm.adapters import claude_code as _claude_code # noqa: F401
14
+
15
+ __all__ = [
16
+ "ExecutorAdapter",
17
+ "register_adapter",
18
+ "get_adapter",
19
+ "list_adapters",
20
+ "adapter_exists",
21
+ ]
@@ -18,6 +18,7 @@ from typing import Any, Literal
18
18
  import weave
19
19
 
20
20
  from zwarm.adapters.base import ExecutorAdapter
21
+ from zwarm.adapters.registry import register_adapter
21
22
  from zwarm.core.models import (
22
23
  ConversationSession,
23
24
  SessionMode,
@@ -25,14 +26,13 @@ from zwarm.core.models import (
25
26
  )
26
27
 
27
28
 
29
+ @register_adapter("claude_code")
28
30
  class ClaudeCodeAdapter(ExecutorAdapter):
29
31
  """
30
32
  Claude Code adapter using the claude CLI.
31
33
 
32
34
  Supports both sync (conversational) and async (fire-and-forget) modes.
33
35
  """
34
-
35
- name = "claude_code"
36
36
  DEFAULT_MODEL = "claude-sonnet-4-5-20250514" # Best balance of speed and capability
37
37
 
38
38
  def __init__(self, model: str | None = None):
@@ -186,6 +186,7 @@ class ClaudeCodeAdapter(ExecutorAdapter):
186
186
  "exit_code": result.returncode,
187
187
  }
188
188
 
189
+ @weave.op()
189
190
  async def start_session(
190
191
  self,
191
192
  task: str,
@@ -195,7 +196,7 @@ class ClaudeCodeAdapter(ExecutorAdapter):
195
196
  permission_mode: str = "bypassPermissions",
196
197
  **kwargs,
197
198
  ) -> ConversationSession:
198
- """Start a Claude Code session."""
199
+ """Start a Claude Code session (sync or async mode)."""
199
200
  session = ConversationSession(
200
201
  adapter=self.name,
201
202
  mode=SessionMode(mode),
@@ -277,6 +278,7 @@ class ClaudeCodeAdapter(ExecutorAdapter):
277
278
 
278
279
  return response_text
279
280
 
281
+ @weave.op()
280
282
  async def check_status(
281
283
  self,
282
284
  session: ConversationSession,
@@ -20,6 +20,7 @@ from typing import Any, Literal
20
20
  import weave
21
21
 
22
22
  from zwarm.adapters.base import ExecutorAdapter
23
+ from zwarm.adapters.registry import register_adapter
23
24
  from zwarm.core.models import (
24
25
  ConversationSession,
25
26
  SessionMode,
@@ -450,6 +451,7 @@ class MCPClient:
450
451
  return self._proc is not None and self._proc.poll() is None
451
452
 
452
453
 
454
+ @register_adapter("codex_mcp")
453
455
  class CodexMCPAdapter(ExecutorAdapter):
454
456
  """
455
457
  Codex adapter using MCP server for sync conversations.
@@ -458,8 +460,6 @@ class CodexMCPAdapter(ExecutorAdapter):
458
460
  The MCP client uses subprocess.Popen (not asyncio) so it persists across
459
461
  multiple asyncio.run() calls, preserving conversation state.
460
462
  """
461
-
462
- name = "codex_mcp"
463
463
  DEFAULT_MODEL = "gpt-5.1-codex-mini" # Default codex model
464
464
 
465
465
  def __init__(self, model: str | None = None):
@@ -549,22 +549,36 @@ class CodexMCPAdapter(ExecutorAdapter):
549
549
  """
550
550
  client = self._ensure_client()
551
551
 
552
+ logger.debug(f"Calling codex-reply with conversation_id={conversation_id}")
553
+
552
554
  result = client.call_tool("codex-reply", {
553
555
  "conversationId": conversation_id,
554
556
  "prompt": message,
555
557
  })
556
558
 
559
+ # Check for conversation loss - MCP returns empty result when session not found
560
+ if not result.get("messages") and not result.get("output"):
561
+ logger.error(
562
+ f"codex-reply returned empty result for conversation_id={conversation_id}. "
563
+ f"The MCP server may have lost the conversation state. Result: {result}"
564
+ )
565
+
557
566
  # Track usage
558
567
  usage = result.get("usage", {})
559
568
  self._accumulate_usage(usage)
560
569
 
570
+ response = self._extract_response(result)
571
+ logger.debug(f"codex-reply response length: {len(response)} chars")
572
+
561
573
  return {
562
- "response": self._extract_response(result),
574
+ "response": response,
563
575
  "raw_messages": result.get("messages", []),
564
576
  "usage": usage,
565
577
  "total_usage": self.total_usage,
578
+ "conversation_lost": not result.get("messages") and not result.get("output"),
566
579
  }
567
580
 
581
+ @weave.op()
568
582
  async def start_session(
569
583
  self,
570
584
  task: str,
@@ -574,7 +588,7 @@ class CodexMCPAdapter(ExecutorAdapter):
574
588
  sandbox: str = "workspace-write",
575
589
  **kwargs,
576
590
  ) -> ConversationSession:
577
- """Start a Codex session."""
591
+ """Start a Codex session (sync or async mode)."""
578
592
  effective_model = model or self._model
579
593
  session = ConversationSession(
580
594
  adapter=self.name,
@@ -597,6 +611,13 @@ class CodexMCPAdapter(ExecutorAdapter):
597
611
  session.conversation_id = result["conversation_id"]
598
612
  if session.conversation_id:
599
613
  self._sessions[session.id] = session.conversation_id
614
+ logger.debug(f"Session {session.id[:8]} mapped to conversation {session.conversation_id}")
615
+ else:
616
+ # This is bad - we won't be able to continue this conversation
617
+ logger.warning(
618
+ f"Session {session.id[:8]} started but MCP didn't return a conversation ID. "
619
+ "Further converse() calls will fail."
620
+ )
600
621
 
601
622
  session.add_message("user", task)
602
623
  session.add_message("assistant", result["response"])
@@ -606,15 +627,18 @@ class CodexMCPAdapter(ExecutorAdapter):
606
627
 
607
628
  else:
608
629
  # Async mode: use codex exec (fire-and-forget)
609
- # This runs in a subprocess without MCP
630
+ # This runs in a subprocess without MCP, outputs JSONL events
610
631
  cmd = [
611
632
  "codex", "exec",
612
633
  "--dangerously-bypass-approvals-and-sandbox",
613
634
  "--skip-git-repo-check",
614
635
  "--json",
615
636
  "--model", effective_model,
637
+ "-C", str(working_dir.absolute()), # Explicit working directory
638
+ "--", task,
616
639
  ]
617
- cmd.extend(["--", task])
640
+
641
+ logger.info(f"Starting async codex: {' '.join(cmd[:8])}...")
618
642
 
619
643
  proc = subprocess.Popen(
620
644
  cmd,
@@ -648,6 +672,16 @@ class CodexMCPAdapter(ExecutorAdapter):
648
672
  )
649
673
 
650
674
  response_text = result["response"]
675
+
676
+ # Check if conversation was lost
677
+ if result.get("conversation_lost"):
678
+ logger.warning(
679
+ f"Conversation {session.conversation_id} was lost. "
680
+ f"Session {session.id} will be marked as needing re-delegation."
681
+ )
682
+ # Mark the session as having a lost conversation so orchestrator can handle it
683
+ session.conversation_id = None # Clear the stale ID
684
+
651
685
  session.add_message("user", message)
652
686
  session.add_message("assistant", response_text)
653
687
 
@@ -656,6 +690,54 @@ class CodexMCPAdapter(ExecutorAdapter):
656
690
 
657
691
  return response_text
658
692
 
@weave.op()
def _parse_jsonl_output(self, stdout: str) -> dict[str, Any]:
    """
    Parse JSONL output from codex exec --json.

    Every non-blank line is decoded as one JSON event. A line that is not
    valid JSON, or that decodes to something other than a JSON object, is
    logged and skipped so that a single bad line cannot abort the parse.

    Args:
        stdout: Captured standard output, one JSON event per line. ``None``
            or an empty string is treated as "no output".

    Returns dict with:
    - response: Text of all "agent_message" items, joined with newlines
    - usage: Usage dict of the last "turn.completed" event ({} if none)
    - thread_id: The "thread_id" of the "thread.started" event (None if absent)
    - events: All parsed event objects (for debugging)
    """
    response_parts: list[str] = []
    usage: dict[str, Any] = {}
    thread_id = None
    events: list[dict[str, Any]] = []

    # Split on "\n" only: JSON strings may legally contain other Unicode
    # line separators, which str.splitlines() would break a record on.
    for line in (stdout or "").split("\n"):
        if not line.strip():
            continue

        # Keep the try body to the one call that can raise a decode error.
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            logger.warning(f"Failed to parse JSONL line: {line[:100]}")
            continue

        # Valid JSON that is not an object (number, string, list, null)
        # carries no event fields; skip it instead of failing on .get().
        if not isinstance(event, dict):
            logger.warning(f"Ignoring non-object JSONL line: {line[:100]}")
            continue

        events.append(event)
        event_type = event.get("type", "")

        if event_type == "thread.started":
            thread_id = event.get("thread_id")

        elif event_type == "item.completed":
            item = event.get("item")
            if isinstance(item, dict) and item.get("type") == "agent_message":
                text = item.get("text")
                # A missing or non-string text still contributes an (empty)
                # part, so the join below can never raise.
                response_parts.append(text if isinstance(text, str) else "")

        elif event_type == "turn.completed":
            reported = event.get("usage")
            # Later turns overwrite earlier ones; always keep a dict.
            usage = reported if isinstance(reported, dict) else {}

    return {
        "response": "\n".join(response_parts),
        "usage": usage,
        "thread_id": thread_id,
        "events": events,
    }
739
+
740
+ @weave.op()
659
741
  async def check_status(
660
742
  self,
661
743
  session: ConversationSession,
@@ -672,14 +754,50 @@ class CodexMCPAdapter(ExecutorAdapter):
672
754
  if poll is None:
673
755
  return {"status": "running"}
674
756
 
675
- # Process finished
757
+ # Process finished - parse the JSONL output
676
758
  stdout, stderr = session.process.communicate()
759
+
677
760
  if poll == 0:
678
- session.complete(stdout[:1000] if stdout else "Completed")
679
- return {"status": "completed", "output": stdout}
761
+ # Parse JSONL to extract actual response
762
+ parsed = self._parse_jsonl_output(stdout)
763
+ response_text = parsed["response"] or "(no response captured)"
764
+
765
+ # Add the response as a message
766
+ session.add_message("assistant", response_text)
767
+
768
+ # Track token usage
769
+ if parsed["usage"]:
770
+ session.add_usage({
771
+ "input_tokens": parsed["usage"].get("input_tokens", 0),
772
+ "output_tokens": parsed["usage"].get("output_tokens", 0),
773
+ "total_tokens": (
774
+ parsed["usage"].get("input_tokens", 0) +
775
+ parsed["usage"].get("output_tokens", 0)
776
+ ),
777
+ })
778
+
779
+ session.complete(response_text[:500])
780
+ return {
781
+ "status": "completed",
782
+ "response": response_text,
783
+ "usage": parsed["usage"],
784
+ "thread_id": parsed["thread_id"],
785
+ }
680
786
  else:
681
- session.fail(stderr[:1000] if stderr else f"Exit code: {poll}")
682
- return {"status": "failed", "error": stderr, "exit_code": poll}
787
+ # Try to parse stderr or stdout for error info
788
+ error_msg = stderr.strip() if stderr else f"Exit code: {poll}"
789
+
790
+ # Sometimes errors come through stdout as JSONL too
791
+ if stdout and not stderr:
792
+ try:
793
+ parsed = self._parse_jsonl_output(stdout)
794
+ if not parsed["response"]:
795
+ error_msg = f"Process failed with no response. Exit code: {poll}"
796
+ except Exception:
797
+ error_msg = stdout[:500] if stdout else f"Exit code: {poll}"
798
+
799
+ session.fail(error_msg[:500])
800
+ return {"status": "failed", "error": error_msg, "exit_code": poll}
683
801
 
684
802
  async def stop(
685
803
  self,
@@ -709,6 +827,15 @@ class CodexMCPAdapter(ExecutorAdapter):
709
827
 
710
828
  def _extract_response(self, result: dict) -> str:
711
829
  """Extract response text from MCP result."""
830
+ # Check for error indicators - empty result suggests lost conversation
831
+ if (
832
+ result.get("conversationId") is None
833
+ and not result.get("messages")
834
+ and not result.get("output")
835
+ ):
836
+ logger.warning(f"MCP returned empty result - conversation may be lost: {result}")
837
+ return "[ERROR] Conversation lost - the MCP server no longer has this session. Please re-delegate the task."
838
+
712
839
  # First check for our collected output
713
840
  if result.get("output"):
714
841
  return result["output"]
@@ -735,5 +862,6 @@ class CodexMCPAdapter(ExecutorAdapter):
735
862
  if "text" in result:
736
863
  return result["text"]
737
864
 
738
- # Fallback: stringify the result
865
+ # Fallback: stringify the result (but log it as unexpected)
866
+ logger.warning(f"Unexpected MCP result format, returning raw: {list(result.keys())}")
739
867
  return json.dumps(result, indent=2)
@@ -0,0 +1,69 @@
1
+ """
2
+ Adapter registry for discovering and instantiating executor adapters.
3
+
4
+ This follows the same pattern as the watcher registry, enabling:
5
+ - Easy addition of new adapters without modifying orchestrator code
6
+ - Runtime discovery of available adapters
7
+ - Consistent instantiation across CLI and orchestrator
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Any, Type
13
+
14
+ from zwarm.adapters.base import ExecutorAdapter
15
+
16
+
17
+ # Global adapter registry
18
+ _ADAPTERS: dict[str, Type[ExecutorAdapter]] = {}
19
+
20
+
def register_adapter(name: str):
    """
    Build a class decorator that registers an adapter under ``name``.

    The decorated class gets its ``name`` attribute set to ``name`` and is
    stored in the module-level registry under that key, replacing whatever
    was stored there before. The same class object is returned.

    Example:
        @register_adapter("codex_mcp")
        class CodexMCPAdapter(ExecutorAdapter):
            ...
    """

    def _register(adapter_cls: Type[ExecutorAdapter]) -> Type[ExecutorAdapter]:
        # Stamp the registry key onto the class, then record the class.
        adapter_cls.name = name
        _ADAPTERS[name] = adapter_cls
        return adapter_cls

    return _register
37
+
38
+
def get_adapter(name: str, model: str | None = None, **kwargs: Any) -> ExecutorAdapter:
    """
    Instantiate the adapter registered under ``name``.

    Args:
        name: Registered adapter name (e.g., "codex_mcp", "claude_code")
        model: Optional model override, forwarded as the ``model`` keyword
        **kwargs: Extra keyword arguments forwarded to the adapter constructor

    Returns:
        A newly constructed adapter instance

    Raises:
        ValueError: If no adapter is registered under ``name``
    """
    # Happy path first: build and return a fresh instance.
    if name in _ADAPTERS:
        adapter_cls = _ADAPTERS[name]
        return adapter_cls(model=model, **kwargs)

    # Unknown name: report what is available to help the caller.
    available = list(_ADAPTERS.keys())
    raise ValueError(
        f"Unknown adapter: {name}. Available: {available}"
    )
60
+
61
+
def list_adapters() -> list[str]:
    """Return the names of all registered adapters as a new list."""
    return [*_ADAPTERS]
65
+
66
+
def adapter_exists(name: str) -> bool:
    """Return True when an adapter is registered under ``name``, else False."""
    return name in _ADAPTERS
@@ -177,6 +177,56 @@ class TestCodexMCPAdapter:
177
177
  response = adapter._extract_response(result)
178
178
  assert "unknown" in response
179
179
 
def test_parse_jsonl_output(self, adapter):
    """A typical codex exec --json stream yields response, thread and usage."""
    stream = "\n".join([
        '{"type":"thread.started","thread_id":"abc123"}',
        '{"type":"turn.started"}',
        '{"type":"item.completed","item":{"id":"item_0","type":"reasoning","text":"Thinking..."}}',
        '{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"The answer is 4"}}',
        '{"type":"turn.completed","usage":{"input_tokens":100,"output_tokens":10}}',
    ])

    result = adapter._parse_jsonl_output(stream)
    token_usage = result["usage"]

    # Only the agent_message item contributes to the response text.
    assert result["response"] == "The answer is 4"
    assert result["thread_id"] == "abc123"
    assert token_usage["input_tokens"] == 100
    assert token_usage["output_tokens"] == 10
    assert len(result["events"]) == 5
195
+
def test_parse_jsonl_output_multiple_messages(self, adapter):
    """Several agent messages are joined with newlines, in stream order."""
    stream = "\n".join([
        '{"type":"thread.started","thread_id":"xyz"}',
        '{"type":"item.completed","item":{"type":"agent_message","text":"First part"}}',
        '{"type":"item.completed","item":{"type":"agent_message","text":"Second part"}}',
        '{"type":"turn.completed","usage":{"input_tokens":50,"output_tokens":20}}',
    ])

    result = adapter._parse_jsonl_output(stream)

    assert result["response"] == "First part\nSecond part"
    assert result["thread_id"] == "xyz"
207
+
def test_parse_jsonl_output_empty(self, adapter):
    """An empty stream produces empty response/usage and no thread id."""
    result = adapter._parse_jsonl_output("")

    assert result["response"] == ""
    assert result["usage"] == {}
    assert result["thread_id"] is None
214
+
def test_parse_jsonl_output_malformed_lines(self, adapter):
    """Malformed lines are skipped while valid events are still used."""
    stream = "\n".join([
        '{"type":"thread.started","thread_id":"test123"}',
        'not valid json',
        '{"type":"item.completed","item":{"type":"agent_message","text":"Valid response"}}',
        'also not json',
        '{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}',
    ])

    result = adapter._parse_jsonl_output(stream)

    # The valid lines around the garbage must still be extracted.
    assert result["response"] == "Valid response"
    assert result["thread_id"] == "test123"
    # Two of the five lines are not JSON, so three events remain.
    assert len(result["events"]) == 3
229
+
180
230
 
181
231
  @pytest.mark.integration
182
232
  class TestCodexMCPIntegration:
@@ -0,0 +1,68 @@
1
+ """Tests for the adapter registry."""
2
+
3
+ import pytest
4
+
5
+ from zwarm.adapters import (
6
+ get_adapter,
7
+ list_adapters,
8
+ adapter_exists,
9
+ ExecutorAdapter,
10
+ )
11
+
12
+
class TestAdapterRegistry:
    """Registration and lookup behaviour of the adapter registry."""

    def test_list_adapters_includes_builtins(self):
        """Both built-in adapter names appear in the listing after import."""
        registered = list_adapters()
        for builtin in ("codex_mcp", "claude_code"):
            assert builtin in registered

    def test_get_adapter_codex(self):
        """The codex adapter can be fetched by its registered name."""
        codex = get_adapter("codex_mcp")
        assert isinstance(codex, ExecutorAdapter)
        assert codex.name == "codex_mcp"

    def test_get_adapter_claude(self):
        """The claude adapter can be fetched by its registered name."""
        claude = get_adapter("claude_code")
        assert isinstance(claude, ExecutorAdapter)
        assert claude.name == "claude_code"

    def test_get_adapter_with_model(self):
        """The model argument reaches the adapter (stored as ``_model``)."""
        configured = get_adapter("codex_mcp", model="custom-model")
        assert configured._model == "custom-model"

    def test_get_unknown_adapter(self):
        """An unregistered name raises ValueError that names the adapter."""
        with pytest.raises(ValueError) as caught:
            get_adapter("nonexistent_adapter")
        message = str(caught.value)
        assert "Unknown adapter" in message
        assert "nonexistent_adapter" in message

    def test_adapter_exists(self):
        """adapter_exists returns exactly True or False as appropriate."""
        expectations = {
            "codex_mcp": True,
            "claude_code": True,
            "nonexistent": False,
        }
        for adapter_name, expected in expectations.items():
            assert adapter_exists(adapter_name) is expected
52
+
53
+
class TestAdapterInstances:
    """Adapters returned by the registry are independent objects."""

    def test_separate_instances(self):
        """Two lookups of the same name never return the same object."""
        first = get_adapter("codex_mcp")
        second = get_adapter("codex_mcp")
        assert first is not second

    def test_different_models(self):
        """Each instance keeps the model it was created with."""
        with_model_a = get_adapter("codex_mcp", model="model-a")
        with_model_b = get_adapter("codex_mcp", model="model-b")
        assert with_model_a._model == "model-a"
        assert with_model_b._model == "model-b"