zwarm 3.10.3__py3-none-any.whl → 3.10.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ """
2
+ Compression modules for infinite-running agents.
3
+
4
+ Two types of compression:
5
+ 1. TC (Tool Call) Compression - compresses tool call results before they enter context
6
+ 2. Rollout Compression - manages message history eviction (LRU-style)
7
+
8
+ These modules allow agents to run virtually indefinitely without context explosion.
9
+ """
10
+
11
+ from .tc_compression import (
12
+ TCCompressor,
13
+ NoOpTCCompressor,
14
+ NaiveSizeTCCompressor,
15
+ get_tc_compressor,
16
+ )
17
+ from .rollout_compression import (
18
+ RolloutCompressor,
19
+ NoOpRolloutCompressor,
20
+ LRURolloutCompressor,
21
+ SlidingWindowRolloutCompressor,
22
+ get_rollout_compressor,
23
+ )
24
+
25
+ __all__ = [
26
+ # TC Compression
27
+ "TCCompressor",
28
+ "NoOpTCCompressor",
29
+ "NaiveSizeTCCompressor",
30
+ "get_tc_compressor",
31
+ # Rollout Compression
32
+ "RolloutCompressor",
33
+ "NoOpRolloutCompressor",
34
+ "LRURolloutCompressor",
35
+ "SlidingWindowRolloutCompressor",
36
+ "get_rollout_compressor",
37
+ ]
@@ -0,0 +1,292 @@
1
+ """
2
+ Rollout Compression - manages message history eviction for infinite-running agents.
3
+
4
+ As agents run, their conversation history grows. These compressors implement
5
+ different strategies for evicting old messages to keep context bounded.
6
+
7
+ Available compressors:
8
+ - NoOpRolloutCompressor: No eviction (context will eventually overflow)
9
+ - LRURolloutCompressor: Evict oldest messages, keeping system prompt
10
+ - SlidingWindowRolloutCompressor: Keep last N turns (user+assistant pairs)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from abc import ABC, abstractmethod
16
+ from dataclasses import dataclass, field
17
+ from typing import Any
18
+
19
+
20
@dataclass
class EvictionStats:
    """Counters describing the outcome of a single rollout-compression pass."""

    messages_before: int = 0  # Number of messages given to the compressor
    messages_after: int = 0  # Number of messages in the returned history
    messages_evicted: int = 0  # Number of messages dropped
    tokens_evicted_estimate: int = 0  # Rough estimate
    eviction_triggered: bool = False  # Whether the pass dropped anything

    def to_dict(self) -> dict[str, Any]:
        """Return the counters as a plain dict keyed by field name."""
        exported = (
            "messages_before",
            "messages_after",
            "messages_evicted",
            "tokens_evicted_estimate",
            "eviction_triggered",
        )
        return {key: getattr(self, key) for key in exported}
38
+
39
+
40
class RolloutCompressor(ABC):
    """
    Interface for rollout (message history) compressors.

    Concrete subclasses pick an eviction strategy that keeps the
    conversation history bounded while retaining important context.
    """

    name: str = "base"

    @abstractmethod
    def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
        """
        Trim the message history and report what was removed.

        Args:
            messages: List of message dicts with 'role' and 'content' keys

        Returns:
            (compressed_messages, eviction_stats)
        """

    def should_compress(self, messages: list[dict]) -> bool:
        """Tell whether compression is needed; the base answer is always True."""
        return True

    def __repr__(self) -> str:
        return type(self).__name__ + "()"
69
+
70
+
71
class NoOpRolloutCompressor(RolloutCompressor):
    """
    Compressor that never evicts anything.

    Choose this to switch rollout compression off and let the context
    window grow until it overflows (which will eventually error).
    """

    name = "noop"

    def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
        """Return the same list object together with stats showing no eviction."""
        count = len(messages)
        stats = EvictionStats(
            messages_before=count,
            messages_after=count,
            eviction_triggered=False,
        )
        return messages, stats
88
+
89
+
90
class LRURolloutCompressor(RolloutCompressor):
    """
    LRU (Least Recently Used) compressor - evicts oldest messages.

    Keeps the system prompt and the most recent messages. When the message
    count exceeds max_messages, evicts oldest non-preserved messages.

    Preserved messages are never evicted, so if they alone outnumber
    max_messages the result is still longer than max_messages.

    Args:
        max_messages: Maximum messages to keep (default: 50)
        preserve_system: Keep all system messages (default: True)
        preserve_first_user: Keep first user message as context (default: True)
    """

    name = "lru"

    def __init__(
        self,
        max_messages: int = 50,
        preserve_system: bool = True,
        preserve_first_user: bool = True,
    ):
        self.max_messages = max_messages
        self.preserve_system = preserve_system
        self.preserve_first_user = preserve_first_user

    def should_compress(self, messages: list[dict]) -> bool:
        """Only compress if we exceed max_messages."""
        return len(messages) > self.max_messages

    def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
        """
        Evict oldest messages, keeping system prompt and recent history.

        Args:
            messages: List of message dicts with 'role' and 'content' keys

        Returns:
            (compressed_messages, eviction_stats). When no compression is
            needed the input list object itself is returned.
        """
        stats = EvictionStats(messages_before=len(messages))

        if not self.should_compress(messages):
            stats.messages_after = len(messages)
            return messages, stats

        # Split into (index, message) pairs so the original order can be
        # restored after the two groups are recombined.
        preserved: list[tuple[int, dict]] = []
        evictable: list[tuple[int, dict]] = []

        first_user_seen = False
        for index, msg in enumerate(messages):
            role = msg.get("role", "")

            if self.preserve_system and role == "system":
                preserved.append((index, msg))
            elif self.preserve_first_user and role == "user" and not first_user_seen:
                # First user message is kept as task context
                preserved.append((index, msg))
                first_user_seen = True
            else:
                evictable.append((index, msg))

        # Budget left for evictable messages once preserved ones are counted
        keep_count = max(0, self.max_messages - len(preserved))

        # Split on an explicit non-negative index. Negative slicing breaks at
        # keep_count == 0 (x[-0:] is the whole list), which previously made the
        # stats claim nothing was evicted even though everything evictable was.
        split_at = max(0, len(evictable) - keep_count)
        evicted = evictable[:split_at]
        kept_evictable = evictable[split_at:]

        # Merge preserved and kept messages, maintaining original order
        all_kept = sorted(preserved + kept_evictable, key=lambda pair: pair[0])
        result = [msg for _, msg in all_kept]

        # NOTE(review): eviction is per message, so a tool result can survive
        # while the assistant message that requested it is evicted - confirm
        # the consumer of this history tolerates that.

        # Estimate tokens evicted (rough: ~4 chars per token)
        evicted_chars = sum(len(str(msg.get("content", ""))) for _, msg in evicted)

        stats.messages_after = len(result)
        stats.messages_evicted = len(evicted)
        stats.tokens_evicted_estimate = evicted_chars // 4
        stats.eviction_triggered = bool(evicted)

        return result, stats

    def __repr__(self) -> str:
        return f"LRURolloutCompressor(max_messages={self.max_messages})"
171
+
172
+
173
class SlidingWindowRolloutCompressor(RolloutCompressor):
    """
    Sliding window compressor - keeps last N turns (user+assistant pairs).

    A "turn" starts at a user message and runs up to (not including) the
    next user message, so it also carries the assistant replies and tool
    results in between. This preserves conversation coherence better than
    raw message count.

    Args:
        max_turns: Maximum turns to keep (default: 20). Values below 1 keep
            no conversation turns at all.
        preserve_system: Keep all system messages, placed ahead of the
            conversation (default: True). When False, system messages are
            treated like any other message and can be evicted with the turn
            they fall into.
        preserve_first_turn: Keep first turn as context (default: True). The
            first turn counts against max_turns.
    """

    name = "sliding_window"

    def __init__(
        self,
        max_turns: int = 20,
        preserve_system: bool = True,
        preserve_first_turn: bool = True,
    ):
        self.max_turns = max_turns
        self.preserve_system = preserve_system
        self.preserve_first_turn = preserve_first_turn

    @staticmethod
    def _group_turns(conversation: list[dict]) -> list[list[dict]]:
        """Group messages into turns; each user message opens a new turn."""
        turns: list[list[dict]] = []
        current_turn: list[dict] = []

        for msg in conversation:
            if msg.get("role", "") == "user" and current_turn:
                turns.append(current_turn)
                current_turn = [msg]
            else:
                current_turn.append(msg)

        # Don't forget the last turn
        if current_turn:
            turns.append(current_turn)
        return turns

    def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
        """
        Keep last N turns, preserving system messages.

        Args:
            messages: List of message dicts with 'role' and 'content' keys

        Returns:
            (compressed_messages, eviction_stats). The returned list is always
            a new list with preserved system messages first.
        """
        stats = EvictionStats(messages_before=len(messages))

        # Pull system messages aside only when asked to preserve them;
        # previously the preserve_system flag was stored but never consulted.
        system_messages: list[dict] = []
        conversation: list[dict] = []
        for msg in messages:
            if self.preserve_system and msg.get("role") == "system":
                system_messages.append(msg)
            else:
                conversation.append(msg)

        turns = self._group_turns(conversation)

        # Treat a negative budget as "keep no turns"
        max_turns = max(0, self.max_turns)

        if len(turns) <= max_turns:
            # No eviction needed
            result = system_messages + conversation
            stats.messages_after = len(result)
            return result, stats

        # Keep first turn + last (max_turns - 1) turns, or just the last
        # max_turns turns when the first turn is not preserved.
        if self.preserve_first_turn and max_turns > 0:
            head = turns[:1]
            tail_count = max_turns - 1
        else:
            head = []
            tail_count = max_turns

        # Slice from an explicit start index: turns[-0:] would return every
        # turn, so max_turns of 0 or 1 used to evict nothing at all.
        tail_start = len(turns) - tail_count
        kept_turns = head + turns[tail_start:]
        evicted_turns = turns[len(head):tail_start]

        # Flatten kept turns back into messages
        kept_conversation = [msg for turn in kept_turns for msg in turn]
        result = system_messages + kept_conversation

        # Estimate tokens evicted (rough: ~4 chars per token)
        evicted_chars = sum(
            len(str(msg.get("content", "")))
            for turn in evicted_turns
            for msg in turn
        )

        evicted_count = len(messages) - len(result)
        stats.messages_after = len(result)
        stats.messages_evicted = evicted_count
        stats.tokens_evicted_estimate = evicted_chars // 4
        stats.eviction_triggered = evicted_count > 0

        return result, stats

    def __repr__(self) -> str:
        return f"SlidingWindowRolloutCompressor(max_turns={self.max_turns})"
262
+
263
+
264
+ # =============================================================================
265
+ # Factory
266
+ # =============================================================================
267
+
268
+
269
def get_rollout_compressor(
    name: str = "lru",
    **kwargs,
) -> RolloutCompressor:
    """
    Build the rollout compressor registered under the given name.

    Args:
        name: Compressor name ("noop", "lru", "sliding_window")
        **kwargs: Passed to compressor constructor

    Returns:
        Configured RolloutCompressor instance

    Raises:
        ValueError: If name is not a registered compressor.
    """
    registry = {
        "noop": NoOpRolloutCompressor,
        "lru": LRURolloutCompressor,
        "sliding_window": SlidingWindowRolloutCompressor,
    }

    compressor_cls = registry.get(name)
    if compressor_cls is None:
        raise ValueError(f"Unknown rollout compressor: {name}. Available: {list(registry.keys())}")

    return compressor_cls(**kwargs)
@@ -0,0 +1,165 @@
1
+ """
2
+ Tool Call (TC) Compression - compresses tool results before they enter context.
3
+
4
+ When an agent makes a tool call, the result can be arbitrarily large. These
5
+ compressors marshal results into a more digestible format for the agent.
6
+
7
+ Available compressors:
8
+ - NoOpTCCompressor: Pass-through, no compression (default for now)
9
+ - NaiveSizeTCCompressor: Truncate to last N characters
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from abc import ABC, abstractmethod
15
+ from typing import Any
16
+
17
+
18
class TCCompressor(ABC):
    """
    Interface for tool call result compressors.

    Concrete subclasses pick a compression strategy that stops tool
    results from exploding the agent's context window.
    """

    name: str = "base"

    @abstractmethod
    def compress(self, tool_name: str, result: Any) -> Any:
        """
        Shrink the result of one tool call.

        Args:
            tool_name: Name of the tool that was called
            result: The raw result from the tool

        Returns:
            Compressed result (same type or string)
        """

    def __repr__(self) -> str:
        return type(self).__name__ + "()"
44
+
45
+
46
class NoOpTCCompressor(TCCompressor):
    """
    No-op compressor - passes results through unchanged.

    Use this when tool results are already well-bounded or when you want
    to disable compression entirely.

    Args:
        **_ignored_options: Accepted and discarded. This lets the compressor
            be built with the same keyword options as any other compressor
            (e.g. max_chars) instead of raising TypeError, so switching a
            configuration to "noop" does not require removing those options.
    """

    name = "noop"

    def __init__(self, **_ignored_options: Any) -> None:
        # Nothing to configure; options meant for other compressors are dropped.
        pass

    def compress(self, tool_name: str, result: Any) -> Any:
        """Pass through unchanged."""
        return result
59
+
60
+
61
class NaiveSizeTCCompressor(TCCompressor):
    """
    Naive size-based compressor - truncates results to last N characters.

    Simple but effective: keeps the most recent output which is usually
    the most relevant (e.g., last N chars of a log file).

    The limit applies to each string individually; a dict or list holding
    several strings can therefore still exceed max_chars in total.

    Args:
        max_chars: Maximum characters to keep per string (default: 25000)
        truncation_marker: String to prepend when truncated. Formatted with
            ``n=max_chars``; the marker counts against the budget.
    """

    name = "naive_size"

    def __init__(
        self,
        max_chars: int = 25000,
        truncation_marker: str = "... [truncated, showing last {n} chars] ...\n",
    ):
        self.max_chars = max_chars
        self.truncation_marker = truncation_marker

    def compress(self, tool_name: str, result: Any) -> Any:
        """
        Truncate to last max_chars characters if needed.

        Args:
            tool_name: Name of the tool that was called (unused here)
            result: The raw result from the tool

        Returns:
            A str, dict or list of the same kind as the input with every
            contained string truncated; any other type is converted with
            str() and truncated.
        """
        if isinstance(result, (dict, str, list)):
            return self._compress_value(result)

        # For other types, convert to string and truncate
        return self._truncate_string(str(result))

    def _compress_value(self, value: Any) -> Any:
        """Compress one nested value; non-container, non-string values pass through."""
        if isinstance(value, str):
            return self._truncate_string(value)
        if isinstance(value, dict):
            return self._compress_dict(value)
        if isinstance(value, list):
            return self._compress_list(value)
        return value

    def _truncate_string(self, s: str) -> str:
        """Truncate string to last max_chars, never returning more than that."""
        limit = max(0, self.max_chars)
        if len(s) <= limit:
            return s

        marker = self.truncation_marker.format(n=self.max_chars)
        keep_chars = limit - len(marker)
        if keep_chars <= 0:
            # The marker alone would use up the budget. Fall back to the bare
            # tail; slicing with s[-keep_chars:] here would return the whole
            # string (or an arbitrary slice) and make the output grow.
            return s[len(s) - limit:]

        # Keep last N chars with marker
        return marker + s[len(s) - keep_chars:]

    def _compress_dict(self, d: dict) -> dict:
        """Recursively compress dict values into a new dict."""
        return {key: self._compress_value(value) for key, value in d.items()}

    def _compress_list(self, lst: list) -> list:
        """Recursively compress list items (including nested lists) into a new list."""
        return [self._compress_value(item) for item in lst]

    def __repr__(self) -> str:
        return f"NaiveSizeTCCompressor(max_chars={self.max_chars})"
136
+
137
+
138
+ # =============================================================================
139
+ # Factory
140
+ # =============================================================================
141
+
142
+
143
def get_tc_compressor(
    name: str = "noop",
    **kwargs,
) -> TCCompressor:
    """
    Build the TC compressor registered under the given name.

    Args:
        name: Compressor name ("noop", "naive_size")
        **kwargs: Passed to compressor constructor

    Returns:
        Configured TCCompressor instance

    Raises:
        ValueError: If name is not a registered compressor.
    """
    registry = {
        "noop": NoOpTCCompressor,
        "naive_size": NaiveSizeTCCompressor,
    }

    compressor_cls = registry.get(name)
    if compressor_cls is None:
        raise ValueError(f"Unknown TC compressor: {name}. Available: {list(registry.keys())}")

    return compressor_cls(**kwargs)
zwarm/core/config.py CHANGED
@@ -40,9 +40,18 @@ class ExecutorConfig:
40
40
  # Note: web_search is always enabled via .codex/config.toml (set up by `zwarm init`)
41
41
 
42
42
 
43
@dataclass
class TCCompressionConfig:
    """Settings for compressing tool call results."""

    # Whether tool call results are compressed at all
    enabled: bool = True
    # Compressor to use: noop | naive_size
    compressor: str = "naive_size"
    # Character limit, for naive_size compressor
    max_chars: int = 25000
50
+
51
+
43
52
  @dataclass
44
53
  class CompactionConfig:
45
- """Configuration for context window compaction."""
54
+ """Configuration for context window compaction (rollout compression)."""
46
55
 
47
56
  enabled: bool = True
48
57
  max_tokens: int = 100000 # Trigger compaction when estimated tokens exceed this
@@ -62,7 +71,10 @@ class OrchestratorConfig:
62
71
  max_steps: int = 50
63
72
  max_steps_per_turn: int = 60 # Max tool-call steps before returning to user (pilot mode)
64
73
  parallel_delegations: int = 4
65
- compaction: CompactionConfig = field(default_factory=CompactionConfig)
74
+
75
+ # Compression settings for infinite-running agents
76
+ compaction: CompactionConfig = field(default_factory=CompactionConfig) # Rollout compression
77
+ tc_compression: TCCompressionConfig = field(default_factory=TCCompressionConfig) # Tool call compression
66
78
 
67
79
  # Directory restrictions for agent delegations
68
80
  # None = only working_dir allowed (most restrictive, default)
@@ -115,10 +127,13 @@ class ZwarmConfig:
115
127
  orchestrator_data = data.get("orchestrator", {})
116
128
  watchers_data = data.get("watchers", {})
117
129
 
118
- # Parse compaction config from orchestrator
130
+ # Parse compression configs from orchestrator
119
131
  compaction_data = orchestrator_data.pop("compaction", {}) if orchestrator_data else {}
120
132
  compaction_config = CompactionConfig(**compaction_data) if compaction_data else CompactionConfig()
121
133
 
134
+ tc_compression_data = orchestrator_data.pop("tc_compression", {}) if orchestrator_data else {}
135
+ tc_compression_config = TCCompressionConfig(**tc_compression_data) if tc_compression_data else TCCompressionConfig()
136
+
122
137
  # Parse watchers config - handle both list shorthand and dict format
123
138
  if isinstance(watchers_data, list):
124
139
  # Shorthand: watchers: [progress, budget, scope]
@@ -140,11 +155,18 @@ class ZwarmConfig:
140
155
  message_role=watchers_data.get("message_role", "user"),
141
156
  )
142
157
 
143
- # Build orchestrator config with nested compaction
158
+ # Build orchestrator config with nested compression configs
144
159
  if orchestrator_data:
145
- orchestrator_config = OrchestratorConfig(**orchestrator_data, compaction=compaction_config)
160
+ orchestrator_config = OrchestratorConfig(
161
+ **orchestrator_data,
162
+ compaction=compaction_config,
163
+ tc_compression=tc_compression_config,
164
+ )
146
165
  else:
147
- orchestrator_config = OrchestratorConfig(compaction=compaction_config)
166
+ orchestrator_config = OrchestratorConfig(
167
+ compaction=compaction_config,
168
+ tc_compression=tc_compression_config,
169
+ )
148
170
 
149
171
  return cls(
150
172
  weave=WeaveConfig(**weave_data) if weave_data else WeaveConfig(),
@@ -183,6 +205,11 @@ class ZwarmConfig:
183
205
  "keep_first_n": self.orchestrator.compaction.keep_first_n,
184
206
  "keep_last_n": self.orchestrator.compaction.keep_last_n,
185
207
  },
208
+ "tc_compression": {
209
+ "enabled": self.orchestrator.tc_compression.enabled,
210
+ "compressor": self.orchestrator.tc_compression.compressor,
211
+ "max_chars": self.orchestrator.tc_compression.max_chars,
212
+ },
186
213
  },
187
214
  "watchers": {
188
215
  "enabled": self.watchers.enabled,
zwarm/orchestrator.py CHANGED
@@ -83,6 +83,8 @@ class Orchestrator(YamlAgent):
83
83
  )
84
84
  # Callback for step progress (used by CLI to print tool calls)
85
85
  _step_callback: Callable[[int, list[tuple[dict[str, Any], Any]]], None] | None = PrivateAttr(default=None)
86
+ # TC compression for tool call results
87
+ _tc_compressor: Any = PrivateAttr(default=None)
86
88
 
87
89
  def model_post_init(self, __context: Any) -> None:
88
90
  """Initialize state after model creation."""
@@ -132,6 +134,14 @@ class Orchestrator(YamlAgent):
132
134
  from zwarm.sessions import CodexSessionManager
133
135
  self._session_manager = CodexSessionManager(self.working_dir / ".zwarm")
134
136
 
137
+ # Initialize TC compressor for tool call result compression
138
+ if self.config.orchestrator.tc_compression.enabled:
139
+ from zwarm.compression import get_tc_compressor
140
+ self._tc_compressor = get_tc_compressor(
141
+ name=self.config.orchestrator.tc_compression.compressor,
142
+ max_chars=self.config.orchestrator.tc_compression.max_chars,
143
+ )
144
+
135
145
  # Link session manager to environment for live session visibility in observe()
136
146
  if hasattr(self.env, "set_session_manager"):
137
147
  self.env.set_session_manager(self._session_manager)
@@ -532,6 +542,10 @@ Review what was accomplished in the previous session and delegate new tasks as n
532
542
  else:
533
543
  tc_output = f"Unknown tool: {tc_name}"
534
544
 
545
+ # Apply TC compression to reduce context usage
546
+ if self._tc_compressor is not None:
547
+ tc_output = self._tc_compressor.compress(tc_name, tc_output)
548
+
535
549
  # Collect tool call info and result
536
550
  tool_call_info = {
537
551
  "name": tc_name,
@@ -48,16 +48,18 @@ You command executor agents - capable coding agents that handle specific tasks.
48
48
 
49
49
  **converse(session_id, message)** - Send follow-up to an executor. Returns immediately.
50
50
 
51
- **peek_session(session_id)** - Quick poll: {is_running, status}. Use in polling loops.
52
-
53
- **check_session(session_id)** - Get FULL result. Complete response, tokens, runtime.
51
+ **list_sessions(status=None)** - Dashboard of all executors. Shows status, preview, and `needs_attention` flag.
52
+ - `status`: Filter by "running", "completed", "failed", or None for all
53
+ - Use this to check which sessions are done before calling check_session.
54
54
 
55
- **get_trajectory(session_id, full=False)** - See what steps the agent took.
56
- - `full=True`: Complete untruncated details (debugging)
57
- - `full=False`: Concise summaries (default)
55
+ **check_session(session_id, latest=True)** - Get session result.
56
+ - `latest=True` (default): Only the latest response (keeps context small)
57
+ - `latest=False`: Full conversation history
58
+ - Returns: status, response, tokens, runtime
58
59
 
59
- **list_sessions(status=None)** - See all executors. `needs_attention=True` = ready for review.
60
- - `status`: Filter by "running", "completed", "failed", or None for all
60
+ **get_trajectory(session_id)** - Debug tool: see step-by-step what the agent did.
61
+ - Use when a session failed or went off-rails to understand what happened.
62
+ - Returns concise summaries of each step.
61
63
 
62
64
  **end_session(session_id, reason=None, delete=False)** - End an executor.
63
65
  - `delete=True`: Remove from list entirely
@@ -81,9 +83,9 @@ All executor sessions run in the background. delegate() and converse() return im
81
83
  ```
82
84
  1. delegate(task, model="5.2") → session_id
83
85
  2. sleep(30)
84
- 3. peek_session(id) → done?
85
- 4. If running, goto 2
86
- 5. check_session(id) → FULL result
86
+ 3. list_sessions() → check needs_attention
87
+ 4. If still running, goto 2
88
+ 5. check_session(id) → get result
87
89
  ```
88
90
 
89
91
  **Parallel work:**
@@ -91,8 +93,8 @@ All executor sessions run in the background. delegate() and converse() return im
91
93
  1. delegate(task1) → session_a
92
94
  2. delegate(task2) → session_b
93
95
  3. sleep(30)
94
- 4. list_sessions() → see needs_attention
95
- 5. check_session() for each done
96
+ 4. list_sessions() → see which have needs_attention=True
97
+ 5. check_session(id) for each done
96
98
  6. Repeat until all complete
97
99
  ```
98
100
 
zwarm/prompts/pilot.py CHANGED
@@ -54,16 +54,18 @@ You command executor agents - capable coding agents that do specific tasks. Thin
54
54
 
55
55
  **converse(session_id, message)** - Send follow-up to a crew member. Returns immediately.
56
56
 
57
- **peek_session(session_id)** - Quick status check. Use for polling: {is_running, status}
58
-
59
- **check_session(session_id)** - Get FULL result. Complete response, tokens, runtime.
57
+ **list_sessions(status=None)** - Dashboard of all crew. Shows status, preview, and `needs_attention` flag.
58
+ - `status`: Filter by "running", "completed", "failed", or None for all
59
+ - Use this to check which sessions are done before calling check_session.
60
60
 
61
- **get_trajectory(session_id, full=False)** - See what steps the agent took.
62
- - `full=True`: Show complete untruncated content for all steps (debugging)
63
- - `full=False`: Concise summaries (default)
61
+ **check_session(session_id, latest=True)** - Get session result.
62
+ - `latest=True` (default): Only the latest response (keeps context small)
63
+ - `latest=False`: Full conversation history
64
+ - Returns: status, response, tokens, runtime
64
65
 
65
- **list_sessions(status=None)** - See all crew. `needs_attention=True` means ready for review.
66
- - `status`: Filter by "running", "completed", "failed", or None for all
66
+ **get_trajectory(session_id)** - Debug tool: see step-by-step what the agent did.
67
+ - Use when a session failed or went off-rails to understand what happened.
68
+ - Returns concise summaries of each step (reasoning, commands, tool calls).
67
69
 
68
70
  **end_session(session_id, reason=None, delete=False)** - Dismiss a crew member.
69
71
  - `reason`: Optional note about why
@@ -80,9 +82,9 @@ NOTE: Only use the tools listed above. Do NOT use `list_agents`, `run_agent`, `e
80
82
  ```
81
83
  1. delegate(task, model="5.2") → session_id # or model="opus" for complex tasks
82
84
  2. sleep(30)
83
- 3. peek_session(id) → done?
84
- 4. If running, goto 2
85
- 5. check_session(id) → FULL result
85
+ 3. list_sessions() → see which are done (needs_attention=True)
86
+ 4. If all still running, goto 2
87
+ 5. check_session(id) → get the result
86
88
  ```
87
89
 
88
90
  Parallelize freely - dispatch multiple crew, sleep, check which finished.
zwarm/tools/delegation.py CHANGED
@@ -34,17 +34,43 @@ if TYPE_CHECKING:
34
34
  ADAPTERS = ["codex", "claude"]
35
35
 
36
36
 
37
- def _get_session_manager(orchestrator: "Orchestrator"):
37
+ def _get_all_adapter_managers(orchestrator: "Orchestrator") -> dict:
38
38
  """
39
- Get the default session manager for list/get operations.
39
+ Get session managers for ALL adapters.
40
40
 
41
- Uses CodexSessionManager as the default since all adapters share
42
- the same .zwarm/sessions/ directory structure.
41
+ This ensures we can find sessions regardless of which adapter created them.
42
+ Returns a dict of {adapter_name: manager}.
43
43
  """
44
- if not hasattr(orchestrator, "_session_manager") or orchestrator._session_manager is None:
45
- from zwarm.sessions import CodexSessionManager
46
- orchestrator._session_manager = CodexSessionManager(orchestrator.working_dir / ".zwarm")
47
- return orchestrator._session_manager
44
+ # Initialize adapter managers dict if needed
45
+ if not hasattr(orchestrator, "_adapter_managers"):
46
+ orchestrator._adapter_managers = {}
47
+
48
+ # Ensure all adapters have managers
49
+ from zwarm.sessions import get_session_manager
50
+ for adapter in ADAPTERS:
51
+ if adapter not in orchestrator._adapter_managers:
52
+ manager = get_session_manager(adapter, str(orchestrator.working_dir / ".zwarm"))
53
+ orchestrator._adapter_managers[adapter] = manager
54
+
55
+ return orchestrator._adapter_managers
56
+
57
+
58
def _get_session_with_manager(orchestrator: "Orchestrator", session_id: str):
    """
    Look a session up in every adapter's manager and return (session, manager).

    Sessions are adapter-scoped, so each adapter's manager is asked in turn
    and the first one that returns a truthy session wins.

    Returns:
        (session, manager) if found, (None, None) if not found
    """
    for manager in _get_all_adapter_managers(orchestrator).values():
        session = manager.get_session(session_id)
        if session:
            return session, manager
    return None, None
48
74
 
49
75
 
50
76
  def _get_adapter_manager(orchestrator: "Orchestrator", adapter: str):
@@ -177,7 +203,7 @@ def delegate(
177
203
  WORKFLOW:
178
204
  1. delegate(task="...", model="5.2") -> session_id
179
205
  2. sleep(30)
180
- 3. peek_session(session_id) -> {is_running: true/false}
206
+ 3. list_sessions() -> check which are done (needs_attention=True)
181
207
  4. If the session is still running, goto 2
182
208
  5. check_session(session_id) -> FULL response
183
209
 
@@ -295,9 +321,8 @@ def converse(
295
321
  sleep(30)
296
322
  check_session(session_id) # Get response
297
323
  """
298
- # First get session to determine adapter
299
- default_manager = _get_session_manager(self)
300
- session = default_manager.get_session(session_id)
324
+ # Find session across all adapters
325
+ session, manager = _get_session_with_manager(self, session_id)
301
326
 
302
327
  if not session:
303
328
  return {
@@ -322,9 +347,8 @@ def converse(
322
347
  "hint": "Start a new session with delegate()",
323
348
  }
324
349
 
325
- # Get the correct adapter manager for this session
350
+ # Use the manager that found the session (correct adapter)
326
351
  adapter = getattr(session, "adapter", "codex")
327
- manager = _get_adapter_manager(self, adapter)
328
352
 
329
353
  # Inject the follow-up message
330
354
  # This uses the adapter's inject_message() which:
@@ -356,25 +380,22 @@ def converse(
356
380
  def check_session(
357
381
  self: "Orchestrator",
358
382
  session_id: str,
383
+ latest: bool = True,
359
384
  ) -> dict[str, Any]:
360
385
  """
361
- Check the status of a session and get the FULL response.
386
+ Check the status of a session and get the response.
362
387
 
363
388
  This is your primary tool for seeing what an executor accomplished.
364
- Returns the complete, untruncated response from the agent.
365
-
366
- Use this after peek_session() shows the session is done, or when
367
- you need to see the full details of what was accomplished.
368
389
 
369
390
  Args:
370
391
  session_id: The session to check.
392
+ latest: If True (default), only return the latest assistant message.
393
+ If False, return full conversation history.
371
394
 
372
395
  Returns:
373
- {session_id, status, response (FULL), tokens, runtime}
396
+ {session_id, status, response, tokens, runtime}
374
397
  """
375
- manager = _get_session_manager(self)
376
-
377
- session = manager.get_session(session_id)
398
+ session, manager = _get_session_with_manager(self, session_id)
378
399
  if not session:
379
400
  return {
380
401
  "success": False,
@@ -382,13 +403,22 @@ def check_session(
382
403
  "hint": "Use list_sessions() to see available sessions",
383
404
  }
384
405
 
385
- # Get latest response - FULL, not truncated
386
- response_text = ""
387
406
  messages = manager.get_messages(session_id)
388
- for msg in reversed(messages):
389
- if msg.role == "assistant":
390
- response_text = msg.content # Full content, no truncation
391
- break
407
+
408
+ if latest:
409
+ # Only get the latest assistant message
410
+ response_text = ""
411
+ for msg in reversed(messages):
412
+ if msg.role == "assistant":
413
+ response_text = msg.content
414
+ break
415
+ response_content = response_text if response_text else "(no response yet)"
416
+ else:
417
+ # Return full conversation history
418
+ response_content = [
419
+ {"role": msg.role, "content": msg.content}
420
+ for msg in messages
421
+ ]
392
422
 
393
423
  # Build log path
394
424
  log_path = str(manager._output_path(session.id, session.turn))
@@ -401,8 +431,8 @@ def check_session(
401
431
  "is_running": session.is_running,
402
432
  "turn": session.turn,
403
433
  "message_count": len(messages),
404
- "task": _truncate(session.task, 80), # Task can stay truncated
405
- "response": response_text if response_text else "(no response yet)", # FULL response
434
+ "task": _truncate(session.task, 80),
435
+ "response": response_content,
406
436
  "tokens": _get_total_tokens(session),
407
437
  "runtime": session.runtime,
408
438
  "log_file": log_path,
@@ -417,51 +447,6 @@ def check_session(
417
447
  return result
418
448
 
419
449
 
420
- @weaveTool
421
- def peek_session(
422
- self: "Orchestrator",
423
- session_id: str,
424
- ) -> dict[str, Any]:
425
- """
426
- Quick peek at a session - minimal info for FAST POLLING.
427
-
428
- Use this in your polling loop to check if a session is done:
429
- 1. delegate() -> start work
430
- 2. sleep(30)
431
- 3. peek_session() -> is_running? If yes, goto 2
432
- 4. check_session() -> get FULL response
433
-
434
- Returns truncated preview only. Once done, use check_session() for full response.
435
-
436
- Args:
437
- session_id: The session to peek at.
438
-
439
- Returns:
440
- {session_id, status, is_running, latest_message (truncated preview)}
441
- """
442
- manager = _get_session_manager(self)
443
-
444
- session = manager.get_session(session_id)
445
- if not session:
446
- return {"success": False, "error": f"Unknown session: {session_id}"}
447
-
448
- # Get latest assistant message only
449
- latest = ""
450
- messages = manager.get_messages(session_id)
451
- for msg in reversed(messages):
452
- if msg.role == "assistant":
453
- latest = msg.content.replace("\n", " ")
454
- break
455
-
456
- return {
457
- "success": True,
458
- "session_id": session.short_id,
459
- "status": session.status.value,
460
- "is_running": session.status.value == "running",
461
- "latest_message": _truncate(latest, 150) if latest else None,
462
- }
463
-
464
-
465
450
  @weaveTool
466
451
  def get_trajectory(
467
452
  self: "Orchestrator",
@@ -487,9 +472,7 @@ def get_trajectory(
487
472
  - check_session() -> what did the agent conclude? (FULL response)
488
473
  - get_trajectory() -> what steps did the agent take? (step-by-step)
489
474
  """
490
- manager = _get_session_manager(self)
491
-
492
- session = manager.get_session(session_id)
475
+ session, manager = _get_session_with_manager(self, session_id)
493
476
  if not session:
494
477
  return {"success": False, "error": f"Unknown session: {session_id}"}
495
478
 
@@ -565,9 +548,7 @@ def end_session(
565
548
  Returns:
566
549
  {session_id, status}
567
550
  """
568
- manager = _get_session_manager(self)
569
-
570
- session = manager.get_session(session_id)
551
+ session, manager = _get_session_with_manager(self, session_id)
571
552
  if not session:
572
553
  return {
573
554
  "success": False,
@@ -594,7 +575,7 @@ def end_session(
594
575
  "session_id": session_id,
595
576
  }
596
577
 
597
- # Refresh
578
+ # Refresh - use same manager
598
579
  session = manager.get_session(session_id)
599
580
 
600
581
  return {
@@ -633,7 +614,8 @@ def list_sessions(
633
614
  """
634
615
  from datetime import datetime
635
616
 
636
- manager = _get_session_manager(self)
617
+ # Get managers for ALL adapters to aggregate sessions
618
+ managers = _get_all_adapter_managers(self)
637
619
 
638
620
  # Map string status to enum
639
621
  from zwarm.sessions import SessionStatus
@@ -648,7 +630,13 @@ def list_sessions(
648
630
  }
649
631
  status_filter = status_map.get(status.lower())
650
632
 
651
- sessions = manager.list_sessions(status=status_filter)
633
+ # Aggregate sessions from ALL adapters
634
+ sessions = []
635
+ for manager in managers.values():
636
+ sessions.extend(manager.list_sessions(status=status_filter))
637
+
638
+ # Sort by created_at descending (newest first)
639
+ sessions.sort(key=lambda s: s.created_at, reverse=True)
652
640
 
653
641
  def time_ago(iso_str: str) -> tuple[str, float]:
654
642
  """Convert ISO timestamp to ('Xm ago', seconds)."""
@@ -681,13 +669,16 @@ def list_sessions(
681
669
 
682
670
  updated_str, updated_secs = time_ago(s.updated_at)
683
671
 
684
- # Get last assistant message
685
- messages = manager.get_messages(s.id)
672
+ # Get last assistant message using the correct adapter's manager
673
+ session_adapter = getattr(s, "adapter", "codex")
674
+ session_manager = managers.get(session_adapter)
686
675
  last_message = ""
687
- for msg in reversed(messages):
688
- if msg.role == "assistant":
689
- last_message = msg.content.replace("\n", " ")
690
- break
676
+ if session_manager:
677
+ messages = session_manager.get_messages(s.id)
678
+ for msg in reversed(messages):
679
+ if msg.role == "assistant":
680
+ last_message = msg.content.replace("\n", " ")
681
+ break
691
682
 
692
683
  # Flag sessions that need attention:
693
684
  # - Recently completed (< 60s)
@@ -739,10 +730,11 @@ def sleep(self, seconds: float) -> dict[str, Any]:
739
730
  give them time to complete before checking their status. This lets you
740
731
  manage your own polling loop:
741
732
 
742
- 1. delegate(task, wait=False) -> start background work
743
- 2. sleep(10) -> wait a bit
744
- 3. peek_session(id) -> check if done
733
+ 1. delegate(task) -> start background work
734
+ 2. sleep(30) -> wait a bit
735
+ 3. list_sessions() -> check which are done (needs_attention=True)
745
736
  4. Repeat 2-3 if still running
737
+ 5. check_session(id) -> get result
746
738
 
747
739
  Args:
748
740
  seconds: Number of seconds to sleep (max 300 = 5 minutes)
@@ -58,7 +58,7 @@ def _extract_tool_call_summary(tc: Any) -> str:
58
58
  elif name == "bash":
59
59
  cmd = args.get("command", "")[:60]
60
60
  return f"$ {cmd}"
61
- elif name in ("check_session", "peek_session", "end_session"):
61
+ elif name in ("check_session", "end_session"):
62
62
  sid = args.get("session_id", "")[:8]
63
63
  return f"{name}({sid})"
64
64
  elif name == "list_sessions":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zwarm
3
- Version: 3.10.3
3
+ Version: 3.10.5
4
4
  Summary: Multi-Agent CLI Orchestration Research Platform
5
5
  Requires-Python: <3.14,>=3.13
6
6
  Requires-Dist: prompt-toolkit>=3.0.52
@@ -1,14 +1,17 @@
1
1
  zwarm/__init__.py,sha256=3i3LMjHwIzE-LFIS2aUrwv3EZmpkvVMe-xj1h97rcSM,837
2
- zwarm/orchestrator.py,sha256=A2Mj7YSdM4QEW7zyiuDbOxI-tzHfyx_XPZG0JxgrDpE,26192
2
+ zwarm/orchestrator.py,sha256=1nQmFTUKzrtvrKi08gg9ow6UHt-KvNozXz1td-yrCbA,26878
3
3
  zwarm/test_orchestrator_watchers.py,sha256=QpoaehPU7ekT4XshbTOWnJ2H0wRveV3QOZjxbgyJJLY,807
4
4
  zwarm/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  zwarm/cli/interactive.py,sha256=Bd2lBKV1qnVjEx3-swCBl0U0T7JlFaCX9JmSxhffr84,41704
6
6
  zwarm/cli/main.py,sha256=WlEpdsdGacoJcSOk3_Rhr9athUGvYcDjixsrFsXg8fE,77251
7
7
  zwarm/cli/pilot.py,sha256=zq7p-R4dflY2UocbDNdDOpok4Lqu97CmzTTxM5Ji1Ow,42331
8
+ zwarm/compression/__init__.py,sha256=tMx9jBVwrxKBtim8l4DdKllsE77yoq0kcYn7KG7tFAU,936
9
+ zwarm/compression/rollout_compression.py,sha256=Y5xjIkLDaCdTilzagbjpS7cpCoAuKpoyG9TC5jR4Hu8,9647
10
+ zwarm/compression/tc_compression.py,sha256=_qBKOWcCmqsU3EIg8_rJzeEgUL9lo4JcOnl2yPleTKQ,4867
8
11
  zwarm/core/__init__.py,sha256=nEdpEHMFo0gEEKgX-eKHabyOdrOI6UXfWqLu3FfZDao,376
9
12
  zwarm/core/checkpoints.py,sha256=D6sXCMB7Sa1kchQ9_lQx_rabwc5-_7jbuynWgA1nkNY,6560
10
13
  zwarm/core/compact.py,sha256=Y8C7Gs-5-WOU43WRvQ863Qzd5xtuEqR6Aw3r2p8_-i8,10907
11
- zwarm/core/config.py,sha256=m3Vm6U_BNtEDu_cz2d6E3p_RNQfRHWaq-946mDru9-8,12656
14
+ zwarm/core/config.py,sha256=jjn-BV5CME_PgIB8gW3o20b5qSNUdPoWWMZp22jnVmg,13813
12
15
  zwarm/core/costs.py,sha256=Z-5o-ZQWRCfFv0mTHev4Ke1AzyXKhXWO6ss7S8eBX9U,1485
13
16
  zwarm/core/environment.py,sha256=v7wwVCTIOt_qfiJEe774oM4vIYnlb28s6LJXucJdjoo,8735
14
17
  zwarm/core/models.py,sha256=PrC3okRBVJxISUa1Fax4KkagqLT6Xub-kTxC9drN0sY,10083
@@ -18,22 +21,22 @@ zwarm/core/test_compact.py,sha256=WSdjCB5t4YMcknsrkmJIUsVOPY28s4y9GnDmu3Z4BFw,11
18
21
  zwarm/core/test_config.py,sha256=bXXd3OHhK-ndC7wAxePWIdpu73s4O1eScxi3xDzrZwA,4828
19
22
  zwarm/core/test_models.py,sha256=sWTIhMZvuLP5AooGR6y8OR2EyWydqVfhmGrE7NPBBnk,8450
20
23
  zwarm/prompts/__init__.py,sha256=DI307o712F8qQyDt5vwnFgpVBrxpKwjhr0MaBHLzr9E,334
21
- zwarm/prompts/orchestrator.py,sha256=PhAQUItwRuy8Y6sk9-Yk719EhZZ_vOGyvSU2tNmaYAQ,6764
22
- zwarm/prompts/pilot.py,sha256=DfjUbOOTHF3CrBVGyp7Pd4RRyGRmQ7rXRUJ6DiuiwwM,6178
24
+ zwarm/prompts/orchestrator.py,sha256=HPoJxtH4_4yLHv4sS2SCYRsUjQo5S7_uGCodm-K97gA,6979
25
+ zwarm/prompts/pilot.py,sha256=ZKrxJHIni3cnT-oWQmRV-S-X_PkdiJnEiLGgCjism3E,6412
23
26
  zwarm/sessions/__init__.py,sha256=5fPkl6JRS_GwPn9hi5iv3dzIpGWu_yghPtvPZdujhnM,1728
24
27
  zwarm/sessions/base.py,sha256=3YBd-WWKslQvsBtu03Blth8cEGc_4k4H3GOoKJoTcgg,16976
25
28
  zwarm/sessions/claude.py,sha256=hBP_TpNFJjR29IRGJFB3rlG7Z9uWEYSbBGV61tpIr00,16672
26
29
  zwarm/sessions/manager.py,sha256=g_QQM9sGdpQ1MK1jdwWMrADeJZY2AqfGDBLVtQasUxg,18520
27
30
  zwarm/tools/__init__.py,sha256=FpqxwXJA6-fQ7C-oLj30jjK_0qqcE7MbI0dQuaB56kU,290
28
- zwarm/tools/delegation.py,sha256=NFMX-f05r28A1OgzYaSMdrq_8VPpP1pJ_nfouYr_2zA,24690
31
+ zwarm/tools/delegation.py,sha256=UTDueeLIpYWMbCN1klPWBUno1zvkhPwqfzO4OtTQiNw,24765
29
32
  zwarm/watchers/__init__.py,sha256=a96s7X6ruYkF2ItWWOZ3Q5QUOMOoeCW4Vz8XXcYLXPM,956
30
33
  zwarm/watchers/base.py,sha256=r1GoPlj06nOT2xp4fghfSjxbRyFFFQUB6HpZbEyO2OY,3834
31
34
  zwarm/watchers/builtin.py,sha256=IL5QwwKOIqWEfJ_uQWb321Px4i5OLtI_vnWQMudqKoA,19064
32
- zwarm/watchers/llm_watcher.py,sha256=yJGpE3BGKNZX3qgPsiNtJ5d3UJpiTT1V-A-Rh4AiMYM,11029
35
+ zwarm/watchers/llm_watcher.py,sha256=bj7snjMdSbarZA9YSPHa660pTU3TnJ6RQ8rSf6YX8ys,11013
33
36
  zwarm/watchers/manager.py,sha256=XZjBVeHjgCUlkTUeHqdvBvHoBC862U1ik0fG6nlRGog,5587
34
37
  zwarm/watchers/registry.py,sha256=A9iBIVIFNtO7KPX0kLpUaP8dAK7ozqWLA44ocJGnOw4,1219
35
38
  zwarm/watchers/test_watchers.py,sha256=zOsxumBqKfR5ZVGxrNlxz6KcWjkcdp0QhW9WB0_20zM,7855
36
- zwarm-3.10.3.dist-info/METADATA,sha256=L2yZPENBjY-rQ9leUD2kP3VhqK6c_4dj2Vujgh36NIM,11761
37
- zwarm-3.10.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
38
- zwarm-3.10.3.dist-info/entry_points.txt,sha256=u0OXq4q8d3yJ3EkUXwZfkS-Y8Lcy0F8cWrcQfoRxM6Q,46
39
- zwarm-3.10.3.dist-info/RECORD,,
39
+ zwarm-3.10.5.dist-info/METADATA,sha256=XiRWnNB4N5oHNFaweJbjoDT4-5BHiNSdcBniJTk1lFE,11761
40
+ zwarm-3.10.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
41
+ zwarm-3.10.5.dist-info/entry_points.txt,sha256=u0OXq4q8d3yJ3EkUXwZfkS-Y8Lcy0F8cWrcQfoRxM6Q,46
42
+ zwarm-3.10.5.dist-info/RECORD,,
File without changes