zwarm 0.1.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
- zwarm/adapters/claude_code.py +55 -3
- zwarm/adapters/codex_mcp.py +433 -122
- zwarm/adapters/test_codex_mcp.py +26 -26
- zwarm/cli/main.py +464 -3
- zwarm/core/compact.py +312 -0
- zwarm/core/config.py +51 -9
- zwarm/core/environment.py +104 -33
- zwarm/core/models.py +16 -0
- zwarm/core/test_compact.py +266 -0
- zwarm/orchestrator.py +222 -39
- zwarm/prompts/orchestrator.py +128 -146
- zwarm/test_orchestrator_watchers.py +23 -0
- zwarm/tools/delegation.py +23 -4
- zwarm/watchers/builtin.py +90 -4
- zwarm/watchers/manager.py +46 -8
- zwarm/watchers/test_watchers.py +42 -0
- {zwarm-0.1.0.dist-info → zwarm-1.0.0.dist-info}/METADATA +162 -36
- zwarm-1.0.0.dist-info/RECORD +33 -0
- zwarm-0.1.0.dist-info/RECORD +0 -30
- {zwarm-0.1.0.dist-info → zwarm-1.0.0.dist-info}/WHEEL +0 -0
- {zwarm-0.1.0.dist-info → zwarm-1.0.0.dist-info}/entry_points.txt +0 -0
zwarm/core/compact.py
ADDED
@@ -0,0 +1,312 @@
+"""
+Message compaction for context window management.
+
+Safely prunes old messages while preserving:
+- System prompt and initial user task
+- Tool call/response pairs (never orphaned)
+- Recent conversation context
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class CompactionResult:
+    """Result of a compaction operation."""
+
+    messages: list[dict[str, Any]]
+    removed_count: int
+    original_count: int
+    preserved_reason: str | None = None
+
+    @property
+    def was_compacted(self) -> bool:
+        return self.removed_count > 0
+
+
+def estimate_tokens(messages: list[dict[str, Any]]) -> int:
+    """
+    Rough token estimate for messages.
+
+    Uses ~4 chars per token as a simple heuristic.
+    This is intentionally conservative.
+    """
+    total_chars = 0
+    for msg in messages:
+        content = msg.get("content", "")
+        if isinstance(content, str):
+            total_chars += len(content)
+        elif isinstance(content, list):
+            # Anthropic-style content blocks
+            for block in content:
+                if isinstance(block, dict):
+                    total_chars += len(str(block.get("text", "")))
+                    total_chars += len(str(block.get("input", "")))
+                elif isinstance(block, str):
+                    total_chars += len(block)
+
+        # Tool calls add tokens too
+        tool_calls = msg.get("tool_calls", [])
+        for tc in tool_calls:
+            total_chars += len(str(tc.get("function", {}).get("arguments", "")))
+
+    return total_chars // 4
+
+
+def find_tool_groups(messages: list[dict[str, Any]]) -> list[tuple[int, int]]:
+    """
+    Find message index ranges that form tool call groups.
+
+    A tool call group is:
+    - An assistant message with tool_calls
+    - All following tool/user response messages until the next assistant message
+
+    This handles both OpenAI format (role="tool") and Anthropic format
+    (role="user" with tool_result content).
+
+    Returns list of (start_idx, end_idx) tuples (inclusive).
+    """
+    groups = []
+    i = 0
+
+    while i < len(messages):
+        msg = messages[i]
+
+        # Check for tool calls in assistant message
+        has_tool_calls = False
+
+        # OpenAI format: tool_calls field
+        if msg.get("role") == "assistant" and msg.get("tool_calls"):
+            has_tool_calls = True
+
+        # Anthropic format: content blocks with type="tool_use"
+        if msg.get("role") == "assistant":
+            content = msg.get("content", [])
+            if isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "tool_use":
+                        has_tool_calls = True
+                        break
+
+        if has_tool_calls:
+            start = i
+            j = i + 1
+
+            # Find all following tool responses
+            while j < len(messages):
+                next_msg = messages[j]
+                role = next_msg.get("role", "")
+
+                # OpenAI format: tool role
+                if role == "tool":
+                    j += 1
+                    continue
+
+                # Anthropic format: user message with tool_result
+                if role == "user":
+                    content = next_msg.get("content", [])
+                    if isinstance(content, list):
+                        has_tool_result = any(
+                            isinstance(b, dict) and b.get("type") == "tool_result"
+                            for b in content
+                        )
+                        if has_tool_result:
+                            j += 1
+                            continue
+
+                # Not a tool response, stop here
+                break
+
+            groups.append((start, j - 1))
+            i = j
+        else:
+            i += 1
+
+    return groups
+
+
+def compact_messages(
+    messages: list[dict[str, Any]],
+    keep_first_n: int = 2,
+    keep_last_n: int = 10,
+    max_tokens: int | None = None,
+    target_token_pct: float = 0.7,
+) -> CompactionResult:
+    """
+    Compact message history by removing old messages (LRU-style).
+
+    Preserves:
+    - First N messages (system prompt, user task)
+    - Last N messages (recent context)
+    - Tool call/response pairs are NEVER split
+
+    Args:
+        messages: The message list to compact
+        keep_first_n: Number of messages to always keep at the start
+        keep_last_n: Number of messages to always keep at the end
+        max_tokens: If set, compact when estimated tokens exceed this
+        target_token_pct: Target percentage of max_tokens after compaction
+
+    Returns:
+        CompactionResult with the compacted messages and stats
+    """
+    original_count = len(messages)
+
+    # Nothing to compact if we have few messages
+    if len(messages) <= keep_first_n + keep_last_n:
+        return CompactionResult(
+            messages=messages,
+            removed_count=0,
+            original_count=original_count,
+            preserved_reason="Too few messages to compact",
+        )
+
+    # Check if compaction is needed based on tokens
+    if max_tokens:
+        current_tokens = estimate_tokens(messages)
+        if current_tokens < max_tokens:
+            return CompactionResult(
+                messages=messages,
+                removed_count=0,
+                original_count=original_count,
+                preserved_reason=f"Under token limit ({current_tokens}/{max_tokens})",
+            )
+
+    # Find tool call groups (these must stay together)
+    tool_groups = find_tool_groups(messages)
+
+    # Build a set of "protected" indices (in tool groups)
+    protected_indices: set[int] = set()
+    for start, end in tool_groups:
+        for idx in range(start, end + 1):
+            protected_indices.add(idx)
+
+    # Determine which messages are in the "middle" (candidates for removal)
+    # Middle = not in first N, not in last N
+    middle_start = keep_first_n
+    middle_end = len(messages) - keep_last_n
+
+    if middle_start >= middle_end:
+        return CompactionResult(
+            messages=messages,
+            removed_count=0,
+            original_count=original_count,
+            preserved_reason="No middle messages to remove",
+        )
+
+    # Find removable message ranges in the middle
+    # We remove from the oldest (lowest index) first
+    removable_ranges: list[tuple[int, int]] = []
+    i = middle_start
+
+    while i < middle_end:
+        # Check if this index is in a tool group
+        in_group = False
+        for start, end in tool_groups:
+            if start <= i <= end:
+                # This message is part of a tool group
+                # Check if the ENTIRE group is in the middle
+                if start >= middle_start and end < middle_end:
+                    # Entire group is removable as a unit
+                    removable_ranges.append((start, end))
+                    i = end + 1
+                    in_group = True
+                    break
+                else:
+                    # Group spans protected region, skip it entirely
+                    i = end + 1
+                    in_group = True
+                    break
+
+        if not in_group:
+            # Single message, can be removed individually
+            removable_ranges.append((i, i))
+            i += 1
+
+    # Deduplicate and sort ranges
+    removable_ranges = sorted(set(removable_ranges), key=lambda x: x[0])
+
+    if not removable_ranges:
+        return CompactionResult(
+            messages=messages,
+            removed_count=0,
+            original_count=original_count,
+            preserved_reason="All middle messages are in protected tool groups",
+        )
+
+    # Determine how many to remove
+    # Start by removing the oldest half of removable ranges
+    if max_tokens:
+        # Token-based: remove until under target
+        target_tokens = int(max_tokens * target_token_pct)
+        indices_to_remove: set[int] = set()
+
+        for start, end in removable_ranges:
+            for idx in range(start, end + 1):
+                indices_to_remove.add(idx)
+
+            # Check if we've removed enough
+            remaining = [m for i, m in enumerate(messages) if i not in indices_to_remove]
+            if estimate_tokens(remaining) <= target_tokens:
+                break
+    else:
+        # Count-based: remove oldest half of middle
+        total_removable = sum(end - start + 1 for start, end in removable_ranges)
+        target_remove = total_removable // 2
+
+        indices_to_remove = set()
+        removed = 0
+
+        for start, end in removable_ranges:
+            if removed >= target_remove:
+                break
+            for idx in range(start, end + 1):
+                indices_to_remove.add(idx)
+                removed += 1
+
+    # Build new message list
+    new_messages = [m for i, m in enumerate(messages) if i not in indices_to_remove]
+
+    # Add a compaction marker so the model knows history was truncated
+    if indices_to_remove and len(new_messages) > keep_first_n:
+        # Insert marker after the preserved first messages
+        marker = {
+            "role": "system",
+            "content": (
+                f"[Context compacted: {len(indices_to_remove)} older messages removed "
+                f"to manage context window. Conversation continues below.]"
+            ),
+        }
+        new_messages.insert(keep_first_n, marker)
+
+    logger.info(
+        f"Compacted messages: {original_count} -> {len(new_messages)} "
+        f"(removed {len(indices_to_remove)})"
+    )
+
+    return CompactionResult(
+        messages=new_messages,
+        removed_count=len(indices_to_remove),
+        original_count=original_count,
+    )
+
+
+def should_compact(
+    messages: list[dict[str, Any]],
+    max_tokens: int,
+    threshold_pct: float = 0.85,
+) -> bool:
+    """
+    Check if messages should be compacted.
+
+    Returns True if estimated tokens exceed threshold percentage of max.
+    """
+    current = estimate_tokens(messages)
+    threshold = int(max_tokens * threshold_pct)
+    return current >= threshold
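For context, here is how the new API composes. This is a standalone sketch, not part of the published diff; the message dicts follow the OpenAI-style shape the module handles, and the numbers are made up for the demo:

from zwarm.core.compact import compact_messages, find_tool_groups, should_compact

# Build a conversation: system + task, then many tool-call round trips.
messages = [
    {"role": "system", "content": "You are the orchestrator."},
    {"role": "user", "content": "Refactor the config loader."},
]
for i in range(30):
    messages.append({
        "role": "assistant",
        "tool_calls": [{"function": {"arguments": f'{{"step": {i}}}'}}],
    })
    messages.append({"role": "tool", "content": "output " * 100})

print(find_tool_groups(messages)[:2])  # [(2, 3), (4, 5)] -- call/response pairs stay intact
if should_compact(messages, max_tokens=2000, threshold_pct=0.85):
    result = compact_messages(messages, keep_first_n=2, keep_last_n=10, max_tokens=2000)
    print(result.was_compacted, result.removed_count)  # True, plus however many were pruned

Note that removal always proceeds whole-group-at-a-time, so a compacted history never contains a tool response without its originating assistant call.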
zwarm/core/config.py
CHANGED
@@ -38,6 +38,18 @@ class ExecutorConfig:
     timeout: int = 3600
 
 
+@dataclass
+class CompactionConfig:
+    """Configuration for context window compaction."""
+
+    enabled: bool = True
+    max_tokens: int = 100000  # Trigger compaction when estimated tokens exceed this
+    threshold_pct: float = 0.85  # Compact when at this % of max_tokens
+    target_pct: float = 0.7  # Target this % after compaction
+    keep_first_n: int = 2  # Always keep first N messages (system + task)
+    keep_last_n: int = 10  # Always keep last N messages (recent context)
+
+
 @dataclass
 class OrchestratorConfig:
     """Configuration for the orchestrator."""

@@ -48,6 +60,7 @@ class OrchestratorConfig:
     max_steps: int = 50
     parallel_delegations: int = 4
     sync_first: bool = True  # prefer sync mode by default
+    compaction: CompactionConfig = field(default_factory=CompactionConfig)
 
 
 @dataclass

@@ -88,19 +101,40 @@ class ZwarmConfig:
         orchestrator_data = data.get("orchestrator", {})
         watchers_data = data.get("watchers", {})
 
-        # Parse
-
-
-
-
-
-
-
+        # Parse compaction config from orchestrator
+        compaction_data = orchestrator_data.pop("compaction", {}) if orchestrator_data else {}
+        compaction_config = CompactionConfig(**compaction_data) if compaction_data else CompactionConfig()
+
+        # Parse watchers config - handle both list shorthand and dict format
+        if isinstance(watchers_data, list):
+            # Shorthand: watchers: [progress, budget, scope]
+            watchers_config = WatchersConfig(
+                enabled=True,
+                watchers=[
+                    WatcherConfigItem(name=w) if isinstance(w, str) else WatcherConfigItem(**w)
+                    for w in watchers_data
+                ],
+            )
+        else:
+            # Full format: watchers: {enabled: true, watchers: [...]}
+            watchers_config = WatchersConfig(
+                enabled=watchers_data.get("enabled", True),
+                watchers=[
+                    WatcherConfigItem(name=w) if isinstance(w, str) else WatcherConfigItem(**w)
+                    for w in watchers_data.get("watchers", [])
+                ] or WatchersConfig().watchers,
+            )
+
+        # Build orchestrator config with nested compaction
+        if orchestrator_data:
+            orchestrator_config = OrchestratorConfig(**orchestrator_data, compaction=compaction_config)
+        else:
+            orchestrator_config = OrchestratorConfig(compaction=compaction_config)
 
         return cls(
             weave=WeaveConfig(**weave_data) if weave_data else WeaveConfig(),
             executor=ExecutorConfig(**executor_data) if executor_data else ExecutorConfig(),
-            orchestrator=
+            orchestrator=orchestrator_config,
             watchers=watchers_config,
             state_dir=data.get("state_dir", ".zwarm"),
         )

@@ -125,6 +159,14 @@ class ZwarmConfig:
                 "max_steps": self.orchestrator.max_steps,
                 "parallel_delegations": self.orchestrator.parallel_delegations,
                 "sync_first": self.orchestrator.sync_first,
+                "compaction": {
+                    "enabled": self.orchestrator.compaction.enabled,
+                    "max_tokens": self.orchestrator.compaction.max_tokens,
+                    "threshold_pct": self.orchestrator.compaction.threshold_pct,
+                    "target_pct": self.orchestrator.compaction.target_pct,
+                    "keep_first_n": self.orchestrator.compaction.keep_first_n,
+                    "keep_last_n": self.orchestrator.compaction.keep_last_n,
+                },
             },
             "watchers": {
                 "enabled": self.watchers.enabled,
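To make the new shape concrete, a minimal sketch of a config payload exercising the nested compaction block and the watchers list shorthand. The loader classmethod name is an assumption here (the hunk only shows `return cls(...)`); everything else matches the dataclasses above:

from zwarm.core.config import ZwarmConfig

data = {
    "orchestrator": {
        "max_steps": 80,
        # Partial override: unspecified fields keep their CompactionConfig defaults.
        "compaction": {"max_tokens": 120000, "keep_last_n": 12},
    },
    "watchers": ["progress", "budget"],  # list shorthand -> enabled=True with named watchers
}
config = ZwarmConfig.from_dict(data)  # assumed entry point, per `return cls(...)` above
print(config.orchestrator.compaction.max_tokens)  # 120000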
zwarm/core/environment.py
CHANGED
@@ -4,14 +4,15 @@ OrchestratorEnv: A lean environment for the zwarm orchestrator.
 Unlike ChatEnv, this environment:
 - Has no notes/observations (we use StateManager instead)
 - Has no chat() tool (orchestrator communicates via output_handler)
-- Shows active sessions in observe()
+- Shows active sessions, step progress, and budget in observe()
 """
 
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Any, Callable
+from typing import TYPE_CHECKING, Any, Callable
 
+from pydantic import PrivateAttr
 from wbal.environment import Environment
 
 if TYPE_CHECKING:

@@ -26,6 +27,8 @@ class OrchestratorEnv(Environment):
     - Task context
     - Working directory info
     - Active session visibility
+    - Step progress tracking
+    - Budget/resource monitoring
     - Output handler for messages
     """
 

@@ -34,50 +37,118 @@ class OrchestratorEnv(Environment):
     output_handler: Callable[[str], None] = lambda x: print(x)
 
     # Session tracking (set by orchestrator)
-    _sessions: dict[str, "ConversationSession"] | None = None
+    _sessions: dict[str, "ConversationSession"] | None = PrivateAttr(default=None)
+
+    # Progress tracking (updated by orchestrator each step)
+    _step_count: int = PrivateAttr(default=0)
+    _max_steps: int = PrivateAttr(default=50)
+    _total_tokens: int = PrivateAttr(default=0)
+    _executor_tokens: int = PrivateAttr(default=0)  # Executor token usage
+
+    # Budget config (set from config)
+    _budget_max_sessions: int | None = PrivateAttr(default=None)
 
     def set_sessions(self, sessions: dict[str, "ConversationSession"]) -> None:
         """Set the sessions dict for observe() visibility."""
         self._sessions = sessions
 
+    def update_progress(
+        self,
+        step_count: int,
+        max_steps: int,
+        total_tokens: int = 0,
+        executor_tokens: int = 0,
+    ) -> None:
+        """Update progress tracking (called by orchestrator each step)."""
+        self._step_count = step_count
+        self._max_steps = max_steps
+        self._total_tokens = total_tokens
+        self._executor_tokens = executor_tokens
+
+    def set_budget(self, max_sessions: int | None = None) -> None:
+        """Set budget limits from config."""
+        self._budget_max_sessions = max_sessions
+
     def observe(self) -> str:
         """
         Return observable state for the orchestrator.
 
         Shows:
-        -
-        -
+        - Progress (steps, tokens)
+        - Session summary
         - Active sessions with their status
+        - Working directory
+
+        Note: Task is NOT included here as it's already in the user message.
         """
         parts = []
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Progress bar and stats
+        progress_pct = (
+            (self._step_count / self._max_steps * 100) if self._max_steps > 0 else 0
+        )
+        bar_len = 20
+        filled = (
+            int(bar_len * self._step_count / self._max_steps)
+            if self._max_steps > 0
+            else 0
+        )
+        bar = "█" * filled + "░" * (bar_len - filled)
+
+        progress_lines = [
+            f"Steps: [{bar}] {self._step_count}/{self._max_steps} ({progress_pct:.0f}%)",
+        ]
+        if self._total_tokens > 0 or self._executor_tokens > 0:
+            token_parts = []
+            if self._total_tokens > 0:
+                token_parts.append(f"orchestrator: ~{self._total_tokens:,}")
+            if self._executor_tokens > 0:
+                token_parts.append(f"executors: ~{self._executor_tokens:,}")
+            progress_lines.append(f"Tokens: {', '.join(token_parts)}")
+
+        parts.append("## Progress\n" + "\n".join(progress_lines))
+
+        # Session summary
+        if self._sessions is not None:
+            active = sum(
+                1 for s in self._sessions.values() if s.status.value == "active"
+            )
+            completed = sum(
+                1 for s in self._sessions.values() if s.status.value == "completed"
+            )
+            failed = sum(
+                1 for s in self._sessions.values() if s.status.value == "failed"
+            )
+            total = len(self._sessions)
+
+            summary = f"Sessions: {active} active, {completed} done, {failed} failed ({total} total)"
+            if self._budget_max_sessions:
+                summary += f" [limit: {self._budget_max_sessions}]"
+
+            parts.append(f"## Resources\n{summary}")
+
+            # Active sessions detail
+            active_sessions = [
+                (sid, s)
+                for sid, s in self._sessions.items()
+                if s.status.value == "active"
+            ]
+            if active_sessions:
+                session_lines = []
+                for sid, session in active_sessions:
+                    mode_tag = "sync" if session.mode.value == "sync" else "async"
+                    turns = len([m for m in session.messages if m.role == "user"])
+                    task_preview = (
+                        session.task_description[:50] + "..."
+                        if len(session.task_description) > 50
+                        else session.task_description
+                    )
+                    session_lines.append(
+                        f"\n  • {sid[:8]} ({session.adapter}, {mode_tag}, {turns} turns): {task_preview}"
+                    )
+                parts.append("## Active Sessions\n" + "\n".join(session_lines))
+
+        # Working directory (less prominent)
+        parts.append(f"## Context\nWorking dir: {self.working_dir.absolute()}")
 
         return "\n\n".join(parts)
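For a feel of what the richer observe() emits, here is the progress-bar math lifted into a standalone sketch (same formatting logic as the hunk above, no package import needed):

# Mirrors the Steps line that observe() renders.
step_count, max_steps = 12, 50
bar_len = 20
filled = int(bar_len * step_count / max_steps) if max_steps > 0 else 0
bar = "█" * filled + "░" * (bar_len - filled)
print(f"Steps: [{bar}] {step_count}/{max_steps} ({step_count / max_steps * 100:.0f}%)")
# -> Steps: [████░░░░░░░░░░░░░░░░] 12/50 (24%)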
zwarm/core/models.py
CHANGED
@@ -92,6 +92,20 @@ class ConversationSession:
     model: str | None = None
     exit_message: str | None = None
 
+    # Token usage tracking for cost calculation
+    token_usage: dict[str, int] = field(default_factory=lambda: {
+        "input_tokens": 0,
+        "output_tokens": 0,
+        "total_tokens": 0,
+    })
+
+    def add_usage(self, usage: dict[str, int]) -> None:
+        """Add token usage from an interaction."""
+        if not usage:
+            return
+        for key in self.token_usage:
+            self.token_usage[key] += usage.get(key, 0)
+
     def add_message(self, role: Literal["user", "assistant", "system"], content: str) -> Message:
         """Add a message to the conversation."""
         msg = Message(role=role, content=content)

@@ -125,6 +139,7 @@ class ConversationSession:
             "task_description": self.task_description,
             "model": self.model,
             "exit_message": self.exit_message,
+            "token_usage": self.token_usage,
         }
 
     @classmethod

@@ -143,6 +158,7 @@ class ConversationSession:
             task_description=data.get("task_description", ""),
             model=data.get("model"),
             exit_message=data.get("exit_message"),
+            token_usage=data.get("token_usage", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}),
        )
 
 
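Finally, a standalone sketch of the accumulation rule add_usage applies, using plain dicts rather than a full ConversationSession (constructing one needs fields outside this diff). The cache_tokens key is hypothetical, included only to show that unknown keys are ignored:

# Mirrors ConversationSession.add_usage: only the known counters accumulate.
token_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
for usage in (
    {"input_tokens": 1200, "output_tokens": 300, "total_tokens": 1500},
    {"input_tokens": 800, "output_tokens": 150, "total_tokens": 950, "cache_tokens": 42},
):
    for key in token_usage:
        token_usage[key] += usage.get(key, 0)
print(token_usage)  # {'input_tokens': 2000, 'output_tokens': 450, 'total_tokens': 2450}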