zwarm 3.10.2__tar.gz → 3.10.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {zwarm-3.10.2 → zwarm-3.10.5}/PKG-INFO +22 -15
- {zwarm-3.10.2 → zwarm-3.10.5}/README.md +21 -14
- {zwarm-3.10.2 → zwarm-3.10.5}/pyproject.toml +1 -1
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/interactive.py +2 -2
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/main.py +3 -5
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/pilot.py +5 -13
- zwarm-3.10.5/src/zwarm/compression/__init__.py +37 -0
- zwarm-3.10.5/src/zwarm/compression/rollout_compression.py +292 -0
- zwarm-3.10.5/src/zwarm/compression/tc_compression.py +165 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/config.py +33 -6
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/registry.py +2 -20
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/orchestrator.py +43 -0
- zwarm-3.10.5/src/zwarm/prompts/orchestrator.py +214 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/prompts/pilot.py +15 -11
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/sessions/manager.py +2 -2
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/tools/delegation.py +86 -94
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/llm_watcher.py +1 -1
- zwarm-3.10.2/src/zwarm/prompts/orchestrator.py +0 -253
- {zwarm-3.10.2 → zwarm-3.10.5}/.gitignore +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/__init__.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/__init__.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/__init__.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/checkpoints.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/compact.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/costs.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/environment.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/models.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/state.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/test_compact.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/test_config.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/test_models.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/prompts/__init__.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/sessions/__init__.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/sessions/base.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/sessions/claude.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/test_orchestrator_watchers.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/tools/__init__.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/__init__.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/base.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/builtin.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/manager.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/registry.py +0 -0
- {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/test_watchers.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: zwarm
|
|
3
|
-
Version: 3.10.2
|
|
3
|
+
Version: 3.10.5
|
|
4
4
|
Summary: Multi-Agent CLI Orchestration Research Platform
|
|
5
5
|
Requires-Python: <3.14,>=3.13
|
|
6
6
|
Requires-Dist: prompt-toolkit>=3.0.52
|
|
@@ -87,14 +87,18 @@ Want a 3-minute walkthrough? See `docs/DEMO.md` for a pilot + interactive demo.
|
|
|
87
87
|
|
|
88
88
|
## Multi-Adapter Support
|
|
89
89
|
|
|
90
|
-
zwarm supports multiple executor backends:
|
|
90
|
+
zwarm supports multiple executor backends with simple model shortcuts:
|
|
91
91
|
|
|
92
|
-
|
|
|
93
|
-
|
|
94
|
-
|
|
|
95
|
-
|
|
|
92
|
+
| Model | Alias | Description |
|
|
93
|
+
|-------|-------|-------------|
|
|
94
|
+
| `gpt-5.2-codex` | `5.2` | GPT-5.2 Codex - fast, great for code (default) |
|
|
95
|
+
| `gpt-5.2` | `5.2-think` | GPT-5.2 with extended reasoning |
|
|
96
|
+
| `sonnet` | - | Claude Sonnet - balanced |
|
|
97
|
+
| `opus` | - | Claude Opus - most capable |
|
|
96
98
|
|
|
97
|
-
|
|
99
|
+
**Adapter is auto-detected from model name** - just use `model="opus"` and zwarm handles the rest.
|
|
100
|
+
|
|
101
|
+
Mix models freely - use Opus for complex reasoning, 5.2 for quick edits.
|
|
98
102
|
|
|
99
103
|
---
|
|
100
104
|
|
|
@@ -184,7 +188,7 @@ zwarm interactive
|
|
|
184
188
|
|
|
185
189
|
| Command | Description |
|
|
186
190
|
|---------|-------------|
|
|
187
|
-
| `spawn "task" [--
|
|
191
|
+
| `spawn "task" [--model M]` | Start a new session (model: 5.2, opus, sonnet) |
|
|
188
192
|
| `ls` | Dashboard of all sessions (with costs, models) |
|
|
189
193
|
| `? ID` / `peek ID` | Quick status check |
|
|
190
194
|
| `show ID` | Full session details |
|
|
@@ -213,8 +217,8 @@ $ zwarm interactive
|
|
|
213
217
|
⟳ 2 running
|
|
214
218
|
|
|
215
219
|
ID │ │ Task │ Model │ Tokens │ Cost
|
|
216
|
-
abc123 │ ⟳ │ Add tests for the auth... │ codex
|
|
217
|
-
def456 │ ⟳ │ Fix type errors in utils... │ codex
|
|
220
|
+
abc123 │ ⟳ │ Add tests for the auth... │ 5.2-codex │ 5,234 │ $0.052
|
|
221
|
+
def456 │ ⟳ │ Fix type errors in utils... │ 5.2-codex │ 2,100 │ $0.021
|
|
218
222
|
|
|
219
223
|
> watch abc123
|
|
220
224
|
Watching abc123... (Ctrl+C to stop)
|
|
@@ -254,17 +258,20 @@ The orchestrator LLM has access to:
|
|
|
254
258
|
|
|
255
259
|
| Tool | Description |
|
|
256
260
|
|------|-------------|
|
|
257
|
-
| `delegate(task,
|
|
261
|
+
| `delegate(task, model="5.2")` | Start a new coding session |
|
|
258
262
|
| `converse(id, msg)` | Continue a session |
|
|
259
263
|
| `check_session(id)` | Get full session details |
|
|
260
264
|
| `peek_session(id)` | Quick status check |
|
|
265
|
+
| `get_trajectory(id)` | See what steps the agent took |
|
|
261
266
|
| `list_sessions()` | List all sessions |
|
|
262
267
|
| `end_session(id)` | Kill/delete a session |
|
|
263
268
|
| `sleep(seconds)` | Wait before checking again |
|
|
269
|
+
| `bash(cmd)` | Run verification commands (tests, linters) |
|
|
270
|
+
| `exit()` | Signal task completion |
|
|
264
271
|
|
|
265
272
|
**Async-first**: All sessions run in the background. The orchestrator uses `sleep()` to wait, then checks on progress.
|
|
266
273
|
|
|
267
|
-
**
|
|
274
|
+
**Model shortcuts**: Just use `model="5.2"` or `model="opus"` - the adapter is auto-detected.
|
|
268
275
|
|
|
269
276
|
**Web Search**: Enable `web_search=True` in config for tasks needing current info (API docs, latest releases, etc.).
|
|
270
277
|
|
|
@@ -361,14 +368,14 @@ enabled = ["progress", "budget", "delegation", "delegation_reminder"]
|
|
|
361
368
|
|
|
362
369
|
**`.zwarm/codex.toml`** - Controls the Codex CLI:
|
|
363
370
|
```toml
|
|
364
|
-
model = "gpt-5.
|
|
371
|
+
model = "gpt-5.2-codex" # or gpt-5.2 for extended reasoning
|
|
365
372
|
model_reasoning_effort = "high" # low | medium | high
|
|
366
|
-
|
|
373
|
+
full_danger = true # Skip approval prompts
|
|
367
374
|
```
|
|
368
375
|
|
|
369
376
|
**`.zwarm/claude.toml`** - Controls the Claude Code CLI:
|
|
370
377
|
```toml
|
|
371
|
-
model = "
|
|
378
|
+
model = "opus" # opus | sonnet
|
|
372
379
|
full_danger = true # Skip permission prompts
|
|
373
380
|
```
|
|
374
381
|
|
|
@@ -73,14 +73,18 @@ Want a 3-minute walkthrough? See `docs/DEMO.md` for a pilot + interactive demo.
|
|
|
73
73
|
|
|
74
74
|
## Multi-Adapter Support
|
|
75
75
|
|
|
76
|
-
zwarm supports multiple executor backends:
|
|
76
|
+
zwarm supports multiple executor backends with simple model shortcuts:
|
|
77
77
|
|
|
78
|
-
|
|
|
79
|
-
|
|
80
|
-
|
|
|
81
|
-
|
|
|
78
|
+
| Model | Alias | Description |
|
|
79
|
+
|-------|-------|-------------|
|
|
80
|
+
| `gpt-5.2-codex` | `5.2` | GPT-5.2 Codex - fast, great for code (default) |
|
|
81
|
+
| `gpt-5.2` | `5.2-think` | GPT-5.2 with extended reasoning |
|
|
82
|
+
| `sonnet` | - | Claude Sonnet - balanced |
|
|
83
|
+
| `opus` | - | Claude Opus - most capable |
|
|
82
84
|
|
|
83
|
-
|
|
85
|
+
**Adapter is auto-detected from model name** - just use `model="opus"` and zwarm handles the rest.
|
|
86
|
+
|
|
87
|
+
Mix models freely - use Opus for complex reasoning, 5.2 for quick edits.
|
|
84
88
|
|
|
85
89
|
---
|
|
86
90
|
|
|
@@ -170,7 +174,7 @@ zwarm interactive
|
|
|
170
174
|
|
|
171
175
|
| Command | Description |
|
|
172
176
|
|---------|-------------|
|
|
173
|
-
| `spawn "task" [--
|
|
177
|
+
| `spawn "task" [--model M]` | Start a new session (model: 5.2, opus, sonnet) |
|
|
174
178
|
| `ls` | Dashboard of all sessions (with costs, models) |
|
|
175
179
|
| `? ID` / `peek ID` | Quick status check |
|
|
176
180
|
| `show ID` | Full session details |
|
|
@@ -199,8 +203,8 @@ $ zwarm interactive
|
|
|
199
203
|
⟳ 2 running
|
|
200
204
|
|
|
201
205
|
ID │ │ Task │ Model │ Tokens │ Cost
|
|
202
|
-
abc123 │ ⟳ │ Add tests for the auth... │ codex
|
|
203
|
-
def456 │ ⟳ │ Fix type errors in utils... │ codex
|
|
206
|
+
abc123 │ ⟳ │ Add tests for the auth... │ 5.2-codex │ 5,234 │ $0.052
|
|
207
|
+
def456 │ ⟳ │ Fix type errors in utils... │ 5.2-codex │ 2,100 │ $0.021
|
|
204
208
|
|
|
205
209
|
> watch abc123
|
|
206
210
|
Watching abc123... (Ctrl+C to stop)
|
|
@@ -240,17 +244,20 @@ The orchestrator LLM has access to:
|
|
|
240
244
|
|
|
241
245
|
| Tool | Description |
|
|
242
246
|
|------|-------------|
|
|
243
|
-
| `delegate(task,
|
|
247
|
+
| `delegate(task, model="5.2")` | Start a new coding session |
|
|
244
248
|
| `converse(id, msg)` | Continue a session |
|
|
245
249
|
| `check_session(id)` | Get full session details |
|
|
246
250
|
| `peek_session(id)` | Quick status check |
|
|
251
|
+
| `get_trajectory(id)` | See what steps the agent took |
|
|
247
252
|
| `list_sessions()` | List all sessions |
|
|
248
253
|
| `end_session(id)` | Kill/delete a session |
|
|
249
254
|
| `sleep(seconds)` | Wait before checking again |
|
|
255
|
+
| `bash(cmd)` | Run verification commands (tests, linters) |
|
|
256
|
+
| `exit()` | Signal task completion |
|
|
250
257
|
|
|
251
258
|
**Async-first**: All sessions run in the background. The orchestrator uses `sleep()` to wait, then checks on progress.
|
|
252
259
|
|
|
253
|
-
**
|
|
260
|
+
**Model shortcuts**: Just use `model="5.2"` or `model="opus"` - the adapter is auto-detected.
|
|
254
261
|
|
|
255
262
|
**Web Search**: Enable `web_search=True` in config for tasks needing current info (API docs, latest releases, etc.).
|
|
256
263
|
|
|
@@ -347,14 +354,14 @@ enabled = ["progress", "budget", "delegation", "delegation_reminder"]
|
|
|
347
354
|
|
|
348
355
|
**`.zwarm/codex.toml`** - Controls the Codex CLI:
|
|
349
356
|
```toml
|
|
350
|
-
model = "gpt-5.
|
|
357
|
+
model = "gpt-5.2-codex" # or gpt-5.2 for extended reasoning
|
|
351
358
|
model_reasoning_effort = "high" # low | medium | high
|
|
352
|
-
|
|
359
|
+
full_danger = true # Skip approval prompts
|
|
353
360
|
```
|
|
354
361
|
|
|
355
362
|
**`.zwarm/claude.toml`** - Controls the Claude Code CLI:
|
|
356
363
|
```toml
|
|
357
|
-
model = "
|
|
364
|
+
model = "opus" # opus | sonnet
|
|
358
365
|
full_danger = true # Skip permission prompts
|
|
359
366
|
```
|
|
360
367
|
|
|
@@ -269,10 +269,10 @@ def cmd_ls(manager):
|
|
|
269
269
|
task_preview = s.task[:23] + "..." if len(s.task) > 26 else s.task
|
|
270
270
|
updated = time_ago(s.updated_at)
|
|
271
271
|
|
|
272
|
-
# Short model name (e.g., "gpt-5.
|
|
272
|
+
# Short model name (e.g., "gpt-5.2-codex" -> "5.2-codex")
|
|
273
273
|
model_short = s.model or "?"
|
|
274
274
|
if "codex" in model_short.lower():
|
|
275
|
-
# Extract codex variant: gpt-5.
|
|
275
|
+
# Extract codex variant: gpt-5.2-codex -> 5.2-codex
|
|
276
276
|
parts = model_short.split("-")
|
|
277
277
|
codex_idx = next((i for i, p in enumerate(parts) if "codex" in p.lower()), -1)
|
|
278
278
|
if codex_idx >= 0:
|
|
@@ -838,19 +838,17 @@ def init(
|
|
|
838
838
|
console.print(" [dim]These control the underlying Codex CLI that runs executor sessions[/]\n")
|
|
839
839
|
|
|
840
840
|
console.print(" Available models:")
|
|
841
|
-
console.print(" [cyan]1[/] gpt-5.2-codex [dim]- GPT-5.2 Codex, balanced (Recommended)[/]")
|
|
841
|
+
console.print(" [cyan]1[/] gpt-5.2-codex [dim]- GPT-5.2 Codex, fast and balanced (Recommended)[/]")
|
|
842
842
|
console.print(" [cyan]2[/] gpt-5.2 [dim]- GPT-5.2 with extended reasoning[/]")
|
|
843
|
-
console.print(" [cyan]3[/] gpt-5.1-codex [dim]- GPT-5.1 Codex (legacy)[/]")
|
|
844
843
|
|
|
845
844
|
model_choice = typer.prompt(
|
|
846
|
-
" Select model (1-
|
|
845
|
+
" Select model (1-2)",
|
|
847
846
|
default="1",
|
|
848
847
|
type=str,
|
|
849
848
|
)
|
|
850
849
|
model_map = {
|
|
851
850
|
"1": "gpt-5.2-codex",
|
|
852
851
|
"2": "gpt-5.2",
|
|
853
|
-
"3": "gpt-5.1-codex",
|
|
854
852
|
}
|
|
855
853
|
codex_model = model_map.get(model_choice, model_choice)
|
|
856
854
|
if model_choice not in model_map:
|
|
@@ -1668,7 +1666,7 @@ def session_start(
|
|
|
1668
1666
|
$ zwarm session start "Fix the bug in auth.py"
|
|
1669
1667
|
|
|
1670
1668
|
[dim]# With specific model[/]
|
|
1671
|
-
$ zwarm session start "Refactor the API" --model gpt-5.
|
|
1669
|
+
$ zwarm session start "Refactor the API" --model gpt-5.2-codex
|
|
1672
1670
|
|
|
1673
1671
|
[dim]# Web search is always available[/]
|
|
1674
1672
|
$ zwarm session start "Research latest OAuth2 best practices"
|
|
@@ -83,22 +83,14 @@ class ChoogingSpinner:
|
|
|
83
83
|
# Context window sizes for different models (in tokens)
|
|
84
84
|
# These are for the ORCHESTRATOR LLM, not the executors
|
|
85
85
|
MODEL_CONTEXT_WINDOWS = {
|
|
86
|
-
# OpenAI models
|
|
86
|
+
# OpenAI models (via Codex CLI)
|
|
87
87
|
"gpt-5.2-codex": 200_000,
|
|
88
88
|
"gpt-5.2": 200_000,
|
|
89
|
-
|
|
90
|
-
"gpt-5.1-codex-mini": 200_000,
|
|
91
|
-
"gpt-5": 200_000,
|
|
92
|
-
"gpt-5-mini": 200_000,
|
|
93
|
-
"o3": 200_000,
|
|
94
|
-
"o3-mini": 200_000,
|
|
95
|
-
# Claude models (if used as orchestrator)
|
|
96
|
-
"claude-sonnet": 200_000,
|
|
97
|
-
"claude-opus": 200_000,
|
|
98
|
-
"claude-haiku": 200_000,
|
|
89
|
+
# Claude models (via Claude CLI)
|
|
99
90
|
"sonnet": 200_000,
|
|
100
91
|
"opus": 200_000,
|
|
101
|
-
"
|
|
92
|
+
"claude-sonnet": 200_000,
|
|
93
|
+
"claude-opus": 200_000,
|
|
102
94
|
# Fallback
|
|
103
95
|
"default": 128_000,
|
|
104
96
|
}
|
|
@@ -1080,7 +1072,7 @@ def _run_pilot_repl(
|
|
|
1080
1072
|
renderer.status("")
|
|
1081
1073
|
|
|
1082
1074
|
# Get model from orchestrator if available
|
|
1083
|
-
model = "gpt-5.
|
|
1075
|
+
model = "gpt-5.2-codex" # Default
|
|
1084
1076
|
if hasattr(orchestrator, "lm") and hasattr(orchestrator.lm, "model"):
|
|
1085
1077
|
model = orchestrator.lm.model
|
|
1086
1078
|
elif hasattr(orchestrator, "config"):
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Compression modules for infinite-running agents.
|
|
3
|
+
|
|
4
|
+
Two types of compression:
|
|
5
|
+
1. TC (Tool Call) Compression - compresses tool call results before they enter context
|
|
6
|
+
2. Rollout Compression - manages message history eviction (LRU-style)
|
|
7
|
+
|
|
8
|
+
These modules allow agents to run virtually indefinitely without context explosion.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from .tc_compression import (
|
|
12
|
+
TCCompressor,
|
|
13
|
+
NoOpTCCompressor,
|
|
14
|
+
NaiveSizeTCCompressor,
|
|
15
|
+
get_tc_compressor,
|
|
16
|
+
)
|
|
17
|
+
from .rollout_compression import (
|
|
18
|
+
RolloutCompressor,
|
|
19
|
+
NoOpRolloutCompressor,
|
|
20
|
+
LRURolloutCompressor,
|
|
21
|
+
SlidingWindowRolloutCompressor,
|
|
22
|
+
get_rollout_compressor,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
# TC Compression
|
|
27
|
+
"TCCompressor",
|
|
28
|
+
"NoOpTCCompressor",
|
|
29
|
+
"NaiveSizeTCCompressor",
|
|
30
|
+
"get_tc_compressor",
|
|
31
|
+
# Rollout Compression
|
|
32
|
+
"RolloutCompressor",
|
|
33
|
+
"NoOpRolloutCompressor",
|
|
34
|
+
"LRURolloutCompressor",
|
|
35
|
+
"SlidingWindowRolloutCompressor",
|
|
36
|
+
"get_rollout_compressor",
|
|
37
|
+
]
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Rollout Compression - manages message history eviction for infinite-running agents.
|
|
3
|
+
|
|
4
|
+
As agents run, their conversation history grows. These compressors implement
|
|
5
|
+
different strategies for evicting old messages to keep context bounded.
|
|
6
|
+
|
|
7
|
+
Available compressors:
|
|
8
|
+
- NoOpRolloutCompressor: No eviction (context will eventually overflow)
|
|
9
|
+
- LRURolloutCompressor: Evict oldest messages, keeping system prompt
|
|
10
|
+
- SlidingWindowRolloutCompressor: Keep last N turns (user+assistant pairs)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class EvictionStats:
    """Counters describing the outcome of a single rollout-compression pass."""

    messages_before: int = 0  # number of messages handed to the compressor
    messages_after: int = 0  # number of messages returned by the compressor
    messages_evicted: int = 0  # number of messages dropped
    tokens_evicted_estimate: int = 0  # Rough estimate
    eviction_triggered: bool = False  # True when at least one message was dropped

    def to_dict(self) -> dict[str, Any]:
        """Return the counters as a plain dict keyed by field name."""
        keys = (
            "messages_before",
            "messages_after",
            "messages_evicted",
            "tokens_evicted_estimate",
            "eviction_triggered",
        )
        return {key: getattr(self, key) for key in keys}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RolloutCompressor(ABC):
    """
    Interface for rollout (message history) compressors.

    A concrete compressor implements `compress`, which takes the message
    history and returns a (possibly shorter) history together with
    statistics about what was dropped.
    """

    # Short identifier for the strategy; subclasses override it.
    name: str = "base"

    @abstractmethod
    def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
        """
        Trim the message history.

        Args:
            messages: List of message dicts with 'role' and 'content' keys

        Returns:
            A `(compressed_messages, eviction_stats)` tuple.
        """
        ...

    def should_compress(self, messages: list[dict]) -> bool:
        """Report whether compression is needed; the base class always says yes."""
        return True

    def __repr__(self) -> str:
        return type(self).__name__ + "()"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class NoOpRolloutCompressor(RolloutCompressor):
    """
    Pass-through compressor that never evicts anything.

    Choose this to switch rollout compression off; the history is returned
    untouched, so nothing here keeps it from growing.
    """

    name = "noop"

    def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
        """Return the same list object along with stats showing no eviction."""
        count = len(messages)
        stats = EvictionStats(
            messages_before=count,
            messages_after=count,
            eviction_triggered=False,
        )
        return messages, stats
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class LRURolloutCompressor(RolloutCompressor):
    """
    LRU (Least Recently Used) compressor - evicts oldest messages.

    Keeps the system prompt and the most recent messages. When the message
    count exceeds max_messages, evicts oldest non-system messages.

    Preserved messages (system messages and the first user message, when the
    corresponding flags are set) are never dropped, so the result can still
    be longer than max_messages if the preserved messages alone exceed it.

    Args:
        max_messages: Maximum messages to keep (default: 50)
        preserve_system: Keep all system messages (default: True)
        preserve_first_user: Keep first user message as context (default: True)
    """

    name = "lru"

    def __init__(
        self,
        max_messages: int = 50,
        preserve_system: bool = True,
        preserve_first_user: bool = True,
    ):
        self.max_messages = max_messages
        self.preserve_system = preserve_system
        self.preserve_first_user = preserve_first_user

    def should_compress(self, messages: list[dict]) -> bool:
        """Only compress if we exceed max_messages."""
        return len(messages) > self.max_messages

    def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
        """
        Evict oldest messages, keeping system prompt and recent history.

        Args:
            messages: List of message dicts; 'role' and 'content' are read
                with ``dict.get`` so missing keys are tolerated.

        Returns:
            (compressed_messages, eviction_stats). When no compression is
            needed the input list object itself is returned; otherwise a new
            list is built and the input is left unmodified.
        """
        stats = EvictionStats(messages_before=len(messages))

        if not self.should_compress(messages):
            stats.messages_after = len(messages)
            return messages, stats

        # Separate preserved messages from evictable ones, remembering each
        # message's original index so order can be restored afterwards.
        preserved = []
        evictable = []

        first_user_seen = False
        for i, msg in enumerate(messages):
            role = msg.get("role", "")

            # Always preserve system messages
            if self.preserve_system and role == "system":
                preserved.append((i, msg))
            # Preserve first user message as task context
            elif self.preserve_first_user and role == "user" and not first_user_seen:
                preserved.append((i, msg))
                first_user_seen = True
            else:
                evictable.append((i, msg))

        # Budget left for evictable messages once preserved ones are counted.
        keep_count = max(0, self.max_messages - len(preserved))

        # Split at a single index so "kept" and "evicted" always partition
        # `evictable`. A negative-index slice would not work here, because
        # `seq[-0:]` is the whole sequence; the previous code special-cased
        # keep_count == 0 for the kept half but reported nothing as evicted,
        # leaving the stats at zero even though messages were dropped.
        split = max(0, len(evictable) - keep_count)
        evicted = evictable[:split]
        kept_evictable = evictable[split:]

        # Merge preserved and kept messages, maintaining original order
        all_kept = preserved + kept_evictable
        all_kept.sort(key=lambda x: x[0])  # Sort by original index
        result = [msg for _, msg in all_kept]

        # Estimate tokens evicted (rough: ~4 chars per token)
        evicted_content = sum(len(str(msg.get("content", ""))) for _, msg in evicted)
        tokens_evicted = evicted_content // 4

        stats.messages_after = len(result)
        stats.messages_evicted = len(evicted)
        stats.tokens_evicted_estimate = tokens_evicted
        stats.eviction_triggered = len(evicted) > 0

        return result, stats

    def __repr__(self) -> str:
        return f"LRURolloutCompressor(max_messages={self.max_messages})"
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class SlidingWindowRolloutCompressor(RolloutCompressor):
    """
    Sliding window compressor - keeps last N turns (user+assistant pairs).

    A "turn" starts at a user message and contains every non-system message
    up to (but not including) the next user message. This preserves
    conversation coherence better than raw message count.

    Args:
        max_turns: Maximum turns to keep (default: 20)
        preserve_system: Keep all system messages (default: True)
        preserve_first_turn: Keep first turn as context (default: True)
    """

    name = "sliding_window"

    def __init__(
        self,
        max_turns: int = 20,
        preserve_system: bool = True,
        preserve_first_turn: bool = True,
    ):
        self.max_turns = max_turns
        # NOTE(review): stored but not consulted by compress(), which always
        # keeps system messages - confirm whether False should be honoured.
        self.preserve_system = preserve_system
        self.preserve_first_turn = preserve_first_turn

    def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
        """
        Keep last N turns, preserving system messages.

        Args:
            messages: List of message dicts; 'role' and 'content' are read
                with ``dict.get`` so missing keys are tolerated.

        Returns:
            (compressed_messages, eviction_stats). The returned list is always
            a new list with all system messages first, followed by the kept
            conversation messages in their original relative order.
        """
        stats = EvictionStats(messages_before=len(messages))

        # Extract system messages
        system_messages = []
        conversation = []

        for msg in messages:
            if msg.get("role") == "system":
                system_messages.append(msg)
            else:
                conversation.append(msg)

        # Group conversation into turns (user + assistant + tool results)
        turns: list[list[dict]] = []
        current_turn: list[dict] = []

        for msg in conversation:
            role = msg.get("role", "")
            if role == "user" and current_turn:
                # New user message starts a new turn
                turns.append(current_turn)
                current_turn = [msg]
            else:
                current_turn.append(msg)

        # Don't forget the last turn
        if current_turn:
            turns.append(current_turn)

        # Decide which turns to keep
        if len(turns) <= self.max_turns:
            # No eviction needed
            result = system_messages + conversation
            stats.messages_after = len(result)
            return result, stats

        # Keep first turn + last (max_turns - 1) turns, or just the last
        # max_turns turns when the first turn is not preserved. A preserved
        # first turn is kept even when max_turns is 0.
        if self.preserve_first_turn and turns:
            head = turns[:1]
            candidates = turns[1:]
            tail_count = max(0, self.max_turns - 1)
        else:
            head = []
            candidates = turns
            tail_count = max(0, self.max_turns)

        # Split at an explicit index rather than slicing with a negative
        # count: `seq[-0:]` is the whole sequence, so a tail count of zero
        # (max_turns == 1 with the first turn preserved) used to keep every
        # turn and evict nothing.
        split = max(0, len(candidates) - tail_count)
        evicted_turns = candidates[:split]
        kept_turns = head + candidates[split:]

        # Flatten kept turns back into messages
        kept_conversation = []
        for turn in kept_turns:
            kept_conversation.extend(turn)

        result = system_messages + kept_conversation

        # Estimate tokens evicted (rough: ~4 chars per token)
        evicted_content = sum(
            len(str(msg.get("content", "")))
            for turn in evicted_turns
            for msg in turn
        )

        # Calculate eviction stats
        evicted_count = len(messages) - len(result)
        stats.messages_after = len(result)
        stats.messages_evicted = evicted_count
        stats.tokens_evicted_estimate = evicted_content // 4
        stats.eviction_triggered = evicted_count > 0

        return result, stats

    def __repr__(self) -> str:
        return f"SlidingWindowRolloutCompressor(max_turns={self.max_turns})"
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# =============================================================================
|
|
265
|
+
# Factory
|
|
266
|
+
# =============================================================================
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def get_rollout_compressor(
    name: str = "lru",
    **kwargs,
) -> RolloutCompressor:
    """
    Build a rollout compressor from its registered name.

    Args:
        name: One of "noop", "lru" or "sliding_window".
        **kwargs: Forwarded unchanged to the chosen compressor's constructor.

    Returns:
        A new instance of the selected compressor class.

    Raises:
        ValueError: If `name` is not one of the registered names.
    """
    registry = {
        "noop": NoOpRolloutCompressor,
        "lru": LRURolloutCompressor,
        "sliding_window": SlidingWindowRolloutCompressor,
    }

    factory = registry.get(name)
    if factory is None:
        raise ValueError(f"Unknown rollout compressor: {name}. Available: {list(registry.keys())}")

    return factory(**kwargs)
|