zwarm 3.10.2__tar.gz → 3.10.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {zwarm-3.10.2 → zwarm-3.10.5}/PKG-INFO +22 -15
  2. {zwarm-3.10.2 → zwarm-3.10.5}/README.md +21 -14
  3. {zwarm-3.10.2 → zwarm-3.10.5}/pyproject.toml +1 -1
  4. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/interactive.py +2 -2
  5. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/main.py +3 -5
  6. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/pilot.py +5 -13
  7. zwarm-3.10.5/src/zwarm/compression/__init__.py +37 -0
  8. zwarm-3.10.5/src/zwarm/compression/rollout_compression.py +292 -0
  9. zwarm-3.10.5/src/zwarm/compression/tc_compression.py +165 -0
  10. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/config.py +33 -6
  11. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/registry.py +2 -20
  12. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/orchestrator.py +43 -0
  13. zwarm-3.10.5/src/zwarm/prompts/orchestrator.py +214 -0
  14. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/prompts/pilot.py +15 -11
  15. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/sessions/manager.py +2 -2
  16. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/tools/delegation.py +86 -94
  17. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/llm_watcher.py +1 -1
  18. zwarm-3.10.2/src/zwarm/prompts/orchestrator.py +0 -253
  19. {zwarm-3.10.2 → zwarm-3.10.5}/.gitignore +0 -0
  20. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/__init__.py +0 -0
  21. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/__init__.py +0 -0
  22. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/__init__.py +0 -0
  23. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/checkpoints.py +0 -0
  24. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/compact.py +0 -0
  25. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/costs.py +0 -0
  26. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/environment.py +0 -0
  27. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/models.py +0 -0
  28. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/state.py +0 -0
  29. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/test_compact.py +0 -0
  30. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/test_config.py +0 -0
  31. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/core/test_models.py +0 -0
  32. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/prompts/__init__.py +0 -0
  33. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/sessions/__init__.py +0 -0
  34. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/sessions/base.py +0 -0
  35. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/sessions/claude.py +0 -0
  36. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/test_orchestrator_watchers.py +0 -0
  37. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/tools/__init__.py +0 -0
  38. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/__init__.py +0 -0
  39. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/base.py +0 -0
  40. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/builtin.py +0 -0
  41. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/manager.py +0 -0
  42. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/registry.py +0 -0
  43. {zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/watchers/test_watchers.py +0 -0
{zwarm-3.10.2 → zwarm-3.10.5}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: zwarm
- Version: 3.10.2
+ Version: 3.10.5
  Summary: Multi-Agent CLI Orchestration Research Platform
  Requires-Python: <3.14,>=3.13
  Requires-Dist: prompt-toolkit>=3.0.52
@@ -87,14 +87,18 @@ Want a 3-minute walkthrough? See `docs/DEMO.md` for a pilot + interactive demo.

  ## Multi-Adapter Support

- zwarm supports multiple executor backends:
+ zwarm supports multiple executor backends with simple model shortcuts:

- | Adapter | CLI | Models | Config |
- |---------|-----|--------|--------|
- | **Codex** | `codex` | gpt-5.1-codex-mini, etc. | `.zwarm/codex.toml` |
- | **Claude** | `claude` | sonnet, opus, haiku | `.zwarm/claude.toml` |
+ | Model | Alias | Description |
+ |-------|-------|-------------|
+ | `gpt-5.2-codex` | `5.2` | GPT-5.2 Codex - fast, great for code (default) |
+ | `gpt-5.2` | `5.2-think` | GPT-5.2 with extended reasoning |
+ | `sonnet` | - | Claude Sonnet - balanced |
+ | `opus` | - | Claude Opus - most capable |

- You can mix adapters in the same session - for example, use Claude Opus for complex reasoning tasks and Codex Mini for quick edits.
+ **Adapter is auto-detected from model name** - just use `model="opus"` and zwarm handles the rest.
+
+ Mix models freely - use Opus for complex reasoning, 5.2 for quick edits.

  ---

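The new README copy above says the adapter is inferred from the model name, but the detection code itself (in `src/zwarm/tools/delegation.py`) is not among the hunks shown here. A minimal sketch of what such a mapping could look like, using a hypothetical `resolve_adapter` helper:

```python
# Hypothetical sketch - not taken from the zwarm source in this diff.
def resolve_adapter(model: str) -> str:
    """Guess the executor backend ("claude" or "codex") from a model shortcut."""
    claude_models = {"opus", "sonnet", "claude-opus", "claude-sonnet"}
    if model.lower() in claude_models:
        return "claude"
    # Shortcuts like "5.2", "5.2-think", or full names like "gpt-5.2-codex"
    return "codex"

assert resolve_adapter("opus") == "claude"
assert resolve_adapter("5.2") == "codex"
```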
@@ -184,7 +188,7 @@ zwarm interactive

  | Command | Description |
  |---------|-------------|
- | `spawn "task" [--search]` | Start a new session (--search enables web) |
+ | `spawn "task" [--model M]` | Start a new session (model: 5.2, opus, sonnet) |
  | `ls` | Dashboard of all sessions (with costs, models) |
  | `? ID` / `peek ID` | Quick status check |
  | `show ID` | Full session details |
@@ -213,8 +217,8 @@ $ zwarm interactive
  ⟳ 2 running

  ID │ │ Task │ Model │ Tokens │ Cost
- abc123 │ ⟳ │ Add tests for the auth... │ codex-mini │ 5,234 │ $0.052
- def456 │ ⟳ │ Fix type errors in utils... │ codex-mini │ 2,100 │ $0.021
+ abc123 │ ⟳ │ Add tests for the auth... │ 5.2-codex │ 5,234 │ $0.052
+ def456 │ ⟳ │ Fix type errors in utils... │ 5.2-codex │ 2,100 │ $0.021

  > watch abc123
  Watching abc123... (Ctrl+C to stop)
@@ -254,17 +258,20 @@ The orchestrator LLM has access to:

  | Tool | Description |
  |------|-------------|
- | `delegate(task, adapter="codex")` | Start a new coding session |
+ | `delegate(task, model="5.2")` | Start a new coding session |
  | `converse(id, msg)` | Continue a session |
  | `check_session(id)` | Get full session details |
  | `peek_session(id)` | Quick status check |
+ | `get_trajectory(id)` | See what steps the agent took |
  | `list_sessions()` | List all sessions |
  | `end_session(id)` | Kill/delete a session |
  | `sleep(seconds)` | Wait before checking again |
+ | `bash(cmd)` | Run verification commands (tests, linters) |
+ | `exit()` | Signal task completion |

  **Async-first**: All sessions run in the background. The orchestrator uses `sleep()` to wait, then checks on progress.

- **Multi-adapter**: Pass `adapter="claude"` or `adapter="codex"` to `delegate()` to choose the backend.
+ **Model shortcuts**: Just use `model="5.2"` or `model="opus"` - the adapter is auto-detected.

  **Web Search**: Enable `web_search=True` in config for tasks needing current info (API docs, latest releases, etc.).

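Taken together, the new tool table reads as an async polling workflow: delegate work, sleep, peek, verify with `bash`, then `exit()`. The sketch below restates that loop as plain Python; the tool names and signatures come from the table, but the return values (a session id from `delegate`, a status dict from `peek_session`, an exit code from `bash`) are assumptions made only for illustration.

```python
# Illustrative control flow only - return shapes are assumed, not taken from zwarm.
def orchestrate(delegate, peek_session, converse, sleep, bash, exit):
    session_id = delegate("Add tests for the auth module", model="5.2")
    while True:
        sleep(30)                                  # async-first: wait, then check
        if peek_session(session_id).get("status") == "completed":
            break
    if bash("pytest -q") != 0:                     # verify the work with a real command
        converse(session_id, "pytest is failing - please fix the broken tests")
    exit()                                         # signal task completion
```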
@@ -361,14 +368,14 @@ enabled = ["progress", "budget", "delegation", "delegation_reminder"]

  **`.zwarm/codex.toml`** - Controls the Codex CLI:
  ```toml
- model = "gpt-5.1-codex-mini"
+ model = "gpt-5.2-codex" # or gpt-5.2 for extended reasoning
  model_reasoning_effort = "high" # low | medium | high
- full_auto = true
+ full_danger = true # Skip approval prompts
  ```

  **`.zwarm/claude.toml`** - Controls the Claude Code CLI:
  ```toml
- model = "sonnet" # sonnet | opus | haiku
+ model = "opus" # opus | sonnet
  full_danger = true # Skip permission prompts
  ```

{zwarm-3.10.2 → zwarm-3.10.5}/README.md

@@ -73,14 +73,18 @@ Want a 3-minute walkthrough? See `docs/DEMO.md` for a pilot + interactive demo.

  ## Multi-Adapter Support

- zwarm supports multiple executor backends:
+ zwarm supports multiple executor backends with simple model shortcuts:

- | Adapter | CLI | Models | Config |
- |---------|-----|--------|--------|
- | **Codex** | `codex` | gpt-5.1-codex-mini, etc. | `.zwarm/codex.toml` |
- | **Claude** | `claude` | sonnet, opus, haiku | `.zwarm/claude.toml` |
+ | Model | Alias | Description |
+ |-------|-------|-------------|
+ | `gpt-5.2-codex` | `5.2` | GPT-5.2 Codex - fast, great for code (default) |
+ | `gpt-5.2` | `5.2-think` | GPT-5.2 with extended reasoning |
+ | `sonnet` | - | Claude Sonnet - balanced |
+ | `opus` | - | Claude Opus - most capable |

- You can mix adapters in the same session - for example, use Claude Opus for complex reasoning tasks and Codex Mini for quick edits.
+ **Adapter is auto-detected from model name** - just use `model="opus"` and zwarm handles the rest.
+
+ Mix models freely - use Opus for complex reasoning, 5.2 for quick edits.

  ---

@@ -170,7 +174,7 @@ zwarm interactive

  | Command | Description |
  |---------|-------------|
- | `spawn "task" [--search]` | Start a new session (--search enables web) |
+ | `spawn "task" [--model M]` | Start a new session (model: 5.2, opus, sonnet) |
  | `ls` | Dashboard of all sessions (with costs, models) |
  | `? ID` / `peek ID` | Quick status check |
  | `show ID` | Full session details |
@@ -199,8 +203,8 @@ $ zwarm interactive
  ⟳ 2 running

  ID │ │ Task │ Model │ Tokens │ Cost
- abc123 │ ⟳ │ Add tests for the auth... │ codex-mini │ 5,234 │ $0.052
- def456 │ ⟳ │ Fix type errors in utils... │ codex-mini │ 2,100 │ $0.021
+ abc123 │ ⟳ │ Add tests for the auth... │ 5.2-codex │ 5,234 │ $0.052
+ def456 │ ⟳ │ Fix type errors in utils... │ 5.2-codex │ 2,100 │ $0.021

  > watch abc123
  Watching abc123... (Ctrl+C to stop)
@@ -240,17 +244,20 @@ The orchestrator LLM has access to:

  | Tool | Description |
  |------|-------------|
- | `delegate(task, adapter="codex")` | Start a new coding session |
+ | `delegate(task, model="5.2")` | Start a new coding session |
  | `converse(id, msg)` | Continue a session |
  | `check_session(id)` | Get full session details |
  | `peek_session(id)` | Quick status check |
+ | `get_trajectory(id)` | See what steps the agent took |
  | `list_sessions()` | List all sessions |
  | `end_session(id)` | Kill/delete a session |
  | `sleep(seconds)` | Wait before checking again |
+ | `bash(cmd)` | Run verification commands (tests, linters) |
+ | `exit()` | Signal task completion |

  **Async-first**: All sessions run in the background. The orchestrator uses `sleep()` to wait, then checks on progress.

- **Multi-adapter**: Pass `adapter="claude"` or `adapter="codex"` to `delegate()` to choose the backend.
+ **Model shortcuts**: Just use `model="5.2"` or `model="opus"` - the adapter is auto-detected.

  **Web Search**: Enable `web_search=True` in config for tasks needing current info (API docs, latest releases, etc.).

@@ -347,14 +354,14 @@ enabled = ["progress", "budget", "delegation", "delegation_reminder"]

  **`.zwarm/codex.toml`** - Controls the Codex CLI:
  ```toml
- model = "gpt-5.1-codex-mini"
+ model = "gpt-5.2-codex" # or gpt-5.2 for extended reasoning
  model_reasoning_effort = "high" # low | medium | high
- full_auto = true
+ full_danger = true # Skip approval prompts
  ```

  **`.zwarm/claude.toml`** - Controls the Claude Code CLI:
  ```toml
- model = "sonnet" # sonnet | opus | haiku
+ model = "opus" # opus | sonnet
  full_danger = true # Skip permission prompts
  ```

{zwarm-3.10.2 → zwarm-3.10.5}/pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "zwarm"
- version = "3.10.2"
+ version = "3.10.5"
  description = "Multi-Agent CLI Orchestration Research Platform"
  readme = "README.md"
  requires-python = ">=3.13,<3.14"

{zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/interactive.py

@@ -269,10 +269,10 @@ def cmd_ls(manager):
          task_preview = s.task[:23] + "..." if len(s.task) > 26 else s.task
          updated = time_ago(s.updated_at)

-         # Short model name (e.g., "gpt-5.1-codex-mini" -> "codex-mini")
+         # Short model name (e.g., "gpt-5.2-codex" -> "5.2-codex")
          model_short = s.model or "?"
          if "codex" in model_short.lower():
-             # Extract codex variant: gpt-5.1-codex-mini -> codex-mini
+             # Extract codex variant: gpt-5.2-codex -> 5.2-codex
              parts = model_short.split("-")
              codex_idx = next((i for i, p in enumerate(parts) if "codex" in p.lower()), -1)
              if codex_idx >= 0:
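The hunk above is cut off right after the `codex_idx` check, so the join that actually produces `5.2-codex` is not visible. A self-contained sketch of the shortening heuristic the updated comments describe (the final slice/join step is an assumption):

```python
def short_model_name(model: str | None) -> str:
    """Shorten e.g. "gpt-5.2-codex" to "5.2-codex" for the dashboard (sketch)."""
    model_short = model or "?"
    if "codex" in model_short.lower():
        parts = model_short.split("-")
        codex_idx = next((i for i, p in enumerate(parts) if "codex" in p.lower()), -1)
        if codex_idx >= 0:
            # Assumed continuation: keep the version segment plus the codex suffix.
            model_short = "-".join(parts[max(codex_idx - 1, 0):])
    return model_short

assert short_model_name("gpt-5.2-codex") == "5.2-codex"
```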
{zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/main.py

@@ -838,19 +838,17 @@ def init(
      console.print(" [dim]These control the underlying Codex CLI that runs executor sessions[/]\n")

      console.print(" Available models:")
-     console.print(" [cyan]1[/] gpt-5.2-codex [dim]- GPT-5.2 Codex, balanced (Recommended)[/]")
+     console.print(" [cyan]1[/] gpt-5.2-codex [dim]- GPT-5.2 Codex, fast and balanced (Recommended)[/]")
      console.print(" [cyan]2[/] gpt-5.2 [dim]- GPT-5.2 with extended reasoning[/]")
-     console.print(" [cyan]3[/] gpt-5.1-codex [dim]- GPT-5.1 Codex (legacy)[/]")

      model_choice = typer.prompt(
-         " Select model (1-3)",
+         " Select model (1-2)",
          default="1",
          type=str,
      )
      model_map = {
          "1": "gpt-5.2-codex",
          "2": "gpt-5.2",
-         "3": "gpt-5.1-codex",
      }
      codex_model = model_map.get(model_choice, model_choice)
      if model_choice not in model_map:
@@ -1668,7 +1666,7 @@ def session_start(
  $ zwarm session start "Fix the bug in auth.py"

  [dim]# With specific model[/]
- $ zwarm session start "Refactor the API" --model gpt-5.1-codex-max
+ $ zwarm session start "Refactor the API" --model gpt-5.2-codex

  [dim]# Web search is always available[/]
  $ zwarm session start "Research latest OAuth2 best practices"

{zwarm-3.10.2 → zwarm-3.10.5}/src/zwarm/cli/pilot.py

@@ -83,22 +83,14 @@ class ChoogingSpinner:
  # Context window sizes for different models (in tokens)
  # These are for the ORCHESTRATOR LLM, not the executors
  MODEL_CONTEXT_WINDOWS = {
-     # OpenAI models
+     # OpenAI models (via Codex CLI)
      "gpt-5.2-codex": 200_000,
      "gpt-5.2": 200_000,
-     "gpt-5.1-codex": 200_000,
-     "gpt-5.1-codex-mini": 200_000,
-     "gpt-5": 200_000,
-     "gpt-5-mini": 200_000,
-     "o3": 200_000,
-     "o3-mini": 200_000,
-     # Claude models (if used as orchestrator)
-     "claude-sonnet": 200_000,
-     "claude-opus": 200_000,
-     "claude-haiku": 200_000,
+     # Claude models (via Claude CLI)
      "sonnet": 200_000,
      "opus": 200_000,
-     "haiku": 200_000,
+     "claude-sonnet": 200_000,
+     "claude-opus": 200_000,
      # Fallback
      "default": 128_000,
  }
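Presumably the table is read with a fallback to its `"default"` entry for models that are not listed; a one-line helper (hypothetical name) would look like:

```python
def context_window(model: str) -> int:
    # Unknown models fall back to the 128k "default" entry.
    return MODEL_CONTEXT_WINDOWS.get(model, MODEL_CONTEXT_WINDOWS["default"])
```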
@@ -1080,7 +1072,7 @@ def _run_pilot_repl(
      renderer.status("")

      # Get model from orchestrator if available
-     model = "gpt-5.1-codex" # Default
+     model = "gpt-5.2-codex" # Default
      if hasattr(orchestrator, "lm") and hasattr(orchestrator.lm, "model"):
          model = orchestrator.lm.model
      elif hasattr(orchestrator, "config"):

zwarm-3.10.5/src/zwarm/compression/__init__.py

@@ -0,0 +1,37 @@
+ """
+ Compression modules for infinite-running agents.
+
+ Two types of compression:
+ 1. TC (Tool Call) Compression - compresses tool call results before they enter context
+ 2. Rollout Compression - manages message history eviction (LRU-style)
+
+ These modules allow agents to run virtually indefinitely without context explosion.
+ """
+
+ from .tc_compression import (
+     TCCompressor,
+     NoOpTCCompressor,
+     NaiveSizeTCCompressor,
+     get_tc_compressor,
+ )
+ from .rollout_compression import (
+     RolloutCompressor,
+     NoOpRolloutCompressor,
+     LRURolloutCompressor,
+     SlidingWindowRolloutCompressor,
+     get_rollout_compressor,
+ )
+
+ __all__ = [
+     # TC Compression
+     "TCCompressor",
+     "NoOpTCCompressor",
+     "NaiveSizeTCCompressor",
+     "get_tc_compressor",
+     # Rollout Compression
+     "RolloutCompressor",
+     "NoOpRolloutCompressor",
+     "LRURolloutCompressor",
+     "SlidingWindowRolloutCompressor",
+     "get_rollout_compressor",
+ ]
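The `__init__.py` re-exports both compressor families plus their factories. Since `tc_compression.py` is not included in this excerpt, only the rollout side can be shown concretely; a short sketch of the two equivalent ways to obtain a rollout compressor (defaults from `rollout_compression.py` below):

```python
from zwarm.compression import (
    LRURolloutCompressor,
    SlidingWindowRolloutCompressor,
    get_rollout_compressor,
)

# By name via the factory...
by_name = get_rollout_compressor("sliding_window", max_turns=20)

# ...or by instantiating a strategy class directly.
explicit = LRURolloutCompressor(max_messages=50, preserve_first_user=True)
```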
zwarm-3.10.5/src/zwarm/compression/rollout_compression.py

@@ -0,0 +1,292 @@
+ """
+ Rollout Compression - manages message history eviction for infinite-running agents.
+
+ As agents run, their conversation history grows. These compressors implement
+ different strategies for evicting old messages to keep context bounded.
+
+ Available compressors:
+ - NoOpRolloutCompressor: No eviction (context will eventually overflow)
+ - LRURolloutCompressor: Evict oldest messages, keeping system prompt
+ - SlidingWindowRolloutCompressor: Keep last N turns (user+assistant pairs)
+ """
+
+ from __future__ import annotations
+
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from typing import Any
+
+
+ @dataclass
+ class EvictionStats:
+     """Statistics about message eviction."""
+
+     messages_before: int = 0
+     messages_after: int = 0
+     messages_evicted: int = 0
+     tokens_evicted_estimate: int = 0 # Rough estimate
+     eviction_triggered: bool = False
+
+     def to_dict(self) -> dict[str, Any]:
+         return {
+             "messages_before": self.messages_before,
+             "messages_after": self.messages_after,
+             "messages_evicted": self.messages_evicted,
+             "tokens_evicted_estimate": self.tokens_evicted_estimate,
+             "eviction_triggered": self.eviction_triggered,
+         }
+
+
+ class RolloutCompressor(ABC):
+     """
+     Abstract base class for rollout (message history) compression.
+
+     Subclasses implement different eviction strategies to keep the
+     conversation history bounded while preserving important context.
+     """
+
+     name: str = "base"
+
+     @abstractmethod
+     def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
+         """
+         Compress message history, returning trimmed version and stats.
+
+         Args:
+             messages: List of message dicts with 'role' and 'content' keys
+
+         Returns:
+             (compressed_messages, eviction_stats)
+         """
+         pass
+
+     def should_compress(self, messages: list[dict]) -> bool:
+         """Check if compression is needed (subclasses may override)."""
+         return True
+
+     def __repr__(self) -> str:
+         return f"{self.__class__.__name__}()"
+
+
+ class NoOpRolloutCompressor(RolloutCompressor):
+     """
+     No-op compressor - keeps all messages.
+
+     Use this when you want to disable rollout compression and let the
+     context window naturally overflow (will error eventually).
+     """
+
+     name = "noop"
+
+     def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
+         """Pass through unchanged."""
+         return messages, EvictionStats(
+             messages_before=len(messages),
+             messages_after=len(messages),
+             eviction_triggered=False,
+         )
+
+
+ class LRURolloutCompressor(RolloutCompressor):
+     """
+     LRU (Least Recently Used) compressor - evicts oldest messages.
+
+     Keeps the system prompt and the most recent messages. When the message
+     count exceeds max_messages, evicts oldest non-system messages.
+
+     Args:
+         max_messages: Maximum messages to keep (default: 50)
+         preserve_system: Keep all system messages (default: True)
+         preserve_first_user: Keep first user message as context (default: True)
+     """
+
+     name = "lru"
+
+     def __init__(
+         self,
+         max_messages: int = 50,
+         preserve_system: bool = True,
+         preserve_first_user: bool = True,
+     ):
+         self.max_messages = max_messages
+         self.preserve_system = preserve_system
+         self.preserve_first_user = preserve_first_user
+
+     def should_compress(self, messages: list[dict]) -> bool:
+         """Only compress if we exceed max_messages."""
+         return len(messages) > self.max_messages
+
+     def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
+         """Evict oldest messages, keeping system prompt and recent history."""
+         stats = EvictionStats(messages_before=len(messages))
+
+         if not self.should_compress(messages):
+             stats.messages_after = len(messages)
+             return messages, stats
+
+         # Separate preserved messages from evictable ones
+         preserved = []
+         evictable = []
+
+         first_user_seen = False
+         for i, msg in enumerate(messages):
+             role = msg.get("role", "")
+
+             # Always preserve system messages
+             if self.preserve_system and role == "system":
+                 preserved.append((i, msg))
+             # Preserve first user message as task context
+             elif self.preserve_first_user and role == "user" and not first_user_seen:
+                 preserved.append((i, msg))
+                 first_user_seen = True
+             else:
+                 evictable.append((i, msg))
+
+         # Calculate how many evictable messages to keep
+         preserved_count = len(preserved)
+         keep_count = max(0, self.max_messages - preserved_count)
+
+         # Keep the most recent evictable messages
+         kept_evictable = evictable[-keep_count:] if keep_count > 0 else []
+         evicted = evictable[:-keep_count] if keep_count > 0 and len(evictable) > keep_count else []
+
+         # Merge preserved and kept messages, maintaining original order
+         all_kept = preserved + kept_evictable
+         all_kept.sort(key=lambda x: x[0]) # Sort by original index
+         result = [msg for _, msg in all_kept]
+
+         # Estimate tokens evicted (rough: ~4 chars per token)
+         evicted_content = sum(len(str(msg.get("content", ""))) for _, msg in evicted)
+         tokens_evicted = evicted_content // 4
+
+         stats.messages_after = len(result)
+         stats.messages_evicted = len(evicted)
+         stats.tokens_evicted_estimate = tokens_evicted
+         stats.eviction_triggered = len(evicted) > 0
+
+         return result, stats
+
+     def __repr__(self) -> str:
+         return f"LRURolloutCompressor(max_messages={self.max_messages})"
+
+
+ class SlidingWindowRolloutCompressor(RolloutCompressor):
+     """
+     Sliding window compressor - keeps last N turns (user+assistant pairs).
+
+     A "turn" is a user message followed by an assistant response. This
+     preserves conversation coherence better than raw message count.
+
+     Args:
+         max_turns: Maximum turns to keep (default: 20)
+         preserve_system: Keep all system messages (default: True)
+         preserve_first_turn: Keep first turn as context (default: True)
+     """
+
+     name = "sliding_window"
+
+     def __init__(
+         self,
+         max_turns: int = 20,
+         preserve_system: bool = True,
+         preserve_first_turn: bool = True,
+     ):
+         self.max_turns = max_turns
+         self.preserve_system = preserve_system
+         self.preserve_first_turn = preserve_first_turn
+
+     def compress(self, messages: list[dict]) -> tuple[list[dict], EvictionStats]:
+         """Keep last N turns, preserving system messages."""
+         stats = EvictionStats(messages_before=len(messages))
+
+         # Extract system messages
+         system_messages = []
+         conversation = []
+
+         for msg in messages:
+             if msg.get("role") == "system":
+                 system_messages.append(msg)
+             else:
+                 conversation.append(msg)
+
+         # Group conversation into turns (user + assistant + tool results)
+         turns: list[list[dict]] = []
+         current_turn: list[dict] = []
+
+         for msg in conversation:
+             role = msg.get("role", "")
+             if role == "user" and current_turn:
+                 # New user message starts a new turn
+                 turns.append(current_turn)
+                 current_turn = [msg]
+             else:
+                 current_turn.append(msg)
+
+         # Don't forget the last turn
+         if current_turn:
+             turns.append(current_turn)
+
+         # Decide which turns to keep
+         if len(turns) <= self.max_turns:
+             # No eviction needed
+             result = system_messages + conversation
+             stats.messages_after = len(result)
+             return result, stats
+
+         # Keep first turn + last (max_turns - 1) turns
+         kept_turns = []
+         if self.preserve_first_turn and turns:
+             kept_turns.append(turns[0])
+             remaining_turns = turns[1:]
+             kept_turns.extend(remaining_turns[-(self.max_turns - 1):])
+         else:
+             kept_turns = turns[-self.max_turns:]
+
+         # Flatten kept turns back into messages
+         kept_conversation = []
+         for turn in kept_turns:
+             kept_conversation.extend(turn)
+
+         result = system_messages + kept_conversation
+
+         # Calculate eviction stats
+         evicted_count = len(messages) - len(result)
+         stats.messages_after = len(result)
+         stats.messages_evicted = evicted_count
+         stats.eviction_triggered = evicted_count > 0
+
+         return result, stats
+
+     def __repr__(self) -> str:
+         return f"SlidingWindowRolloutCompressor(max_turns={self.max_turns})"
+
+
+ # =============================================================================
+ # Factory
+ # =============================================================================
+
+
+ def get_rollout_compressor(
+     name: str = "lru",
+     **kwargs,
+ ) -> RolloutCompressor:
+     """
+     Get a rollout compressor by name.
+
+     Args:
+         name: Compressor name ("noop", "lru", "sliding_window")
+         **kwargs: Passed to compressor constructor
+
+     Returns:
+         Configured RolloutCompressor instance
+     """
+     compressors = {
+         "noop": NoOpRolloutCompressor,
+         "lru": LRURolloutCompressor,
+         "sliding_window": SlidingWindowRolloutCompressor,
+     }
+
+     if name not in compressors:
+         raise ValueError(f"Unknown rollout compressor: {name}. Available: {list(compressors.keys())}")
+
+     return compressors[name](**kwargs)
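A small usage example of the LRU compressor defined above; the message contents are made up, but the API calls and the preservation behaviour follow the code as written:

```python
from zwarm.compression import get_rollout_compressor

compressor = get_rollout_compressor("lru", max_messages=4)

messages = [
    {"role": "system", "content": "You are the orchestrator."},
    {"role": "user", "content": "Fix the bug in auth.py"},        # first user msg: kept as task context
    {"role": "assistant", "content": "Delegating to a session."},
    {"role": "user", "content": "Status?"},
    {"role": "assistant", "content": "Still running."},
    {"role": "user", "content": "Status now?"},
]

trimmed, stats = compressor.compress(messages)
# The system prompt and first user message survive; the oldest of the rest are evicted.
print(stats.to_dict())  # {"messages_before": 6, "messages_after": 4, "messages_evicted": 2, ...}
```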