zwarm 0.1.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,312 @@
1
+ """Tests for the compact module."""
2
+
3
+ import pytest
4
+
5
+ from zwarm.core.compact import (
6
+ compact_messages,
7
+ estimate_tokens,
8
+ find_tool_groups,
9
+ should_compact,
10
+ )
11
+
12
+
13
class TestEstimateTokens:
    """Checks for ``estimate_tokens`` on dict-shaped messages."""

    def test_simple_messages(self):
        """Two short text messages produce the expected estimate."""
        user_turn = {"role": "user", "content": "Hello world"}  # 11 chars
        assistant_turn = {"role": "assistant", "content": "Hi there!"}  # 9 chars
        # 20 characters of content in total; the test expects 5 tokens.
        assert estimate_tokens([user_turn, assistant_turn]) == 5

    def test_empty_messages(self):
        """An empty conversation is estimated at zero tokens."""
        estimate = estimate_tokens([])
        assert estimate == 0

    def test_messages_with_tool_calls(self):
        """A message that carries tool calls yields a positive estimate."""
        call = {"function": {"name": "read", "arguments": '{"path": "/foo/bar"}'}}
        conversation = [
            {"role": "assistant", "content": "Let me check", "tool_calls": [call]},
        ]
        assert estimate_tokens(conversation) > 0
41
+
42
+
43
class TestFindToolGroups:
    """Checks that ``find_tool_groups`` reports groups as (first, last) index pairs."""

    def test_no_tool_calls(self):
        """A conversation without tool activity has no groups."""
        conversation = [
            {"role": "system", "content": "You are helpful"},
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi!"},
        ]
        assert find_tool_groups(conversation) == []

    def test_openai_format_tool_call(self):
        """An OpenAI-style call and its single response form one group."""
        request = {
            "role": "assistant",
            "content": "Reading...",
            "tool_calls": [{"id": "tc1", "function": {"name": "read"}}],
        }
        response = {"role": "tool", "tool_call_id": "tc1", "content": "file contents"}
        conversation = [
            {"role": "system", "content": "System"},
            {"role": "user", "content": "Read file"},
            request,
            response,
            {"role": "assistant", "content": "Here's the file"},
        ]
        # Index 2 is the assistant request, index 3 the tool response.
        expected = [(2, 3)]
        assert find_tool_groups(conversation) == expected

    def test_multiple_tool_responses(self):
        """Consecutive tool responses all belong to the preceding call's group."""
        request = {
            "role": "assistant",
            "tool_calls": [
                {"id": "tc1", "function": {"name": "a"}},
                {"id": "tc2", "function": {"name": "b"}},
            ],
        }
        responses = [
            {"role": "tool", "tool_call_id": "tc1", "content": "result1"},
            {"role": "tool", "tool_call_id": "tc2", "content": "result2"},
        ]
        conversation = [
            {"role": "user", "content": "Do things"},
            request,
            *responses,
            {"role": "assistant", "content": "Done"},
        ]
        # The request (1) and both responses (2, 3) make a single group.
        expected = [(1, 3)]
        assert find_tool_groups(conversation) == expected

    def test_anthropic_format_tool_use(self):
        """Anthropic-style tool_use / tool_result content blocks are grouped."""
        request = {
            "role": "assistant",
            "content": [
                {"type": "text", "text": "Reading..."},
                {"type": "tool_use", "id": "tu1", "name": "read", "input": {}},
            ],
        }
        response = {
            "role": "user",
            "content": [
                {"type": "tool_result", "tool_use_id": "tu1", "content": "data"},
            ],
        }
        conversation = [
            {"role": "user", "content": "Read file"},
            request,
            response,
            {"role": "assistant", "content": "Got it"},
        ]
        # Assistant tool_use message (1) plus the user tool_result message (2).
        expected = [(1, 2)]
        assert find_tool_groups(conversation) == expected
109
+
110
+
111
class TestCompactMessages:
    """Tests for ``compact_messages``: what is kept, what is removed, and markers."""

    def test_no_compaction_needed_few_messages(self):
        """Don't compact if we have fewer messages than keep thresholds."""
        messages = [
            {"role": "system", "content": "System"},
            {"role": "user", "content": "Task"},
            {"role": "assistant", "content": "Response"},
        ]
        result = compact_messages(messages, keep_first_n=2, keep_last_n=2)
        assert not result.was_compacted
        assert result.messages == messages
        assert "Too few" in result.preserved_reason

    def test_compacts_middle_messages(self):
        """Remove messages from the middle, keeping first and last."""
        messages = [
            {"role": "system", "content": "System"},
            {"role": "user", "content": "Task"},
            {"role": "assistant", "content": "Step 1"},
            {"role": "user", "content": "Continue"},
            {"role": "assistant", "content": "Step 2"},
            {"role": "user", "content": "More"},
            {"role": "assistant", "content": "Step 3"},
            {"role": "user", "content": "Final"},
            {"role": "assistant", "content": "Done"},
        ]
        result = compact_messages(messages, keep_first_n=2, keep_last_n=2)

        assert result.was_compacted
        assert result.removed_count > 0
        # First 2 and last 2 should be preserved
        assert result.messages[0]["content"] == "System"
        assert result.messages[1]["content"] == "Task"
        assert result.messages[-1]["content"] == "Done"
        assert result.messages[-2]["content"] == "Final"

    def test_preserves_tool_call_pairs(self):
        """Never split tool call from its response."""
        messages = [
            {"role": "system", "content": "System"},
            {"role": "user", "content": "Task"},
            {"role": "assistant", "content": "Old message 1"},
            {"role": "assistant", "content": "Old message 2"},
            {
                "role": "assistant",
                "content": "Calling tool",
                "tool_calls": [{"id": "tc1", "function": {"name": "test"}}],
            },
            {"role": "tool", "tool_call_id": "tc1", "content": "Tool result"},
            {"role": "assistant", "content": "Recent 1"},
            {"role": "user", "content": "Recent 2"},
        ]
        result = compact_messages(messages, keep_first_n=2, keep_last_n=2)

        # The tool call pair should either both be kept or both removed
        has_tool_call = any(m.get("tool_calls") for m in result.messages)
        has_tool_response = any(m.get("role") == "tool" for m in result.messages)

        # They should match - either both present or both absent
        assert has_tool_call == has_tool_response

    def test_adds_compaction_marker(self):
        """Add a marker message when compaction occurs."""
        messages = [
            {"role": "system", "content": "System"},
            {"role": "user", "content": "Task"},
        ] + [{"role": "assistant", "content": f"Msg {i}"} for i in range(20)]

        result = compact_messages(messages, keep_first_n=2, keep_last_n=3)

        # Assert compaction happened instead of guarding on it: a conditional
        # check would let this test pass without ever looking for the marker.
        # 22 messages with 2 + 3 kept leaves a removable middle, as in
        # test_compacts_middle_messages.
        assert result.was_compacted

        # Should have exactly one system message about compaction
        marker_msgs = [
            m for m in result.messages
            if m.get("role") == "system"
            and isinstance(m.get("content"), str)
            and "compacted" in m["content"].lower()
        ]
        assert len(marker_msgs) == 1

    def test_token_based_compaction(self):
        """Compact based on token threshold."""
        # Create messages that exceed the low token limit used below
        messages = [
            {"role": "system", "content": "System prompt " * 100},
            {"role": "user", "content": "Task " * 100},
        ] + [
            {"role": "assistant", "content": f"Response {i} " * 50}
            for i in range(10)
        ]

        # With a generous limit the outcome is not pinned down here; this call
        # only verifies that the max_tokens path runs without raising.
        compact_messages(messages, max_tokens=100000)

        # With such a low limit, should definitely try to compact
        result_over = compact_messages(messages, max_tokens=100, target_token_pct=0.5)
        assert result_over.original_count == len(messages)
208
+
209
+
210
class TestShouldCompact:
    """Checks the ``should_compact`` threshold decision."""

    def test_under_threshold(self):
        """A tiny conversation does not trigger compaction."""
        conversation = [{"role": "user", "content": "Hello"}]
        decision = should_compact(conversation, max_tokens=1000, threshold_pct=0.85)
        assert not decision

    def test_over_threshold(self):
        """A conversation well past the limit triggers compaction."""
        long_text = "x" * 4000  # ~1000 tokens
        conversation = [{"role": "user", "content": long_text}]
        decision = should_compact(conversation, max_tokens=500, threshold_pct=0.85)
        assert decision
220
+
221
+
222
class TestEdgeCases:
    """Boundary inputs for ``compact_messages``: tool-heavy, empty, and minimal."""

    def test_all_tool_calls(self):
        """Handle conversation that's mostly tool calls."""
        messages = [
            {"role": "system", "content": "System"},
            {"role": "user", "content": "Task"},
        ]
        # Add many tool call pairs
        for i in range(5):
            messages.append({
                "role": "assistant",
                "tool_calls": [{"id": f"tc{i}", "function": {"name": "test"}}],
            })
            messages.append({"role": "tool", "tool_call_id": f"tc{i}", "content": f"result{i}"})

        messages.append({"role": "assistant", "content": "Final"})

        result = compact_messages(messages, keep_first_n=2, keep_last_n=1)

        # Should still produce valid output
        assert len(result.messages) > 0

        # Check no orphaned tool calls: every call id that survived compaction
        # must have its response present, and every surviving response must
        # still have the call that produced it.
        called_ids = {
            call["id"]
            for msg in result.messages
            for call in (msg.get("tool_calls") or [])
        }
        answered_ids = {
            msg["tool_call_id"]
            for msg in result.messages
            if msg.get("role") == "tool"
        }
        assert called_ids == answered_ids

    def test_empty_messages(self):
        """Handle empty message list."""
        result = compact_messages([])
        assert result.messages == []
        assert not result.was_compacted

    def test_only_system_and_user(self):
        """Handle minimal conversation."""
        messages = [
            {"role": "system", "content": "System"},
            {"role": "user", "content": "Hello"},
        ]
        result = compact_messages(messages, keep_first_n=2, keep_last_n=2)
        assert not result.was_compacted
        assert result.messages == messages
267
+
268
+
269
class TestPydanticModelMessages:
    """Exercise the helpers with attribute-style message objects instead of dicts."""

    def test_estimate_tokens_with_objects(self):
        """estimate_tokens accepts objects exposing ``role`` and ``content``."""
        class MockMessage:
            # Minimal stand-in: only the two attributes this test sets.
            def __init__(self, role, content):
                self.role, self.content = role, content

        turns = [("user", "Hello world"), ("assistant", "Hi there!")]
        conversation = [MockMessage(role, text) for role, text in turns]
        assert estimate_tokens(conversation) > 0

    def test_should_compact_with_objects(self):
        """should_compact accepts objects exposing ``role`` and ``content``."""
        class MockMessage:
            # Minimal stand-in: only the two attributes this test sets.
            def __init__(self, role, content):
                self.role, self.content = role, content

        oversized = MockMessage("user", "x" * 4000)
        # Must not crash, and must report that compaction is needed.
        assert should_compact([oversized], max_tokens=500, threshold_pct=0.85) is True

    def test_find_tool_groups_with_objects(self):
        """find_tool_groups accepts objects and finds no groups without tool calls."""
        class MockMessage:
            # Stand-in that also carries a ``tool_calls`` attribute (None here).
            def __init__(self, role, content=None, tool_calls=None):
                self.role = role
                self.content = content
                self.tool_calls = tool_calls

        conversation = [
            MockMessage("user", "Task"),
            MockMessage("assistant", "Done"),
        ]
        # Must not crash on attribute-style messages.
        assert find_tool_groups(conversation) == []