zwarm 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zwarm/__init__.py +38 -0
- zwarm/adapters/__init__.py +0 -0
- zwarm/adapters/base.py +109 -0
- zwarm/adapters/claude_code.py +303 -0
- zwarm/adapters/codex_mcp.py +428 -0
- zwarm/adapters/test_codex_mcp.py +224 -0
- zwarm/cli/__init__.py +0 -0
- zwarm/cli/main.py +534 -0
- zwarm/core/__init__.py +0 -0
- zwarm/core/config.py +271 -0
- zwarm/core/environment.py +83 -0
- zwarm/core/models.py +299 -0
- zwarm/core/state.py +224 -0
- zwarm/core/test_config.py +160 -0
- zwarm/core/test_models.py +265 -0
- zwarm/orchestrator.py +405 -0
- zwarm/prompts/__init__.py +10 -0
- zwarm/prompts/orchestrator.py +214 -0
- zwarm/tools/__init__.py +17 -0
- zwarm/tools/delegation.py +357 -0
- zwarm/watchers/__init__.py +26 -0
- zwarm/watchers/base.py +131 -0
- zwarm/watchers/builtin.py +256 -0
- zwarm/watchers/manager.py +143 -0
- zwarm/watchers/registry.py +57 -0
- zwarm/watchers/test_watchers.py +195 -0
- zwarm-0.1.0.dist-info/METADATA +382 -0
- zwarm-0.1.0.dist-info/RECORD +30 -0
- zwarm-0.1.0.dist-info/WHEEL +4 -0
- zwarm-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Built-in watchers for common trajectory alignment needs.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from zwarm.watchers.base import Watcher, WatcherContext, WatcherResult, WatcherAction
|
|
11
|
+
from zwarm.watchers.registry import register_watcher
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@register_watcher("progress")
class ProgressWatcher(Watcher):
    """
    Watches for lack of progress.

    Detects when the agent appears stuck:
    - The last ``max_same_calls`` tool calls are identical
    - Sessions have been started but none has completed

    Config keys:
        max_same_calls: Number of identical trailing tool calls that
            triggers a nudge (default 3).
        min_progress_steps: Step number from which the session-completion
            check is applied (default 5).
    """

    name = "progress"
    description = "Detects when agent is stuck or spinning"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Inspect recent messages and events for signs of being stuck.

        Args:
            ctx: Context providing ``messages``, ``events`` and ``step``.

        Returns:
            A nudge result when repetition or missing session completions
            are detected, otherwise ``WatcherResult.ok()``.
        """
        config = self.config
        max_same_calls = config.get("max_same_calls", 3)
        min_progress_steps = config.get("min_progress_steps", 5)

        # Check for repeated tool calls. Only the last 2 * max_same_calls
        # messages are inspected.
        window = max_same_calls * 2
        if len(ctx.messages) >= window:
            recent_assistant = [
                m for m in ctx.messages[-window:]
                if m.get("role") == "assistant"
            ]
            if len(recent_assistant) >= max_same_calls:
                # Flatten every tool call into a "name:arguments" signature.
                tool_calls = []
                for msg in recent_assistant:
                    # The key may be present with a None value; iterating
                    # None would raise TypeError, so treat it as "no calls".
                    for tc in msg.get("tool_calls") or []:
                        tool_calls.append(
                            f"{tc.get('function', {}).get('name', '')}:{tc.get('function', {}).get('arguments', '')}"
                        )

                if len(tool_calls) >= max_same_calls:
                    # Stuck when the trailing signatures are all identical.
                    if len(set(tool_calls[-max_same_calls:])) == 1:
                        return WatcherResult.nudge(
                            guidance=(
                                "You appear to be repeating the same action. "
                                "Consider a different approach or ask for clarification."
                            ),
                            reason=f"Repeated tool call: {tool_calls[-1][:100]}",
                        )

        # Check for no session completions in a while
        if ctx.step >= min_progress_steps:
            any_started = any(
                e.get("kind") == "session_started" for e in ctx.events
            )
            any_completed = any(
                e.get("kind") == "session_completed" for e in ctx.events
            )
            if any_started and not any_completed:
                return WatcherResult.nudge(
                    guidance=(
                        "Several sessions have been started but none completed. "
                        "Focus on completing current sessions before starting new ones."
                    ),
                    reason="No session completions",
                )

        return WatcherResult.ok()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@register_watcher("budget")
class BudgetWatcher(Watcher):
    """
    Watches resource budget (steps, sessions).

    Nudges once the share of used steps reaches ``warn_at_percent``
    (default 80), or once the number of sessions reaches ``max_sessions``
    (default 10). The step check is evaluated first.
    """

    name = "budget"
    description = "Monitors resource usage against limits"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Compare step and session usage against the configured limits.

        Args:
            ctx: Context providing ``step``, ``max_steps`` and ``sessions``.

        Returns:
            A nudge when a limit is reached, otherwise ``WatcherResult.ok()``.
        """
        settings = self.config
        threshold = settings.get("warn_at_percent", 80)
        session_cap = settings.get("max_sessions", 10)

        # Step budget: skipped when max_steps is not positive, which also
        # avoids dividing by zero.
        if ctx.max_steps > 0:
            used_pct = (ctx.step / ctx.max_steps) * 100
            if used_pct >= threshold:
                steps_left = ctx.max_steps - ctx.step
                step_guidance = (
                    f"You have {steps_left} steps remaining out of {ctx.max_steps}. "
                    "Prioritize completing the most important parts of the task."
                )
                return WatcherResult.nudge(
                    guidance=step_guidance,
                    reason=f"Step budget {used_pct:.0f}% used",
                )

        # Session count
        if len(ctx.sessions) < session_cap:
            return WatcherResult.ok()

        session_guidance = (
            f"You have {len(ctx.sessions)} active sessions. "
            "Consider completing or closing existing sessions before starting new ones."
        )
        return WatcherResult.nudge(
            guidance=session_guidance,
            reason=f"Session limit reached ({session_cap})",
        )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@register_watcher("scope")
class ScopeWatcher(Watcher):
    """
    Watches for scope creep.

    Nudges when any configured ``avoid_keywords`` entry appears
    (case-insensitively) in the text of the most recent messages.

    Config keys:
        avoid_keywords: Keywords whose presence triggers a nudge
            (default: empty, which disables the check).
        max_tangent_steps: The last ``2 * max_tangent_steps`` messages are
            scanned (default 3).

    Note: a ``focus_keywords`` key may be supplied in the config, but this
    watcher does not consult it.
    """

    name = "scope"
    description = "Detects scope creep and keeps agent on task"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Scan recent message text for avoid keywords.

        Args:
            ctx: Context providing ``messages`` and ``task``.

        Returns:
            A nudge naming the first matching keyword, otherwise
            ``WatcherResult.ok()``.
        """
        config = self.config
        avoid_keywords = config.get("avoid_keywords", [])
        max_tangent_steps = config.get("max_tangent_steps", 3)

        if not avoid_keywords:
            return WatcherResult.ok()

        # Missing, None or non-string content is treated as empty text so
        # that a single unusual message cannot make str.join raise.
        recent_texts = []
        for m in ctx.messages[-max_tangent_steps * 2:]:
            content = m.get("content")
            recent_texts.append(content if isinstance(content, str) else "")
        recent_content = " ".join(recent_texts).lower()

        for keyword in avoid_keywords:
            if keyword.lower() in recent_content:
                return WatcherResult.nudge(
                    guidance=(
                        f"The task involves '{keyword}' which may be out of scope. "
                        f"Remember the original task: {ctx.task[:200]}"
                    ),
                    reason=f"Detected avoid keyword: {keyword}",
                )

        return WatcherResult.ok()
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@register_watcher("pattern")
class PatternWatcher(Watcher):
    """
    Watches for specific patterns in output.

    Configurable regex patterns that trigger nudges, pauses or aborts.

    Config keys:
        patterns: List of dicts shaped like
            ``{"regex": "...", "action": "nudge|pause|abort", "message": "..."}``.
            ``action`` defaults to ``"nudge"``; ``message`` defaults to
            ``"Pattern matched: <regex>"``. Patterns are matched
            case-insensitively against the last 10 messages.
    """

    name = "pattern"
    description = "Watches for configurable patterns in output"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Return the result for the first configured pattern that matches.

        Patterns are tried in configuration order; for each pattern the
        recent messages are scanned oldest to newest.

        Args:
            ctx: Context providing ``messages``.

        Returns:
            An abort, pause or nudge result for the first match, otherwise
            ``WatcherResult.ok()``.
        """
        import logging  # local import: only needed to report bad patterns

        config = self.config
        patterns = config.get("patterns", [])

        # Each pattern is: {"regex": "...", "action": "nudge|pause|abort", "message": "..."}
        for pattern_config in patterns:
            regex = pattern_config.get("regex")
            if not regex:
                continue

            try:
                compiled = re.compile(regex, re.IGNORECASE)
            except re.error as exc:
                # Still skipped (best effort), but no longer silently: a
                # mistyped abort rule would otherwise vanish without trace.
                logging.getLogger(__name__).warning(
                    "Skipping invalid watcher pattern %r: %s", regex, exc
                )
                continue

            # Check recent messages
            for msg in ctx.messages[-10:]:
                content = msg.get("content")
                if not isinstance(content, str):
                    # None or non-string content is treated as empty text
                    # so that compiled.search cannot raise TypeError.
                    content = ""
                if not compiled.search(content):
                    continue

                action = pattern_config.get("action", "nudge")
                message = pattern_config.get("message", f"Pattern matched: {regex}")

                if action == "abort":
                    return WatcherResult.abort(message)
                if action == "pause":
                    return WatcherResult.pause(message)
                # Any other action value falls back to a nudge.
                return WatcherResult.nudge(guidance=message, reason=f"Pattern: {regex}")

        return WatcherResult.ok()
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@register_watcher("quality")
class QualityWatcher(Watcher):
    """
    Watches for quality issues.

    Checks performed:
    - Large changes: more than ``max_files_changed`` files (default 10)
    - Code files changed without any test file among the changed files
      (only when ``require_tests`` is truthy, default True)
    """

    name = "quality"
    description = "Watches for quality issues in code changes"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Evaluate the set of changed files.

        Args:
            ctx: Context providing ``files_changed``.

        Returns:
            A nudge for an oversized change or for code changed without
            tests, otherwise ``WatcherResult.ok()``.
        """
        settings = self.config
        tests_required = settings.get("require_tests", True)
        file_limit = settings.get("max_files_changed", 10)

        # Large change check comes first.
        if len(ctx.files_changed) > file_limit:
            size_guidance = (
                f"You've modified {len(ctx.files_changed)} files. "
                "Consider breaking this into smaller, focused changes."
            )
            return WatcherResult.nudge(
                guidance=size_guidance,
                reason=f"Large change: {len(ctx.files_changed)} files",
            )

        if not (tests_required and ctx.files_changed):
            return WatcherResult.ok()

        # A "code file" has a known source suffix and does not look like a
        # test by prefix, suffix or directory.
        source_suffixes = (".py", ".js", ".ts", ".go", ".rs")
        code_files = [
            path
            for path in ctx.files_changed
            if path.endswith(source_suffixes)
            and not (
                path.startswith("test_")
                or path.endswith("_test.py")
                or "/test" in path
            )
        ]
        # Any changed path containing "test" (case-insensitive) counts as
        # a test file.
        touched_tests = any("test" in path.lower() for path in ctx.files_changed)

        if code_files and not touched_tests:
            return WatcherResult.nudge(
                guidance=(
                    "Code files were modified but no test files were added or updated. "
                    "Consider adding tests for the changes."
                ),
                reason="Code without tests",
            )

        return WatcherResult.ok()
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Watcher manager for running multiple watchers.
|
|
3
|
+
|
|
4
|
+
Handles:
|
|
5
|
+
- Running watchers in parallel
|
|
6
|
+
- Combining results by priority
|
|
7
|
+
- Injecting guidance into orchestrator
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from zwarm.watchers.base import Watcher, WatcherContext, WatcherResult, WatcherAction
|
|
17
|
+
from zwarm.watchers.registry import get_watcher
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class WatcherConfig:
    """Settings describing one watcher to load: its name, on/off switch and options."""

    # Registered watcher name to instantiate.
    name: str
    # Disabled entries are not loaded by the manager.
    enabled: bool = True
    # Options handed to the watcher; each instance gets its own dict.
    config: dict[str, Any] = field(default_factory=dict)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class WatcherManager:
    """
    Manages and runs multiple watchers.

    Watchers run concurrently via ``asyncio.gather`` and their results are
    combined by action severity, then by priority.
    """

    def __init__(self, watcher_configs: list[WatcherConfig | dict] | None = None):
        """
        Initialize manager with watcher configurations.

        Disabled configs are ignored. A config naming an unregistered
        watcher is skipped with a logged warning rather than raising.

        Args:
            watcher_configs: List of WatcherConfig or dicts with watcher configs
        """
        self._watchers: list[Watcher] = []
        self._results_history: list[tuple[str, WatcherResult]] = []

        # Load watchers from configs
        for cfg in watcher_configs or []:
            if isinstance(cfg, dict):
                cfg = WatcherConfig(**cfg)

            if not cfg.enabled:
                continue

            try:
                watcher = get_watcher(cfg.name, cfg.config)
            except ValueError as exc:
                # Unknown watcher: still skipped, but reported so that a
                # typo in the configuration is visible.
                self._logger().warning("Skipping watcher %r: %s", cfg.name, exc)
                continue
            self._watchers.append(watcher)

    @staticmethod
    def _logger():
        """Return this module's logger (imported lazily inside the class)."""
        import logging

        return logging.getLogger(__name__)

    def add_watcher(self, watcher: Watcher) -> None:
        """Add a watcher instance."""
        self._watchers.append(watcher)

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Run all watchers and return combined result.

        Results are combined by priority:
        - ABORT takes precedence over everything
        - PAUSE takes precedence over NUDGE
        - NUDGE takes precedence over CONTINUE
        - Within same action, higher priority wins

        A watcher that raises is logged and excluded from the combination.
        Every valid result is appended to the history. The winning result
        is mutated in place: ``metadata["triggered_by"]`` is set to the
        watcher's name.

        Args:
            ctx: Context for watchers

        Returns:
            Combined WatcherResult
        """
        if not self._watchers:
            return WatcherResult.ok()

        # Run all watchers concurrently; exceptions are returned, not raised.
        results = await asyncio.gather(
            *[watcher.observe(ctx) for watcher in self._watchers],
            return_exceptions=True,
        )

        # Collect valid results with their watcher names
        valid_results: list[tuple[str, WatcherResult]] = []
        for watcher, result in zip(self._watchers, results):
            if isinstance(result, Exception):
                self._logger().warning(
                    "Watcher %r failed and was skipped: %r", watcher.name, result
                )
                continue
            if isinstance(result, WatcherResult):
                valid_results.append((watcher.name, result))
                self._results_history.append((watcher.name, result))

        if not valid_results:
            return WatcherResult.ok()

        # Lower rank means more severe (abort > pause > nudge > continue).
        action_order = {
            WatcherAction.ABORT: 0,
            WatcherAction.PAUSE: 1,
            WatcherAction.NUDGE: 2,
            WatcherAction.CONTINUE: 3,
        }

        def sort_key(item: tuple[str, WatcherResult]) -> tuple[int, int]:
            _, result = item
            return (action_order[result.action], -result.priority)

        # min() returns the first minimal item, which is the element a
        # stable sort would place first.
        name, result = min(valid_results, key=sort_key)
        if result.action != WatcherAction.CONTINUE:
            # Add which watcher triggered this
            result.metadata["triggered_by"] = name
            return result

        return WatcherResult.ok()

    def get_history(self) -> list[tuple[str, WatcherResult]]:
        """Get a copy of the history of all watcher results."""
        return list(self._results_history)

    def clear_history(self) -> None:
        """Clear results history."""
        self._results_history.clear()
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def build_watcher_manager(
    config: dict[str, Any] | None = None
) -> WatcherManager:
    """
    Create a WatcherManager from a configuration mapping.

    Args:
        config: Mapping whose "watchers" entry lists the watcher configs.
            A missing or falsy mapping, or a missing "watchers" entry,
            yields a manager built from an empty list.

    Returns:
        The constructed WatcherManager
    """
    source = config if config else {}
    entries = source.get("watchers", [])
    return WatcherManager(entries)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Watcher registry for discovering and instantiating watchers.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any, Type
|
|
8
|
+
|
|
9
|
+
from zwarm.watchers.base import Watcher
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Global watcher registry
|
|
13
|
+
_WATCHERS: dict[str, Type[Watcher]] = {}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def register_watcher(name: str):
    """
    Build a class decorator that records a watcher class under ``name``.

    The decorated class has its ``name`` attribute set to ``name`` and is
    stored in the module-level registry; an existing entry with the same
    name is replaced.

    Example:
        @register_watcher("progress")
        class ProgressWatcher(Watcher):
            ...
    """

    def decorator(watcher_cls: Type[Watcher]) -> Type[Watcher]:
        _WATCHERS[name] = watcher_cls
        watcher_cls.name = name
        return watcher_cls

    return decorator
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_watcher(name: str, config: dict[str, Any] | None = None) -> Watcher:
    """
    Instantiate the watcher registered under ``name``.

    Args:
        name: Registered watcher name
        config: Optional config passed as the sole constructor argument

    Returns:
        A new watcher instance

    Raises:
        ValueError: If no watcher is registered under ``name``
    """
    watcher_cls = _WATCHERS.get(name)
    if name in _WATCHERS:
        return watcher_cls(config)

    raise ValueError(
        f"Unknown watcher: {name}. Available: {list(_WATCHERS.keys())}"
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def list_watchers() -> list[str]:
    """Return the names of all registered watchers as a new list."""
    return [registered_name for registered_name in _WATCHERS]
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""Tests for the watcher system."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from zwarm.watchers import (
|
|
6
|
+
Watcher,
|
|
7
|
+
WatcherContext,
|
|
8
|
+
WatcherResult,
|
|
9
|
+
WatcherAction,
|
|
10
|
+
WatcherManager,
|
|
11
|
+
WatcherConfig,
|
|
12
|
+
get_watcher,
|
|
13
|
+
list_watchers,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TestWatcherRegistry:
    """Registry lookups: listing, fetching by name, and unknown names."""

    def test_list_watchers(self):
        """Every built-in watcher name appears in the registry listing."""
        registered = list_watchers()
        for builtin_name in ("progress", "budget", "scope", "pattern", "quality"):
            assert builtin_name in registered

    def test_get_watcher(self):
        """Fetching by name yields a watcher carrying that name."""
        assert get_watcher("progress").name == "progress"

    def test_get_unknown_watcher(self):
        """An unregistered name raises ValueError."""
        with pytest.raises(ValueError, match="Unknown watcher"):
            get_watcher("nonexistent")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TestProgressWatcher:
    """Progress watcher behaviour on an unremarkable conversation."""

    @pytest.mark.asyncio
    async def test_continues_on_normal_progress(self):
        """A short, non-repetitive exchange yields CONTINUE."""
        transcript = [
            {"role": "user", "content": "Start"},
            {"role": "assistant", "content": "Working on it"},
        ]
        ctx = WatcherContext(
            task="Test task",
            step=2,
            max_steps=10,
            messages=transcript,
        )

        outcome = await get_watcher("progress").observe(ctx)

        assert outcome.action == WatcherAction.CONTINUE
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TestBudgetWatcher:
    """Budget watcher behaviour around the step-usage threshold."""

    @pytest.mark.asyncio
    async def test_warns_at_budget_threshold(self):
        """Usage at 90% with an 80% threshold produces a nudge."""
        budget = get_watcher("budget", {"warn_at_percent": 80})
        # step 9 of 10 is 90% of the budget
        ctx = WatcherContext(task="Test task", step=9, max_steps=10, messages=[])

        outcome = await budget.observe(ctx)

        assert outcome.action == WatcherAction.NUDGE
        assert "remaining" in outcome.guidance.lower()

    @pytest.mark.asyncio
    async def test_continues_when_under_budget(self):
        """Usage well below the default threshold yields CONTINUE."""
        budget = get_watcher("budget")
        ctx = WatcherContext(task="Test task", step=2, max_steps=10, messages=[])

        outcome = await budget.observe(ctx)

        assert outcome.action == WatcherAction.CONTINUE
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class TestPatternWatcher:
    """Pattern watcher behaviour for nudge and abort rules."""

    @pytest.mark.asyncio
    async def test_detects_pattern(self):
        """A matching nudge rule yields a NUDGE carrying its message."""
        rule = {"regex": r"ERROR", "action": "nudge", "message": "Error detected!"}
        watcher = get_watcher("pattern", {"patterns": [rule]})
        transcript = [{"role": "assistant", "content": "Got ERROR in the build"}]
        ctx = WatcherContext(
            task="Test task",
            step=1,
            max_steps=10,
            messages=transcript,
        )

        outcome = await watcher.observe(ctx)

        assert outcome.action == WatcherAction.NUDGE
        assert "Error detected" in outcome.guidance

    @pytest.mark.asyncio
    async def test_abort_pattern(self):
        """A matching abort rule yields ABORT."""
        rule = {"regex": r"rm -rf /", "action": "abort", "message": "Dangerous command!"}
        watcher = get_watcher("pattern", {"patterns": [rule]})
        transcript = [{"role": "assistant", "content": "Running rm -rf /"}]
        ctx = WatcherContext(
            task="Test task",
            step=1,
            max_steps=10,
            messages=transcript,
        )

        outcome = await watcher.observe(ctx)

        assert outcome.action == WatcherAction.ABORT
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class TestWatcherManager:
    """Manager behaviour: fan-out, severity ordering, empty and disabled configs."""

    @pytest.mark.asyncio
    async def test_runs_multiple_watchers(self):
        """Observing with several watchers returns a WatcherResult."""
        configs = [WatcherConfig(name="progress"), WatcherConfig(name="budget")]
        manager = WatcherManager(configs)
        ctx = WatcherContext(task="Test task", step=2, max_steps=10, messages=[])

        outcome = await manager.observe(ctx)

        assert isinstance(outcome, WatcherResult)

    @pytest.mark.asyncio
    async def test_highest_priority_wins(self):
        """An abort from one watcher outranks a nudge from another."""
        # Budget nudges at 50% usage; the pattern rule aborts on "ABORT".
        nudging_budget = WatcherConfig(name="budget", config={"warn_at_percent": 50})
        abort_rule = {"regex": "ABORT", "action": "abort", "message": "Abort!"}
        aborting_pattern = WatcherConfig(name="pattern", config={"patterns": [abort_rule]})
        manager = WatcherManager([nudging_budget, aborting_pattern])

        # step 6 of 10 is 60%, enough to trigger the budget nudge
        ctx = WatcherContext(
            task="Test task",
            step=6,
            max_steps=10,
            messages=[{"role": "assistant", "content": "Must ABORT now"}],
        )

        outcome = await manager.observe(ctx)

        # Abort should take precedence over nudge
        assert outcome.action == WatcherAction.ABORT

    @pytest.mark.asyncio
    async def test_empty_manager_continues(self):
        """A manager without watchers yields CONTINUE."""
        manager = WatcherManager([])
        ctx = WatcherContext(task="Test task", step=1, max_steps=10, messages=[])

        outcome = await manager.observe(ctx)

        assert outcome.action == WatcherAction.CONTINUE

    @pytest.mark.asyncio
    async def test_disabled_watcher_skipped(self):
        """A disabled watcher config is never run."""
        always_abort = {"regex": ".*", "action": "abort", "message": "Always abort"}
        disabled = WatcherConfig(
            name="pattern",
            enabled=False,
            config={"patterns": [always_abort]},
        )
        manager = WatcherManager([disabled])
        ctx = WatcherContext(
            task="Test task",
            step=1,
            max_steps=10,
            messages=[
                {"role": "assistant", "content": "This would normally trigger abort"}
            ],
        )

        outcome = await manager.observe(ctx)

        # Since the pattern watcher is disabled, should continue
        assert outcome.action == WatcherAction.CONTINUE
|