zwarm 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,256 @@
1
+ """
2
+ Built-in watchers for common trajectory alignment needs.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import re
8
+ from typing import Any
9
+
10
+ from zwarm.watchers.base import Watcher, WatcherContext, WatcherResult, WatcherAction
11
+ from zwarm.watchers.registry import register_watcher
12
+
13
+
14
@register_watcher("progress")
class ProgressWatcher(Watcher):
    """
    Watches for lack of progress.

    Detects when the agent appears stuck:
    - Repeating same tool calls
    - Not making session progress
    - Spinning without completing tasks

    Config keys (read from ``self.config``):
    - ``max_same_calls`` (default 3): how many identical consecutive tool
      calls trigger a nudge. A value below 1 disables the repetition check.
    - ``min_progress_steps`` (default 5): step number from which the
      session-completion check is applied.
    """

    name = "progress"
    description = "Detects when agent is stuck or spinning"

    @staticmethod
    def _tool_call_signatures(messages) -> list[str]:
        """Return one ``"name:arguments"`` string per tool call in *messages*."""
        signatures = []
        for msg in messages:
            # The key may be present with a None value; treat that like
            # "no tool calls" instead of failing on iteration.
            for tc in msg.get("tool_calls") or []:
                function = tc.get("function") or {}
                signatures.append(
                    f"{function.get('name', '')}:{function.get('arguments', '')}"
                )
        return signatures

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Inspect recent messages and events for signs of a stuck agent.

        Args:
            ctx: Context providing ``messages``, ``events`` and ``step``.

        Returns:
            A nudge when the last ``max_same_calls`` tool calls are identical,
            or when ``min_progress_steps`` has been reached with at least one
            ``session_started`` event and no ``session_completed`` event;
            otherwise ``WatcherResult.ok()``.
        """
        config = self.config
        max_same_calls = config.get("max_same_calls", 3)
        min_progress_steps = config.get("min_progress_steps", 5)

        # Check for repeated tool calls. The explicit ``> 0`` guard matters:
        # with 0 the slices below become ``[-0:]`` (the entire list), which
        # would flag a single tool call as "repetition".
        window = max_same_calls * 2
        if max_same_calls > 0 and len(ctx.messages) >= window:
            recent_assistant = [
                m for m in ctx.messages[-window:]
                if m.get("role") == "assistant"
            ]
            if len(recent_assistant) >= max_same_calls:
                tool_calls = self._tool_call_signatures(recent_assistant)
                if (
                    len(tool_calls) >= max_same_calls
                    and len(set(tool_calls[-max_same_calls:])) == 1
                ):
                    return WatcherResult.nudge(
                        guidance=(
                            "You appear to be repeating the same action. "
                            "Consider a different approach or ask for clarification."
                        ),
                        reason=f"Repeated tool call: {tool_calls[-1][:100]}",
                    )

        # Check for no session completions in a while
        if ctx.step >= min_progress_steps:
            any_started = any(
                e.get("kind") == "session_started" for e in ctx.events
            )
            any_completed = any(
                e.get("kind") == "session_completed" for e in ctx.events
            )
            if any_started and not any_completed:
                return WatcherResult.nudge(
                    guidance=(
                        "Several sessions have been started but none completed. "
                        "Focus on completing current sessions before starting new ones."
                    ),
                    reason="No session completions",
                )

        return WatcherResult.ok()
80
+
81
+
82
@register_watcher("budget")
class BudgetWatcher(Watcher):
    """
    Watches resource budget (steps, sessions).

    Emits a nudge once the share of used steps reaches ``warn_at_percent``
    (default 80) or the number of sessions reaches ``max_sessions``
    (default 10). The step check is evaluated first.
    """

    name = "budget"
    description = "Monitors resource usage against limits"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """Return a nudge when a step or session limit is near, else ok."""
        settings = self.config
        threshold = settings.get("warn_at_percent", 80)
        session_cap = settings.get("max_sessions", 10)

        # Step budget: only meaningful when a positive step limit is set.
        if ctx.max_steps > 0:
            used_pct = (ctx.step / ctx.max_steps) * 100
            if used_pct >= threshold:
                steps_left = ctx.max_steps - ctx.step
                step_guidance = (
                    f"You have {steps_left} steps remaining out of {ctx.max_steps}. "
                    "Prioritize completing the most important parts of the task."
                )
                return WatcherResult.nudge(
                    guidance=step_guidance,
                    reason=f"Step budget {used_pct:.0f}% used",
                )

        # Session budget: nothing to report while below the cap.
        if len(ctx.sessions) < session_cap:
            return WatcherResult.ok()

        session_guidance = (
            f"You have {len(ctx.sessions)} active sessions. "
            "Consider completing or closing existing sessions before starting new ones."
        )
        return WatcherResult.nudge(
            guidance=session_guidance,
            reason=f"Session limit reached ({session_cap})",
        )
122
+
123
+
124
@register_watcher("scope")
class ScopeWatcher(Watcher):
    """
    Watches for scope creep.

    Ensures the agent stays focused on the original task.

    Config keys (read from ``self.config``):
    - ``avoid_keywords`` (default ``[]``): case-insensitive substrings that
      trigger a nudge when found in recent message content.
    - ``max_tangent_steps`` (default 3): the last ``max_tangent_steps * 2``
      messages are scanned.

    Note: a ``focus_keywords`` config entry is not consulted by this
    implementation.
    """

    name = "scope"
    description = "Detects scope creep and keeps agent on task"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Scan recent message content for configured avoid keywords.

        Args:
            ctx: Context providing ``messages`` and ``task``.

        Returns:
            A nudge naming the first matching avoid keyword (in configured
            order), or ``WatcherResult.ok()`` when none matches.
        """
        avoid_keywords = self.config.get("avoid_keywords", [])
        max_tangent_steps = self.config.get("max_tangent_steps", 3)

        # Check last few messages for avoid keywords
        if avoid_keywords:
            window = max_tangent_steps * 2
            # ``or ""`` covers messages whose content is None.
            recent_content = " ".join(
                m.get("content", "") or ""
                for m in ctx.messages[-window:]
            ).lower()

            for keyword in avoid_keywords:
                if keyword.lower() in recent_content:
                    return WatcherResult.nudge(
                        guidance=(
                            f"The task involves '{keyword}' which may be out of scope. "
                            f"Remember the original task: {ctx.task[:200]}"
                        ),
                        reason=f"Detected avoid keyword: {keyword}",
                    )

        return WatcherResult.ok()
159
+
160
+
161
@register_watcher("pattern")
class PatternWatcher(Watcher):
    """
    Watches for specific patterns in output.

    The ``patterns`` config entry is a list of dicts shaped like
    ``{"regex": "...", "action": "nudge|pause|abort", "message": "..."}``.
    Patterns are tried in configured order against the last 10 messages;
    the first pattern that matches decides the result.
    """

    name = "pattern"
    description = "Watches for configurable patterns in output"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """Return the action of the first matching pattern, else ok."""
        for spec in self.config.get("patterns", []):
            expr = spec.get("regex")
            if not expr:
                continue

            # Patterns that fail to compile are ignored.
            try:
                matcher = re.compile(expr, re.IGNORECASE)
            except re.error:
                continue

            # Matching is case-insensitive; None content counts as empty.
            matched = any(
                matcher.search(msg.get("content", "") or "")
                for msg in ctx.messages[-10:]
            )
            if not matched:
                continue

            verdict = spec.get("action", "nudge")
            text = spec.get("message", f"Pattern matched: {expr}")

            if verdict == "abort":
                return WatcherResult.abort(text)
            if verdict == "pause":
                return WatcherResult.pause(text)
            # Any other action value falls back to a nudge.
            return WatcherResult.nudge(guidance=text, reason=f"Pattern: {expr}")

        return WatcherResult.ok()
202
+
203
+
204
@register_watcher("quality")
class QualityWatcher(Watcher):
    """
    Watches for quality issues.

    Implemented checks:
    - More than ``max_files_changed`` (default 10) files changed
    - Code files changed without any test-looking file, when
      ``require_tests`` (default True) is set
    """

    name = "quality"
    description = "Watches for quality issues in code changes"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """Return a nudge for oversized or untested changes, else ok."""
        settings = self.config
        tests_required = settings.get("require_tests", True)
        file_cap = settings.get("max_files_changed", 10)

        changed = ctx.files_changed

        # Large change sets are reported before the test check runs.
        if len(changed) > file_cap:
            return WatcherResult.nudge(
                guidance=(
                    f"You've modified {len(changed)} files. "
                    "Consider breaking this into smaller, focused changes."
                ),
                reason=f"Large change: {len(changed)} files",
            )

        if not (tests_required and changed):
            return WatcherResult.ok()

        def is_plain_code(path):
            # A recognised source extension that does not look like a test.
            if not path.endswith((".py", ".js", ".ts", ".go", ".rs")):
                return False
            return not (
                path.startswith("test_")
                or path.endswith("_test.py")
                or "/test" in path
            )

        code_paths = [path for path in changed if is_plain_code(path)]
        # Any path containing "test" (case-insensitive) counts as a test file.
        test_paths = [path for path in changed if "test" in path.lower()]

        if code_paths and not test_paths:
            return WatcherResult.nudge(
                guidance=(
                    "Code files were modified but no test files were added or updated. "
                    "Consider adding tests for the changes."
                ),
                reason="Code without tests",
            )

        return WatcherResult.ok()
@@ -0,0 +1,143 @@
1
+ """
2
+ Watcher manager for running multiple watchers.
3
+
4
+ Handles:
5
+ - Running watchers in parallel
6
+ - Combining results by priority
7
+ - Injecting guidance into orchestrator
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ from dataclasses import dataclass, field
14
+ from typing import Any
15
+
16
+ from zwarm.watchers.base import Watcher, WatcherContext, WatcherResult, WatcherAction
17
+ from zwarm.watchers.registry import get_watcher
18
+
19
+
20
@dataclass
class WatcherConfig:
    """Settings describing one watcher to load: its name, on/off flag and options."""

    # Registered watcher name used for the registry lookup.
    name: str
    # Disabled entries are not instantiated by the manager.
    enabled: bool = field(default=True)
    # Options handed to the watcher; each instance gets its own fresh dict.
    config: dict[str, Any] = field(default_factory=dict)
27
+
28
+
29
class WatcherManager:
    """
    Manages and runs multiple watchers.

    Watchers are run together via ``asyncio.gather`` and their results are
    combined by action severity, then by result priority.
    """

    def __init__(self, watcher_configs: list[WatcherConfig | dict] | None = None):
        """
        Initialize manager with watcher configurations.

        Disabled entries are ignored. Entries whose watcher cannot be created
        (``get_watcher`` raises ``ValueError``) are skipped with a warning
        rather than aborting construction.

        Args:
            watcher_configs: List of WatcherConfig or dicts with watcher configs
        """
        self._watchers: list[Watcher] = []
        self._results_history: list[tuple[str, WatcherResult]] = []

        # Load watchers from configs
        for cfg in watcher_configs or []:
            if isinstance(cfg, dict):
                cfg = WatcherConfig(**cfg)

            if not cfg.enabled:
                continue

            try:
                watcher = get_watcher(cfg.name, cfg.config)
            except ValueError as exc:
                # Best-effort loading is kept, but a misspelled watcher name
                # should be visible instead of vanishing silently.
                self._logger().warning("Skipping watcher %r: %s", cfg.name, exc)
                continue
            self._watchers.append(watcher)

    @staticmethod
    def _logger():
        """Return this module's logger (imported locally to keep the class self-contained)."""
        import logging

        return logging.getLogger(__name__)

    def add_watcher(self, watcher: Watcher) -> None:
        """Add a watcher instance."""
        self._watchers.append(watcher)

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Run all watchers and return combined result.

        Results are combined by priority:
        - ABORT takes precedence over everything
        - PAUSE takes precedence over NUDGE
        - NUDGE takes precedence over CONTINUE
        - Within same action, higher priority wins

        Watchers that raise are logged and excluded from the combination.
        Every valid result is also appended to the results history.

        Args:
            ctx: Context for watchers

        Returns:
            Combined WatcherResult; ``WatcherResult.ok()`` when there are no
            watchers, no valid results, or only CONTINUE results.
        """
        if not self._watchers:
            return WatcherResult.ok()

        # Run all watchers together; exceptions come back as result values.
        tasks = [watcher.observe(ctx) for watcher in self._watchers]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Collect valid results with their watcher names
        valid_results: list[tuple[str, WatcherResult]] = []
        for watcher, result in zip(self._watchers, results):
            if isinstance(result, BaseException):
                # One failing watcher must not take down the others.
                self._logger().warning(
                    "Watcher %r failed: %r", watcher.name, result
                )
                continue
            if isinstance(result, WatcherResult):
                valid_results.append((watcher.name, result))
                self._results_history.append((watcher.name, result))

        if not valid_results:
            return WatcherResult.ok()

        # Severity ranking (abort > pause > nudge > continue), built once per
        # call instead of once per sort-key evaluation.
        action_order = {
            WatcherAction.ABORT: 0,
            WatcherAction.PAUSE: 1,
            WatcherAction.NUDGE: 2,
            WatcherAction.CONTINUE: 3,
        }

        def sort_key(item: tuple[str, WatcherResult]) -> tuple[int, int]:
            _, result = item
            # Negated priority so that higher priority sorts first.
            return (action_order[result.action], -result.priority)

        # Stable sort: ties keep the watchers' registration order.
        valid_results.sort(key=sort_key)

        # Return highest priority non-continue result
        for name, result in valid_results:
            if result.action != WatcherAction.CONTINUE:
                # Add which watcher triggered this
                result.metadata["triggered_by"] = name
                return result

        return WatcherResult.ok()

    def get_history(self) -> list[tuple[str, WatcherResult]]:
        """Get a copy of the history of all watcher results recorded so far."""
        return list(self._results_history)

    def clear_history(self) -> None:
        """Clear results history."""
        self._results_history.clear()
128
+
129
+
130
def build_watcher_manager(
    config: dict[str, Any] | None = None
) -> WatcherManager:
    """
    Create a WatcherManager from a configuration mapping.

    Args:
        config: Mapping whose "watchers" entry lists the watcher configs.
            A missing or empty mapping yields a manager without watchers.

    Returns:
        WatcherManager built from the listed watcher configs
    """
    settings = config if config else {}
    entries = settings.get("watchers", [])
    return WatcherManager(entries)
@@ -0,0 +1,57 @@
1
+ """
2
+ Watcher registry for discovering and instantiating watchers.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Type
8
+
9
+ from zwarm.watchers.base import Watcher
10
+
11
+
12
+ # Global watcher registry
13
+ _WATCHERS: dict[str, Type[Watcher]] = {}
14
+
15
+
16
def register_watcher(name: str):
    """
    Build a class decorator that registers a watcher under *name*.

    The decorated class gets its ``name`` attribute set to *name*, is stored
    in the global registry under that key, and is returned unchanged
    otherwise.

    Example:
        @register_watcher("progress")
        class ProgressWatcher(Watcher):
            ...
    """

    def _register(watcher_cls: Type[Watcher]) -> Type[Watcher]:
        setattr(watcher_cls, "name", name)
        _WATCHERS[name] = watcher_cls
        return watcher_cls

    return _register
32
+
33
+
34
def get_watcher(name: str, config: dict[str, Any] | None = None) -> Watcher:
    """
    Instantiate the watcher registered under *name*.

    Args:
        name: Registered watcher name
        config: Optional config passed to the watcher constructor

    Returns:
        New watcher instance

    Raises:
        ValueError: If no watcher is registered under *name*
    """
    try:
        watcher_cls = _WATCHERS[name]
    except KeyError:
        # Report the known names so a typo is easy to spot.
        raise ValueError(
            f"Unknown watcher: {name}. Available: {list(_WATCHERS.keys())}"
        ) from None
    return watcher_cls(config)
53
+
54
+
55
def list_watchers() -> list[str]:
    """Return the names of all registered watchers as a new list."""
    return [*_WATCHERS]
@@ -0,0 +1,195 @@
1
+ """Tests for the watcher system."""
2
+
3
+ import pytest
4
+
5
+ from zwarm.watchers import (
6
+ Watcher,
7
+ WatcherContext,
8
+ WatcherResult,
9
+ WatcherAction,
10
+ WatcherManager,
11
+ WatcherConfig,
12
+ get_watcher,
13
+ list_watchers,
14
+ )
15
+
16
+
17
class TestWatcherRegistry:
    """Registry lookups for the built-in watchers."""

    def test_list_watchers(self):
        """Every built-in watcher name is present in the registry."""
        registered = list_watchers()
        for expected in ("progress", "budget", "scope", "pattern", "quality"):
            assert expected in registered

    def test_get_watcher(self):
        """A watcher fetched by name reports that name."""
        assert get_watcher("progress").name == "progress"

    def test_get_unknown_watcher(self):
        """Requesting an unregistered name raises ValueError."""
        with pytest.raises(ValueError, match="Unknown watcher"):
            get_watcher("nonexistent")
36
+
37
+
38
class TestProgressWatcher:
    """Behaviour of the progress watcher."""

    @pytest.mark.asyncio
    async def test_continues_on_normal_progress(self):
        """A short, ordinary conversation yields CONTINUE."""
        conversation = [
            {"role": "user", "content": "Start"},
            {"role": "assistant", "content": "Working on it"},
        ]
        ctx = WatcherContext(
            task="Test task",
            step=2,
            max_steps=10,
            messages=conversation,
        )
        outcome = await get_watcher("progress").observe(ctx)
        assert outcome.action == WatcherAction.CONTINUE
54
+
55
+
56
class TestBudgetWatcher:
    """Behaviour of the budget watcher."""

    @pytest.mark.asyncio
    async def test_warns_at_budget_threshold(self):
        """Crossing the warning threshold produces a nudge about remaining steps."""
        budget = get_watcher("budget", {"warn_at_percent": 80})
        # step 9 of 10 is 90% of max, above the 80% threshold
        ctx = WatcherContext(task="Test task", step=9, max_steps=10, messages=[])
        outcome = await budget.observe(ctx)
        assert outcome.action == WatcherAction.NUDGE
        assert "remaining" in outcome.guidance.lower()

    @pytest.mark.asyncio
    async def test_continues_when_under_budget(self):
        """Usage well below the threshold yields CONTINUE."""
        budget = get_watcher("budget")
        ctx = WatcherContext(task="Test task", step=2, max_steps=10, messages=[])
        outcome = await budget.observe(ctx)
        assert outcome.action == WatcherAction.CONTINUE
83
+
84
+
85
class TestPatternWatcher:
    """Behaviour of the pattern watcher."""

    @pytest.mark.asyncio
    async def test_detects_pattern(self):
        """A matching nudge pattern returns its configured message."""
        rule = {"regex": r"ERROR", "action": "nudge", "message": "Error detected!"}
        pattern_watcher = get_watcher("pattern", {"patterns": [rule]})
        ctx = WatcherContext(
            task="Test task",
            step=1,
            max_steps=10,
            messages=[{"role": "assistant", "content": "Got ERROR in the build"}],
        )
        outcome = await pattern_watcher.observe(ctx)
        assert outcome.action == WatcherAction.NUDGE
        assert "Error detected" in outcome.guidance

    @pytest.mark.asyncio
    async def test_abort_pattern(self):
        """A matching abort pattern returns ABORT."""
        rule = {"regex": r"rm -rf /", "action": "abort", "message": "Dangerous command!"}
        pattern_watcher = get_watcher("pattern", {"patterns": [rule]})
        ctx = WatcherContext(
            task="Test task",
            step=1,
            max_steps=10,
            messages=[{"role": "assistant", "content": "Running rm -rf /"}],
        )
        outcome = await pattern_watcher.observe(ctx)
        assert outcome.action == WatcherAction.ABORT
124
+
125
+
126
class TestWatcherManager:
    """Behaviour of the watcher manager."""

    @pytest.mark.asyncio
    async def test_runs_multiple_watchers(self):
        """Observing with several watchers returns a WatcherResult."""
        configs = [WatcherConfig(name="progress"), WatcherConfig(name="budget")]
        manager = WatcherManager(configs)
        ctx = WatcherContext(task="Test task", step=2, max_steps=10, messages=[])
        outcome = await manager.observe(ctx)
        assert isinstance(outcome, WatcherResult)

    @pytest.mark.asyncio
    async def test_highest_priority_wins(self):
        """An abort from one watcher outranks a nudge from another."""
        # Budget nudges at 50%; the pattern watcher aborts on "ABORT".
        nudging = WatcherConfig(name="budget", config={"warn_at_percent": 50})
        aborting = WatcherConfig(
            name="pattern",
            config={
                "patterns": [{"regex": "ABORT", "action": "abort", "message": "Abort!"}]
            },
        )
        manager = WatcherManager([nudging, aborting])
        ctx = WatcherContext(
            task="Test task",
            step=6,  # 60% - triggers budget nudge
            max_steps=10,
            messages=[{"role": "assistant", "content": "Must ABORT now"}],
        )
        outcome = await manager.observe(ctx)
        # Abort should take precedence over nudge
        assert outcome.action == WatcherAction.ABORT

    @pytest.mark.asyncio
    async def test_empty_manager_continues(self):
        """A manager without watchers yields CONTINUE."""
        manager = WatcherManager([])
        ctx = WatcherContext(task="Test task", step=1, max_steps=10, messages=[])
        outcome = await manager.observe(ctx)
        assert outcome.action == WatcherAction.CONTINUE

    @pytest.mark.asyncio
    async def test_disabled_watcher_skipped(self):
        """A disabled watcher never contributes a result."""
        always_abort = WatcherConfig(
            name="pattern",
            enabled=False,
            config={
                "patterns": [{"regex": ".*", "action": "abort", "message": "Always abort"}]
            },
        )
        manager = WatcherManager([always_abort])
        ctx = WatcherContext(
            task="Test task",
            step=1,
            max_steps=10,
            messages=[
                {"role": "assistant", "content": "This would normally trigger abort"}
            ],
        )
        outcome = await manager.observe(ctx)
        # Since the pattern watcher is disabled, should continue
        assert outcome.action == WatcherAction.CONTINUE