zwarm 1.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/watchers/base.py ADDED
@@ -0,0 +1,131 @@
1
+ """
2
+ Base watcher interface and types.
3
+
4
+ Watchers observe agent trajectories and can intervene to correct course.
5
+ They're designed to be:
6
+ - Composable: Layer multiple watchers for different concerns
7
+ - Non-blocking: Check asynchronously, don't slow down the agent
8
+ - Actionable: Return clear guidance when correction is needed
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from abc import ABC, abstractmethod
14
+ from dataclasses import dataclass, field
15
+ from enum import Enum
16
+ from typing import Any
17
+
18
+
19
class WatcherAction(str, Enum):
    """Responses a watcher can recommend after observing a trajectory.

    Members subclass ``str``, so each one compares equal to its lowercase
    string value.
    """

    # Keep going, trajectory looks good.
    CONTINUE = "continue"
    # Insert guidance into the next prompt.
    NUDGE = "nudge"
    # Pause for human review.
    PAUSE = "pause"
    # Stop execution immediately.
    ABORT = "abort"
26
+
27
+
28
@dataclass
class WatcherContext:
    """
    Snapshot of the run that is handed to a watcher for observation.

    Attributes:
        task: Original task.
        step: Current step number.
        max_steps: Maximum steps allowed.
        messages: Conversation history, one dict per message.
        sessions: Session activity records (dict schema is defined by the
            caller; not visible here).
        events: Session event records (dict schema is defined by the caller;
            not visible here).
        working_dir: Working directory, or ``None`` when not provided.
        files_changed: Paths of files changed so far.
        metadata: Free-form custom metadata.
    """

    # --- required: current orchestrator state ---
    task: str
    step: int
    max_steps: int
    messages: list[dict[str, Any]]

    # --- optional: session activity (fresh list per instance) ---
    sessions: list[dict[str, Any]] = field(default_factory=list)
    events: list[dict[str, Any]] = field(default_factory=list)

    # --- optional: working directory context ---
    working_dir: str | None = None
    files_changed: list[str] = field(default_factory=list)

    # --- optional: custom metadata (fresh dict per instance) ---
    metadata: dict[str, Any] = field(default_factory=dict)
52
+
53
+
54
@dataclass
class WatcherResult:
    """
    Outcome of a single watcher observation.

    Attributes:
        action: Recommended action; defaults to ``WatcherAction.CONTINUE``.
        reason: Why this action was recommended.
        guidance: Message to inject if the action is NUDGE.
        priority: Higher priority watchers take precedence.
        metadata: Free-form extra data attached to the result.
    """

    action: WatcherAction = WatcherAction.CONTINUE
    reason: str = ""
    guidance: str = ""
    priority: int = 0
    metadata: dict[str, Any] = field(default_factory=dict)

    @staticmethod
    def ok() -> "WatcherResult":
        """Build a CONTINUE result: the trajectory looks good."""
        # CONTINUE is the dataclass default, so no arguments are needed.
        return WatcherResult()

    @staticmethod
    def nudge(guidance: str, reason: str = "", priority: int = 0) -> "WatcherResult":
        """Build a NUDGE result carrying ``guidance`` to correct the trajectory."""
        result = WatcherResult(action=WatcherAction.NUDGE, reason=reason, guidance=guidance, priority=priority)
        return result

    @staticmethod
    def pause(reason: str, priority: int = 0) -> "WatcherResult":
        """Build a PAUSE result requesting human review."""
        result = WatcherResult(action=WatcherAction.PAUSE, reason=reason, priority=priority)
        return result

    @staticmethod
    def abort(reason: str, priority: int = 100) -> "WatcherResult":
        """Build an ABORT result; ``priority`` defaults to 100."""
        result = WatcherResult(action=WatcherAction.ABORT, reason=reason, priority=priority)
        return result
100
+
101
+
102
class Watcher(ABC):
    """
    Abstract base for all watchers.

    A watcher inspects the agent trajectory and returns a recommendation.
    Subclasses are meant to be stateless: everything they need arrives in the
    ``WatcherContext`` passed to :meth:`observe`.
    """

    # Class-level identity; subclasses override these.
    name: str = "base"
    description: str = ""

    def __init__(self, config: dict[str, Any] | None = None):
        """Store ``config``; a missing or empty config becomes a new empty dict."""
        self.config = config if config else {}

    @abstractmethod
    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Inspect the current trajectory and recommend an action.

        Args:
            ctx: Current context with all trajectory info

        Returns:
            WatcherResult with recommended action
        """
        ...

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"<{class_name}({self.name})>"
@@ -0,0 +1,424 @@
1
+ """
2
+ Built-in watchers for common trajectory alignment needs.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import re
8
+ from typing import Any
9
+
10
+ from zwarm.watchers.base import Watcher, WatcherContext, WatcherResult, WatcherAction
11
+ from zwarm.watchers.registry import register_watcher
12
+
13
+
14
@register_watcher("progress")
class ProgressWatcher(Watcher):
    """
    Watches for lack of progress.

    Detects when the agent appears stuck:
    - Repeating same tool calls
    - Sessions started but none completed after a number of steps

    Config keys:
        max_same_calls: Number of identical trailing tool calls that counts
            as repetition (default 3).
        min_progress_steps: Step number from which the session-completion
            check is applied (default 5).
    """

    name = "progress"
    description = "Detects when agent is stuck or spinning"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Nudge when tool calls repeat or no session has completed.

        Args:
            ctx: Current context; ``messages``, ``events`` and ``step`` are read.

        Returns:
            A NUDGE result for the first problem found, otherwise CONTINUE.
        """
        max_same_calls = self.config.get("max_same_calls", 3)
        min_progress_steps = self.config.get("min_progress_steps", 5)

        repeated = self._repeated_tool_call(ctx.messages, max_same_calls)
        if repeated is not None:
            return WatcherResult.nudge(
                guidance=(
                    "You appear to be repeating the same action. "
                    "Consider a different approach or ask for clarification."
                ),
                reason=f"Repeated tool call: {repeated[:100]}",
            )

        # Check for no session completions in a while
        if ctx.step >= min_progress_steps:
            kinds = [e.get("kind") for e in ctx.events]
            if "session_started" in kinds and "session_completed" not in kinds:
                return WatcherResult.nudge(
                    guidance=(
                        "Several sessions have been started but none completed. "
                        "Focus on completing current sessions before starting new ones."
                    ),
                    reason="No session completions",
                )

        return WatcherResult.ok()

    @staticmethod
    def _repeated_tool_call(
        messages: list[dict[str, Any]], max_same_calls: int
    ) -> str | None:
        """Return the repeated ``name:arguments`` signature, or None if calls vary."""
        window = max_same_calls * 2
        if len(messages) < window:
            return None

        recent_assistant = [
            m for m in messages[-window:] if m.get("role") == "assistant"
        ]
        if len(recent_assistant) < max_same_calls:
            return None

        signatures = []
        for msg in recent_assistant:
            # "tool_calls" (or a call's "function") may be present but None;
            # treat that the same as absent instead of raising TypeError.
            for tc in msg.get("tool_calls") or []:
                func = tc.get("function") or {}
                signatures.append(
                    f"{func.get('name', '')}:{func.get('arguments', '')}"
                )

        if len(signatures) < max_same_calls:
            return None

        tail = signatures[-max_same_calls:]
        # Exactly one distinct signature in the tail means every call matched.
        if len(set(tail)) == 1:
            return tail[-1]
        return None
80
+
81
+
82
@register_watcher("budget")
class BudgetWatcher(Watcher):
    """
    Tracks consumption of the step and session budgets.

    Issues a nudge once usage approaches or reaches the configured limits.

    Config keys:
        warn_at_percent: Percentage of ``max_steps`` at which to warn
            (default 80).
        max_sessions: Number of active sessions that triggers a warning
            (default 10).
    """

    name = "budget"
    description = "Monitors resource usage against limits"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """Nudge when the step budget or active-session limit is reached."""
        settings = self.config
        warn_at_percent = settings.get("warn_at_percent", 80)
        max_sessions = settings.get("max_sessions", 10)

        # Step budget: skipped entirely when max_steps is not positive.
        if ctx.max_steps > 0:
            percent_used = (ctx.step / ctx.max_steps) * 100
            if percent_used >= warn_at_percent:
                remaining = ctx.max_steps - ctx.step
                step_guidance = (
                    f"You have {remaining} steps remaining out of {ctx.max_steps}. "
                    "Prioritize completing the most important parts of the task."
                )
                return WatcherResult.nudge(
                    guidance=step_guidance,
                    reason=f"Step budget {percent_used:.0f}% used",
                )

        # Session budget: only sessions whose status is "active" are counted.
        active_count = sum(1 for s in ctx.sessions if s.get("status") == "active")
        if active_count < max_sessions:
            return WatcherResult.ok()

        session_guidance = (
            f"You have {active_count} active sessions. "
            "Consider completing or closing existing sessions before starting new ones."
        )
        return WatcherResult.nudge(
            guidance=session_guidance,
            reason=f"Active session limit reached ({active_count}/{max_sessions})",
        )
126
+
127
+
128
@register_watcher("scope")
class ScopeWatcher(Watcher):
    """
    Watches for scope creep.

    Ensures the agent stays focused on the original task by scanning recent
    message text for configured keywords to avoid.

    Config keys:
        avoid_keywords: Keywords whose (case-insensitive) presence in recent
            messages triggers a nudge (default: none).
        max_tangent_steps: The last ``max_tangent_steps * 2`` messages are
            scanned (default 3).
    """

    name = "scope"
    description = "Detects scope creep and keeps agent on task"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Nudge when an avoid-keyword appears in the recent messages.

        Args:
            ctx: Current context; ``messages`` and ``task`` are read.

        Returns:
            A NUDGE result for the first matching keyword, otherwise CONTINUE.
        """
        avoid_keywords = self.config.get("avoid_keywords", [])
        max_tangent_steps = self.config.get("max_tangent_steps", 3)

        if not avoid_keywords:
            return WatcherResult.ok()

        recent_content = " ".join(
            self._text_of(m) for m in ctx.messages[-max_tangent_steps * 2:]
        ).lower()

        for keyword in avoid_keywords:
            if keyword.lower() in recent_content:
                return WatcherResult.nudge(
                    guidance=(
                        f"The task involves '{keyword}' which may be out of scope. "
                        f"Remember the original task: {ctx.task[:200]}"
                    ),
                    reason=f"Detected avoid keyword: {keyword}",
                )

        return WatcherResult.ok()

    @staticmethod
    def _text_of(message: dict[str, Any]) -> str:
        """Return the message's string content, or "" when it is missing or not a string."""
        content = message.get("content")
        # Non-string content (None, a list of parts, ...) cannot be joined or
        # searched as text, so it contributes an empty string.
        return content if isinstance(content, str) else ""
163
+
164
+
165
@register_watcher("pattern")
class PatternWatcher(Watcher):
    """
    Matches configurable regular expressions against recent output.

    Config keys:
        patterns: List of dicts shaped like
            {"regex": "...", "action": "nudge|pause|abort", "message": "..."}.
            Entries without a regex, or whose regex fails to compile, are
            skipped.
    """

    name = "pattern"
    description = "Watches for configurable patterns in output"

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """Return the configured action for the first pattern that matches."""
        for spec in self.config.get("patterns", []):
            regex = spec.get("regex")
            if not regex:
                continue

            try:
                matcher = re.compile(regex, re.IGNORECASE)
            except re.error:
                # Invalid expression: ignore this entry and keep going.
                continue

            # Only the last ten messages are searched; stop at the first hit.
            matched = any(
                matcher.search(msg.get("content", "") or "")
                for msg in ctx.messages[-10:]
            )
            if not matched:
                continue

            action = spec.get("action", "nudge")
            message = spec.get("message", f"Pattern matched: {regex}")

            if action == "abort":
                return WatcherResult.abort(message)
            if action == "pause":
                return WatcherResult.pause(message)
            # Any other action value falls back to a nudge.
            return WatcherResult.nudge(guidance=message, reason=f"Pattern: {regex}")

        return WatcherResult.ok()
206
+
207
+
208
@register_watcher("delegation")
class DelegationWatcher(Watcher):
    """
    Watches for the orchestrator trying to write code directly.

    The orchestrator should DELEGATE coding tasks to executors (Codex, Claude Code),
    not write code itself via bash heredocs, cat, echo, etc.

    Detects patterns like:
    - cat >> file << 'EOF' (heredocs)
    - echo "code" >> file
    - printf "..." > file.py
    - tee file.py << EOF
    - sed -i '... def/class/function/import ...'

    Config keys:
        strict: If True (default), a detection produces a nudge. If False,
            a detection is ignored and CONTINUE is returned.
    """

    name = "delegation"
    description = "Ensures orchestrator delegates coding instead of writing directly"

    # Patterns that indicate direct code writing
    DIRECT_WRITE_PATTERNS = [
        # Heredocs
        r"cat\s+>+\s*\S+.*<<",
        r"tee\s+\S+.*<<",
        # Echo/printf to code files
        r"echo\s+['\"].*['\"]\s*>+\s*\S+\.(py|js|ts|go|rs|java|cpp|c|rb|sh)",
        r"printf\s+['\"].*['\"]\s*>+\s*\S+\.(py|js|ts|go|rs|java|cpp|c|rb|sh)",
        # Sed inline editing of code constructs. The keyword alternatives are
        # grouped so they only match inside a `sed -i` command; ungrouped,
        # `|` would split the whole pattern and flag any command that merely
        # contains "class ", "function " or "import ".
        r"sed\s+-i.*['\"].*(?:def|class|function|import)\s+",
    ]

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Nudge when a recent bash tool call looks like a direct code write.

        Args:
            ctx: Current context; the last ten ``messages`` are inspected.

        Returns:
            A NUDGE result for the first offending bash command in strict
            mode, otherwise CONTINUE.
        """
        strict = self.config.get("strict", True)

        for msg in ctx.messages[-10:]:
            if msg.get("role") != "assistant":
                continue

            # "tool_calls" may be present but None; treat it as no calls.
            for tc in msg.get("tool_calls") or []:
                func = tc.get("function") or {}

                # Only check bash calls
                if func.get("name", "") != "bash":
                    continue

                command = self._extract_command(func.get("arguments", ""))
                if not self._is_direct_write(command):
                    continue

                if not strict:
                    # Non-strict mode: a detection is deliberately not escalated.
                    return WatcherResult.ok()

                return WatcherResult.nudge(
                    guidance=(
                        "You are trying to write code directly via bash. "
                        "As the orchestrator, you should DELEGATE coding tasks to executors "
                        "using delegate(). Use bash only for verification commands "
                        "(git status, running tests, etc.), not for writing code."
                    ),
                    reason=f"Direct code write detected: {command[:100]}...",
                )

        return WatcherResult.ok()

    @classmethod
    def _is_direct_write(cls, command: str) -> bool:
        """Return True when ``command`` matches any direct-write pattern."""
        return any(
            re.search(pattern, command, re.IGNORECASE)
            for pattern in cls.DIRECT_WRITE_PATTERNS
        )

    @staticmethod
    def _extract_command(args: Any) -> str:
        """Pull the shell command out of a bash tool call's arguments.

        ``args`` may be a JSON string, a dict, or something else. A string
        that is not a JSON object is treated as the raw command. Anything
        that does not yield a string command becomes "".
        """
        import json

        if isinstance(args, str):
            try:
                parsed = json.loads(args)
            except json.JSONDecodeError:
                return args
            if not isinstance(parsed, dict):
                # Valid JSON but not an object: fall back to the raw text.
                return args
            command = parsed.get("command", "")
        elif isinstance(args, dict):
            command = args.get("command", "")
        else:
            return ""

        # Guard the regex search against non-string commands (e.g. a list).
        return command if isinstance(command, str) else ""
288
+
289
+
290
@register_watcher("quality")
class QualityWatcher(Watcher):
    """
    Watches for quality issues.

    Detects:
    - Large file changes
    - Missing tests when code is written

    Config keys:
        require_tests: Nudge when code files change without any test file
            changing (default True).
        max_files_changed: Nudge when more than this many files have
            changed (default 10).
    """

    name = "quality"
    description = "Watches for quality issues in code changes"

    # File suffixes treated as source code for the missing-tests check.
    _CODE_SUFFIXES = (".py", ".js", ".ts", ".go", ".rs")

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """
        Nudge on oversized change sets or code changes without tests.

        Args:
            ctx: Current context; only ``files_changed`` is read.

        Returns:
            A NUDGE result for the first issue found, otherwise CONTINUE.
        """
        require_tests = self.config.get("require_tests", True)
        max_files_changed = self.config.get("max_files_changed", 10)

        # Check for large changes
        if len(ctx.files_changed) > max_files_changed:
            return WatcherResult.nudge(
                guidance=(
                    f"You've modified {len(ctx.files_changed)} files. "
                    "Consider breaking this into smaller, focused changes."
                ),
                reason=f"Large change: {len(ctx.files_changed)} files",
            )

        # Check for tests if code files are changed
        if require_tests and ctx.files_changed:
            # One predicate decides both lists, so a path is never counted
            # as code and as a test at the same time.
            test_files = [f for f in ctx.files_changed if self._is_test_file(f)]
            code_files = [
                f for f in ctx.files_changed
                if f.endswith(self._CODE_SUFFIXES) and not self._is_test_file(f)
            ]

            if code_files and not test_files:
                return WatcherResult.nudge(
                    guidance=(
                        "Code files were modified but no test files were added or updated. "
                        "Consider adding tests for the changes."
                    ),
                    reason="Code without tests",
                )

        return WatcherResult.ok()

    @staticmethod
    def _is_test_file(path: str) -> bool:
        """Return True when any path component looks like a test file or directory.

        A component qualifies when its stem (text before the first dot,
        surrounding underscores stripped) starts with "test" or ends with
        "_test"/"_tests", or when the component contains ".test.". Matching
        whole components avoids treating names such as "latest.py" or
        "contest.py" as tests, which a bare substring check would do.
        """
        normalized = path.replace("\\", "/").lower()
        for part in normalized.split("/"):
            if ".test." in part:
                return True
            stem = part.split(".", 1)[0].strip("_")
            if stem.startswith("test") or stem.endswith(("_test", "_tests")):
                return True
        return False
343
+
344
+
345
@register_watcher("delegation_reminder")
class DelegationReminderWatcher(Watcher):
    """
    Gently reminds the orchestrator to delegate rather than work directly.

    Walking backwards through recent assistant messages, it counts how many
    made tool calls without using any delegation-related tool. Once that
    streak reaches the threshold, the orchestrator is nudged to consider
    delegating to executors.

    This is softer than DelegationWatcher: no code-writing patterns are
    inspected, only the amount of direct tool use.

    Config keys:
        threshold: Streak length that triggers the nudge (default 10).
        lookback: How many trailing messages to examine (default 30).
    """

    name = "delegation_reminder"
    description = "Reminds orchestrator to delegate after many direct tool calls"

    # Tools that count as delegation-related (don't count against threshold)
    DELEGATION_TOOLS = {
        "delegate",
        "converse",
        "check_session",
        "end_session",
        "list_sessions",
        "chat",  # Talking to user is not direct work
    }

    async def observe(self, ctx: WatcherContext) -> WatcherResult:
        """Nudge when the trailing streak of non-delegation messages hits the threshold."""
        threshold = self.config.get("threshold", 10)
        lookback = self.config.get("lookback", 30)

        streak = 0
        # Newest message first, so the streak is measured from the end.
        for msg in reversed(ctx.messages[-lookback:]):
            if msg.get("role") != "assistant":
                continue

            calls = msg.get("tool_calls", [])
            if not calls:
                # Text-only replies neither extend nor reset the streak.
                continue

            called = [tc.get("function", {}).get("name", "") for tc in calls]

            if any(tool in self.DELEGATION_TOOLS for tool in called):
                # A delegation tool ends the streak.
                break
            if any(called):
                # At least one named, non-delegation call: count the message
                # once, regardless of how many calls it contains.
                streak += 1

        if streak < threshold:
            return WatcherResult.ok()

        return WatcherResult.nudge(
            guidance=(
                f"You've made {streak} consecutive direct tool calls "
                "without delegating to an executor. Remember: as the orchestrator, your role "
                "is to delegate coding work to executors, not do it yourself via bash. "
                "Consider whether the work you're doing could be delegated to an executor "
                "using delegate(). Executors can write code, run tests, and handle complex "
                "file operations more effectively than direct bash commands."
            ),
            reason=f"Consecutive non-delegation calls: {streak}",
        )