zwarm 1.3.2__tar.gz → 1.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {zwarm-1.3.2 → zwarm-1.3.3}/PKG-INFO +30 -15
  2. {zwarm-1.3.2 → zwarm-1.3.3}/README.md +29 -14
  3. {zwarm-1.3.2 → zwarm-1.3.3}/pyproject.toml +1 -1
  4. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/cli/main.py +3 -3
  5. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/core/config.py +9 -1
  6. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/orchestrator.py +7 -2
  7. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/prompts/orchestrator.py +18 -0
  8. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/watchers/builtin.py +82 -0
  9. {zwarm-1.3.2 → zwarm-1.3.3}/.gitignore +0 -0
  10. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/__init__.py +0 -0
  11. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/adapters/__init__.py +0 -0
  12. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/adapters/base.py +0 -0
  13. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/adapters/claude_code.py +0 -0
  14. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/adapters/codex_mcp.py +0 -0
  15. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/adapters/registry.py +0 -0
  16. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/adapters/test_codex_mcp.py +0 -0
  17. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/adapters/test_registry.py +0 -0
  18. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/cli/__init__.py +0 -0
  19. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/core/__init__.py +0 -0
  20. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/core/compact.py +0 -0
  21. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/core/environment.py +0 -0
  22. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/core/models.py +0 -0
  23. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/core/state.py +0 -0
  24. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/core/test_compact.py +0 -0
  25. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/core/test_config.py +0 -0
  26. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/core/test_models.py +0 -0
  27. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/prompts/__init__.py +0 -0
  28. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/test_orchestrator_watchers.py +0 -0
  29. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/tools/__init__.py +0 -0
  30. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/tools/delegation.py +0 -0
  31. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/watchers/__init__.py +0 -0
  32. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/watchers/base.py +0 -0
  33. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/watchers/manager.py +0 -0
  34. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/watchers/registry.py +0 -0
  35. {zwarm-1.3.2 → zwarm-1.3.3}/src/zwarm/watchers/test_watchers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zwarm
3
- Version: 1.3.2
3
+ Version: 1.3.3
4
4
  Summary: Multi-Agent CLI Orchestration Research Platform
5
5
  Requires-Python: <3.14,>=3.13
6
6
  Requires-Dist: python-dotenv>=1.0.0
@@ -136,12 +136,17 @@ state_dir: .zwarm # State directory for sessions/events
136
136
 
137
137
  watchers:
138
138
  enabled: true
139
+ message_role: user # Role for nudge messages: user | assistant | system
139
140
  watchers:
140
141
  - name: progress
141
142
  - name: budget
142
143
  config:
143
144
  max_steps: 50
144
145
  max_sessions: 10
146
+ - name: delegation_reminder
147
+ config:
148
+ threshold: 10 # Nudge after N consecutive non-delegation calls
149
+ lookback: 30 # How many messages to check
145
150
  - name: scope
146
151
  config:
147
152
  keywords: []
@@ -217,28 +222,38 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
217
222
  | `pattern` | Custom regex pattern matching |
218
223
  | `quality` | Code quality checks |
219
224
  | `delegation` | Ensures orchestrator delegates instead of writing code directly |
225
+ | `delegation_reminder` | Nudges after many consecutive non-delegation tool calls (default: 10) |
220
226
 
221
227
  ### Enabling Watchers
222
228
 
223
229
  ```yaml
224
230
  # config.yaml
225
231
  watchers:
226
- enabled:
227
- - progress
228
- - budget
229
- - scope
230
- config:
231
- progress:
232
- stuck_threshold: 5 # Flag after 5 similar steps
233
- budget:
234
- max_steps: 50
235
- max_sessions: 10
236
- scope:
237
- keywords:
238
- - "refactor"
239
- - "rewrite"
232
+ enabled: true
233
+ message_role: user # How nudges appear: user | assistant | system
234
+ watchers:
235
+ - name: progress
236
+ config:
237
+ max_same_calls: 3 # Flag after 3 identical tool calls
238
+ - name: budget
239
+ config:
240
+ max_steps: 50
241
+ max_sessions: 10
242
+ - name: delegation_reminder
243
+ config:
244
+ threshold: 10 # Nudge after 10 consecutive non-delegation calls
245
+ - name: scope
246
+ config:
247
+ avoid_keywords:
248
+ - "refactor everything"
249
+ - "rewrite"
240
250
  ```
241
251
 
252
+ The `message_role` setting controls how watcher nudges are injected:
253
+ - `user` (default): Appears as a user message - strong nudge, agent must respond
254
+ - `assistant`: Appears as a previous assistant thought - softer, agent can continue
255
+ - `system`: Appears as system instruction - authoritative guidance
256
+
242
257
  ### Watcher Actions
243
258
 
244
259
  Watchers can return different actions:
@@ -124,12 +124,17 @@ state_dir: .zwarm # State directory for sessions/events
124
124
 
125
125
  watchers:
126
126
  enabled: true
127
+ message_role: user # Role for nudge messages: user | assistant | system
127
128
  watchers:
128
129
  - name: progress
129
130
  - name: budget
130
131
  config:
131
132
  max_steps: 50
132
133
  max_sessions: 10
134
+ - name: delegation_reminder
135
+ config:
136
+ threshold: 10 # Nudge after N consecutive non-delegation calls
137
+ lookback: 30 # How many messages to check
133
138
  - name: scope
134
139
  config:
135
140
  keywords: []
@@ -205,28 +210,38 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
205
210
  | `pattern` | Custom regex pattern matching |
206
211
  | `quality` | Code quality checks |
207
212
  | `delegation` | Ensures orchestrator delegates instead of writing code directly |
213
+ | `delegation_reminder` | Nudges after many consecutive non-delegation tool calls (default: 10) |
208
214
 
209
215
  ### Enabling Watchers
210
216
 
211
217
  ```yaml
212
218
  # config.yaml
213
219
  watchers:
214
- enabled:
215
- - progress
216
- - budget
217
- - scope
218
- config:
219
- progress:
220
- stuck_threshold: 5 # Flag after 5 similar steps
221
- budget:
222
- max_steps: 50
223
- max_sessions: 10
224
- scope:
225
- keywords:
226
- - "refactor"
227
- - "rewrite"
220
+ enabled: true
221
+ message_role: user # How nudges appear: user | assistant | system
222
+ watchers:
223
+ - name: progress
224
+ config:
225
+ max_same_calls: 3 # Flag after 3 identical tool calls
226
+ - name: budget
227
+ config:
228
+ max_steps: 50
229
+ max_sessions: 10
230
+ - name: delegation_reminder
231
+ config:
232
+ threshold: 10 # Nudge after 10 consecutive non-delegation calls
233
+ - name: scope
234
+ config:
235
+ avoid_keywords:
236
+ - "refactor everything"
237
+ - "rewrite"
228
238
  ```
229
239
 
240
+ The `message_role` setting controls how watcher nudges are injected:
241
+ - `user` (default): Appears as a user message - strong nudge, agent must respond
242
+ - `assistant`: Appears as a previous assistant thought - softer, agent can continue
243
+ - `system`: Appears as system instruction - authoritative guidance
244
+
230
245
  ### Watcher Actions
231
246
 
232
247
  Watchers can return different actions:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "zwarm"
3
- version = "1.3.2"
3
+ version = "1.3.3"
4
4
  description = "Multi-Agent CLI Orchestration Research Platform"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13,<3.14"
@@ -672,7 +672,7 @@ def init(
672
672
  # Gather settings
673
673
  weave_project = ""
674
674
  adapter = "codex_mcp"
675
- watchers_enabled = ["progress", "budget", "delegation"]
675
+ watchers_enabled = ["progress", "budget", "delegation", "delegation_reminder"]
676
676
  create_project_config = with_project
677
677
  project_description = ""
678
678
  project_context = ""
@@ -696,10 +696,10 @@ def init(
696
696
 
697
697
  # Watchers
698
698
  console.print("\n [bold]Watchers[/] (trajectory aligners)")
699
- available_watchers = ["progress", "budget", "delegation", "scope", "pattern", "quality"]
699
+ available_watchers = ["progress", "budget", "delegation", "delegation_reminder", "scope", "pattern", "quality"]
700
700
  watchers_enabled = []
701
701
  for w in available_watchers:
702
- default = w in ["progress", "budget", "delegation"]
702
+ default = w in ["progress", "budget", "delegation", "delegation_reminder"]
703
703
  if typer.confirm(f" Enable {w}?", default=default):
704
704
  watchers_enabled.append(w)
705
705
 
@@ -86,7 +86,13 @@ class WatchersConfig:
86
86
  watchers: list[WatcherConfigItem] = field(default_factory=lambda: [
87
87
  WatcherConfigItem(name="progress"),
88
88
  WatcherConfigItem(name="budget"),
89
+ WatcherConfigItem(name="delegation_reminder"),
89
90
  ])
91
+ # Role for watcher nudge messages: "user" | "assistant" | "system"
92
+ # "user" (default) - Appears as if user sent the message, strong nudge
93
+ # "assistant" - Appears as previous assistant thought, softer nudge
94
+ # "system" - Appears as system instruction, authoritative
95
+ message_role: str = "user"
90
96
 
91
97
 
92
98
  @dataclass
@@ -122,13 +128,14 @@ class ZwarmConfig:
122
128
  ],
123
129
  )
124
130
  else:
125
- # Full format: watchers: {enabled: true, watchers: [...]}
131
+ # Full format: watchers: {enabled: true, watchers: [...], message_role: "user"}
126
132
  watchers_config = WatchersConfig(
127
133
  enabled=watchers_data.get("enabled", True),
128
134
  watchers=[
129
135
  WatcherConfigItem(name=w) if isinstance(w, str) else WatcherConfigItem(**w)
130
136
  for w in watchers_data.get("watchers", [])
131
137
  ] or WatchersConfig().watchers,
138
+ message_role=watchers_data.get("message_role", "user"),
132
139
  )
133
140
 
134
141
  # Build orchestrator config with nested compaction
@@ -180,6 +187,7 @@ class ZwarmConfig:
180
187
  {"name": w.name, "enabled": w.enabled, "config": w.config}
181
188
  for w in self.watchers.watchers
182
189
  ],
190
+ "message_role": self.watchers.message_role,
183
191
  },
184
192
  "state_dir": self.state_dir,
185
193
  }
@@ -352,10 +352,15 @@ Review what was accomplished in the previous session and delegate new tasks as n
352
352
 
353
353
  # Handle watcher result
354
354
  if result.action == WatcherAction.NUDGE and result.guidance:
355
- # Inject guidance as a system message
355
+ # Inject guidance as a message with configurable role
356
+ message_role = self.config.watchers.message_role
357
+ # Validate role (default to user if invalid)
358
+ if message_role not in ("user", "assistant", "system"):
359
+ message_role = "user"
360
+
356
361
  self.messages.append(
357
362
  {
358
- "role": "user",
363
+ "role": message_role,
359
364
  "content": f"[WATCHER: {result.metadata.get('triggered_by', 'unknown')}] {result.guidance}",
360
365
  }
361
366
  )
@@ -43,6 +43,24 @@ Your primary tools are for delegation and verification:
43
43
 
44
44
  ---
45
45
 
46
+ # Watchers
47
+
48
+ Your execution is monitored by "watchers" - automated systems that observe your trajectory and provide guidance when you may be going off course. Watchers are designed to help you stay aligned with best practices and catch common pitfalls.
49
+
50
+ When you see a message prefixed with `[WATCHER: ...]`, pay attention. These are interventions from the watcher system indicating that your current approach may need adjustment. Watchers might notice:
51
+
52
+ - You're doing direct work (bash commands) when you should be delegating to executors
53
+ - You're spinning or repeating the same actions without making progress
54
+ - You're approaching resource limits (steps, sessions)
55
+ - You're drifting from the original task scope
56
+ - You're making changes without corresponding tests
57
+
58
+ Watcher guidance is not optional advice - treat it as an important course correction. If a watcher tells you to delegate instead of doing work directly, delegate. If a watcher says you're stuck, step back and try a different approach. If a watcher warns about budget limits, prioritize and wrap up.
59
+
60
+ The watchers are on your side. They exist to help you succeed, not to criticize. Heed their guidance promptly.
61
+
62
+ ---
63
+
46
64
  # Sync vs Async: Choosing the Right Mode
47
65
 
48
66
  The mode you choose for delegation significantly affects how work proceeds.
@@ -340,3 +340,85 @@ class QualityWatcher(Watcher):
340
340
  )
341
341
 
342
342
  return WatcherResult.ok()
343
+
344
+
345
+ @register_watcher("delegation_reminder")
346
+ class DelegationReminderWatcher(Watcher):
347
+ """
348
+ Reminds the orchestrator to delegate work instead of doing it directly.
349
+
350
+ Counts consecutive non-delegation tool calls (bash commands that aren't
351
+ delegation-related). When the count exceeds a threshold, nudges the
352
+ orchestrator to consider delegating to executors instead.
353
+
354
+ This is a softer reminder than the DelegationWatcher - it doesn't detect
355
+ specific code-writing patterns, just notices when the orchestrator seems
356
+ to be doing a lot of direct work that could potentially be delegated.
357
+ """
358
+
359
+ name = "delegation_reminder"
360
+ description = "Reminds orchestrator to delegate after many direct tool calls"
361
+
362
+ # Tools that count as delegation-related (don't count against threshold)
363
+ DELEGATION_TOOLS = {
364
+ "delegate",
365
+ "converse",
366
+ "check_session",
367
+ "end_session",
368
+ "list_sessions",
369
+ "chat", # Talking to user is not direct work
370
+ }
371
+
372
+ async def observe(self, ctx: WatcherContext) -> WatcherResult:
373
+ config = self.config
374
+ threshold = config.get("threshold", 10) # Max consecutive non-delegation calls
375
+ lookback = config.get("lookback", 30) # How many messages to check
376
+
377
+ # Count consecutive non-delegation tool calls from the end
378
+ consecutive_non_delegation = 0
379
+
380
+ # Look through recent messages in reverse order
381
+ for msg in reversed(ctx.messages[-lookback:]):
382
+ if msg.get("role") != "assistant":
383
+ continue
384
+
385
+ tool_calls = msg.get("tool_calls", [])
386
+ if not tool_calls:
387
+ # Text-only response doesn't reset counter, but doesn't add to it
388
+ continue
389
+
390
+ # Check each tool call in this message
391
+ has_delegation = False
392
+ has_non_delegation = False
393
+
394
+ for tc in tool_calls:
395
+ func = tc.get("function", {})
396
+ name = func.get("name", "")
397
+
398
+ if name in self.DELEGATION_TOOLS:
399
+ has_delegation = True
400
+ elif name: # Any other tool call
401
+ has_non_delegation = True
402
+
403
+ if has_delegation:
404
+ # Found a delegation tool - stop counting
405
+ break
406
+ elif has_non_delegation:
407
+ # Add to consecutive count (one per message, not per tool call)
408
+ consecutive_non_delegation += 1
409
+
410
+ # Check if threshold exceeded
411
+ if consecutive_non_delegation >= threshold:
412
+ return WatcherResult.nudge(
413
+ guidance=(
414
+ f"You've made {consecutive_non_delegation} consecutive direct tool calls "
415
+ "without delegating to an executor. Remember: as the orchestrator, your role "
416
+ "is to delegate coding work to executors, not do it yourself via bash. "
417
+ "Consider whether the work you're doing could be delegated to an executor "
418
+ "using delegate(). Executors can write code, run tests, and handle complex "
419
+ "file operations more effectively than direct bash commands."
420
+ ),
421
+ reason=f"Consecutive non-delegation calls: {consecutive_non_delegation}",
422
+ )
423
+
424
+ return WatcherResult.ok()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes