zwarm 1.3.2__py3-none-any.whl → 1.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zwarm/adapters/codex_mcp.py +124 -23
- zwarm/cli/main.py +3 -3
- zwarm/core/config.py +9 -1
- zwarm/orchestrator.py +7 -2
- zwarm/prompts/orchestrator.py +18 -0
- zwarm/watchers/builtin.py +82 -0
- {zwarm-1.3.2.dist-info → zwarm-1.3.5.dist-info}/METADATA +30 -15
- {zwarm-1.3.2.dist-info → zwarm-1.3.5.dist-info}/RECORD +10 -10
- {zwarm-1.3.2.dist-info → zwarm-1.3.5.dist-info}/WHEEL +0 -0
- {zwarm-1.3.2.dist-info → zwarm-1.3.5.dist-info}/entry_points.txt +0 -0
zwarm/adapters/codex_mcp.py
CHANGED
|
@@ -44,6 +44,7 @@ class MCPClient:
|
|
|
44
44
|
|
|
45
45
|
def __init__(self):
|
|
46
46
|
self._proc: subprocess.Popen | None = None
|
|
47
|
+
self._proc_pid: int | None = None # Track PID to detect restarts
|
|
47
48
|
self._request_id = 0
|
|
48
49
|
self._initialized = False
|
|
49
50
|
self._stderr_thread: threading.Thread | None = None
|
|
@@ -51,14 +52,24 @@ class MCPClient:
|
|
|
51
52
|
self._stderr_lines: list[str] = []
|
|
52
53
|
self._stdout_queue: queue.Queue[str | None] = queue.Queue()
|
|
53
54
|
self._lock = threading.Lock() # Protect writes only
|
|
55
|
+
self._start_count = 0 # Track how many times we've started
|
|
54
56
|
|
|
55
57
|
def start(self) -> None:
|
|
56
58
|
"""Start the MCP server process."""
|
|
57
59
|
with self._lock:
|
|
58
60
|
if self._proc is not None and self._proc.poll() is None:
|
|
61
|
+
logger.debug(f"MCP server already running (pid={self._proc.pid}, start_count={self._start_count})")
|
|
59
62
|
return # Already running
|
|
60
63
|
|
|
61
|
-
|
|
64
|
+
# Check if this is a restart (previous server died)
|
|
65
|
+
if self._proc_pid is not None:
|
|
66
|
+
logger.warning(
|
|
67
|
+
f"MCP server restart detected! Previous pid={self._proc_pid}, "
|
|
68
|
+
f"start_count={self._start_count}. All conversation state will be lost."
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
self._start_count += 1
|
|
72
|
+
logger.info(f"Starting codex mcp-server... (start_count={self._start_count})")
|
|
62
73
|
self._proc = subprocess.Popen(
|
|
63
74
|
["codex", "mcp-server"],
|
|
64
75
|
stdin=subprocess.PIPE,
|
|
@@ -66,6 +77,7 @@ class MCPClient:
|
|
|
66
77
|
stderr=subprocess.PIPE,
|
|
67
78
|
text=False, # Binary mode for explicit encoding control
|
|
68
79
|
)
|
|
80
|
+
self._proc_pid = self._proc.pid
|
|
69
81
|
self._initialized = False
|
|
70
82
|
self._stderr_lines = []
|
|
71
83
|
self._stdout_queue = queue.Queue() # Fresh queue
|
|
@@ -251,11 +263,13 @@ class MCPClient:
|
|
|
251
263
|
# Collect streaming events until final result
|
|
252
264
|
# Reader thread queues lines, we pull from queue with timeout
|
|
253
265
|
session_id = None
|
|
266
|
+
conversation_id = None # Track conversation ID separately
|
|
254
267
|
agent_messages: list[str] = []
|
|
255
268
|
streaming_text: list[str] = [] # Accumulate streaming delta text
|
|
256
269
|
final_result = None
|
|
257
270
|
token_usage: dict[str, Any] = {} # Track token usage
|
|
258
271
|
start_time = time.time()
|
|
272
|
+
all_events: list[dict] = [] # Keep ALL events for debugging
|
|
259
273
|
|
|
260
274
|
for event_count in range(1000): # Safety limit on events
|
|
261
275
|
self._check_alive()
|
|
@@ -278,6 +292,7 @@ class MCPClient:
|
|
|
278
292
|
|
|
279
293
|
try:
|
|
280
294
|
event = json.loads(line)
|
|
295
|
+
all_events.append(event) # Keep for debugging
|
|
281
296
|
except json.JSONDecodeError as e:
|
|
282
297
|
logger.warning(f"Invalid JSON from MCP: {line[:100]}... - {e}")
|
|
283
298
|
continue
|
|
@@ -286,7 +301,10 @@ class MCPClient:
|
|
|
286
301
|
if event.get("id") == request_id:
|
|
287
302
|
if "result" in event:
|
|
288
303
|
final_result = event["result"]
|
|
289
|
-
|
|
304
|
+
# Extract conversation ID from final result
|
|
305
|
+
if isinstance(final_result, dict):
|
|
306
|
+
conversation_id = final_result.get("conversationId") or final_result.get("conversation_id")
|
|
307
|
+
logger.debug(f"Got final result after {event_count} events, conversation_id={conversation_id}")
|
|
290
308
|
break
|
|
291
309
|
elif "error" in event:
|
|
292
310
|
error = event["error"]
|
|
@@ -309,6 +327,9 @@ class MCPClient:
|
|
|
309
327
|
item = msg.get("item", {})
|
|
310
328
|
item_type = item.get("type")
|
|
311
329
|
|
|
330
|
+
# Log ALL item_completed events to help debug
|
|
331
|
+
logger.debug(f"item_completed: type={item_type}, keys={list(item.keys())}")
|
|
332
|
+
|
|
312
333
|
# Agent text responses - codex uses "AgentMessage" type
|
|
313
334
|
if item_type == "AgentMessage":
|
|
314
335
|
content = item.get("content", [])
|
|
@@ -318,6 +339,19 @@ class MCPClient:
|
|
|
318
339
|
elif isinstance(block, str):
|
|
319
340
|
agent_messages.append(block)
|
|
320
341
|
|
|
342
|
+
# Also check for "agent_message" (lowercase) variant
|
|
343
|
+
elif item_type == "agent_message":
|
|
344
|
+
text = item.get("text", "") or item.get("message", "")
|
|
345
|
+
if text:
|
|
346
|
+
agent_messages.append(text)
|
|
347
|
+
# Also check content array
|
|
348
|
+
content = item.get("content", [])
|
|
349
|
+
for block in content:
|
|
350
|
+
if isinstance(block, dict) and block.get("text"):
|
|
351
|
+
agent_messages.append(block["text"])
|
|
352
|
+
elif isinstance(block, str):
|
|
353
|
+
agent_messages.append(block)
|
|
354
|
+
|
|
321
355
|
# Legacy format check
|
|
322
356
|
elif item_type == "message" and item.get("role") == "assistant":
|
|
323
357
|
content = item.get("content", [])
|
|
@@ -327,6 +361,21 @@ class MCPClient:
|
|
|
327
361
|
elif isinstance(block, str):
|
|
328
362
|
agent_messages.append(block)
|
|
329
363
|
|
|
364
|
+
# Generic message type - check for text/content
|
|
365
|
+
elif item_type == "message":
|
|
366
|
+
text = item.get("text", "")
|
|
367
|
+
if text:
|
|
368
|
+
agent_messages.append(text)
|
|
369
|
+
content = item.get("content", [])
|
|
370
|
+
if isinstance(content, str):
|
|
371
|
+
agent_messages.append(content)
|
|
372
|
+
elif isinstance(content, list):
|
|
373
|
+
for block in content:
|
|
374
|
+
if isinstance(block, dict) and block.get("text"):
|
|
375
|
+
agent_messages.append(block["text"])
|
|
376
|
+
elif isinstance(block, str):
|
|
377
|
+
agent_messages.append(block)
|
|
378
|
+
|
|
330
379
|
# Function call outputs (for context)
|
|
331
380
|
elif item_type == "function_call_output":
|
|
332
381
|
output = item.get("output", "")
|
|
@@ -334,8 +383,8 @@ class MCPClient:
|
|
|
334
383
|
agent_messages.append(f"[Tool output]: {output[:500]}")
|
|
335
384
|
|
|
336
385
|
# Log other item types we're not handling
|
|
337
|
-
elif item_type not in ("function_call", "tool_call", "UserMessage"):
|
|
338
|
-
logger.debug(f"Unhandled item_completed type: {item_type},
|
|
386
|
+
elif item_type not in ("function_call", "tool_call", "UserMessage", "user_message"):
|
|
387
|
+
logger.debug(f"Unhandled item_completed type: {item_type}, item={item}")
|
|
339
388
|
|
|
340
389
|
elif msg_type == "agent_message":
|
|
341
390
|
# Direct agent message event
|
|
@@ -406,28 +455,50 @@ class MCPClient:
|
|
|
406
455
|
agent_messages.append(full_streaming)
|
|
407
456
|
logger.debug(f"Captured {len(streaming_text)} streaming chunks ({len(full_streaming)} chars)")
|
|
408
457
|
|
|
409
|
-
#
|
|
410
|
-
|
|
411
|
-
"
|
|
412
|
-
"messages": agent_messages,
|
|
413
|
-
"output": "\n".join(agent_messages) if agent_messages else "",
|
|
414
|
-
"usage": token_usage, # Token usage for cost tracking
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
# Merge final result and try to extract content if no messages
|
|
418
|
-
if final_result:
|
|
419
|
-
result.update(final_result)
|
|
420
|
-
if not agent_messages and "content" in final_result:
|
|
458
|
+
# Try to extract content from final_result if we have no messages
|
|
459
|
+
if final_result and not agent_messages:
|
|
460
|
+
if "content" in final_result:
|
|
421
461
|
content = final_result["content"]
|
|
422
462
|
if isinstance(content, list):
|
|
423
463
|
for block in content:
|
|
424
464
|
if isinstance(block, dict) and block.get("text"):
|
|
425
465
|
agent_messages.append(block["text"])
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
466
|
+
elif isinstance(block, str):
|
|
467
|
+
agent_messages.append(block)
|
|
468
|
+
elif isinstance(content, str):
|
|
469
|
+
agent_messages.append(content)
|
|
470
|
+
# Also check for text field
|
|
471
|
+
if not agent_messages and "text" in final_result:
|
|
472
|
+
agent_messages.append(final_result["text"])
|
|
473
|
+
|
|
474
|
+
# Build result - prefer conversation_id from final result, fallback to session_id from events
|
|
475
|
+
effective_conversation_id = conversation_id or session_id
|
|
476
|
+
result = {
|
|
477
|
+
"conversationId": effective_conversation_id,
|
|
478
|
+
"messages": agent_messages,
|
|
479
|
+
"output": "\n".join(agent_messages) if agent_messages else "",
|
|
480
|
+
"usage": token_usage, # Token usage for cost tracking
|
|
481
|
+
}
|
|
429
482
|
|
|
430
|
-
|
|
483
|
+
# Log detailed debug info if we didn't capture any messages
|
|
484
|
+
if not agent_messages:
|
|
485
|
+
event_types = [e.get("method") or f"id:{e.get('id')}" for e in all_events[:20]]
|
|
486
|
+
logger.warning(
|
|
487
|
+
f"MCP call returned no messages. "
|
|
488
|
+
f"conversation_id={effective_conversation_id}, "
|
|
489
|
+
f"session_id={session_id}, "
|
|
490
|
+
f"event_count={len(all_events)}, "
|
|
491
|
+
f"event_types={event_types}, "
|
|
492
|
+
f"final_result_keys={list(final_result.keys()) if final_result else 'None'}"
|
|
493
|
+
)
|
|
494
|
+
# Log codex/event details for debugging
|
|
495
|
+
codex_events = [e for e in all_events if e.get("method") == "codex/event"]
|
|
496
|
+
if codex_events:
|
|
497
|
+
for ce in codex_events[-5:]: # Last 5 codex events
|
|
498
|
+
msg = ce.get("params", {}).get("msg", {})
|
|
499
|
+
logger.debug(f" codex/event: type={msg.get('type')}, keys={list(msg.keys())}")
|
|
500
|
+
|
|
501
|
+
logger.debug(f"MCP call complete: {len(agent_messages)} messages, conversation_id={effective_conversation_id}")
|
|
431
502
|
return result
|
|
432
503
|
|
|
433
504
|
def close(self) -> None:
|
|
@@ -521,14 +592,35 @@ class CodexMCPAdapter(ExecutorAdapter):
|
|
|
521
592
|
if model:
|
|
522
593
|
args["model"] = model
|
|
523
594
|
|
|
595
|
+
logger.info(f"Calling codex with task_len={len(task)}, cwd={cwd}, model={model or 'default'}")
|
|
596
|
+
|
|
524
597
|
result = client.call_tool("codex", args)
|
|
525
598
|
|
|
526
|
-
#
|
|
599
|
+
# Log the result structure
|
|
600
|
+
conversation_id = result.get("conversationId")
|
|
601
|
+
messages_count = len(result.get("messages", []))
|
|
602
|
+
output_len = len(result.get("output", ""))
|
|
527
603
|
usage = result.get("usage", {})
|
|
604
|
+
|
|
605
|
+
logger.info(
|
|
606
|
+
f"codex result: conversation_id={conversation_id}, "
|
|
607
|
+
f"messages_count={messages_count}, output_len={output_len}, "
|
|
608
|
+
f"usage={usage.get('total_tokens', 0)} tokens"
|
|
609
|
+
)
|
|
610
|
+
|
|
611
|
+
# Warn if we got a conversation ID but no messages (agent did work but we lost output)
|
|
612
|
+
if conversation_id and not messages_count and not output_len:
|
|
613
|
+
logger.warning(
|
|
614
|
+
f"codex returned conversation_id={conversation_id} but NO messages/output! "
|
|
615
|
+
f"The agent processed {usage.get('total_tokens', 0)} tokens but we didn't capture the response. "
|
|
616
|
+
f"This may indicate an issue with event parsing."
|
|
617
|
+
)
|
|
618
|
+
|
|
619
|
+
# Track usage
|
|
528
620
|
self._accumulate_usage(usage)
|
|
529
621
|
|
|
530
622
|
return {
|
|
531
|
-
"conversation_id":
|
|
623
|
+
"conversation_id": conversation_id,
|
|
532
624
|
"response": self._extract_response(result),
|
|
533
625
|
"raw_messages": result.get("messages", []),
|
|
534
626
|
"usage": usage,
|
|
@@ -549,13 +641,22 @@ class CodexMCPAdapter(ExecutorAdapter):
|
|
|
549
641
|
"""
|
|
550
642
|
client = self._ensure_client()
|
|
551
643
|
|
|
552
|
-
logger.
|
|
644
|
+
logger.info(f"Calling codex-reply with conversation_id={conversation_id}, message_len={len(message)}")
|
|
645
|
+
logger.debug(f"MCP client alive: {client.is_alive}, initialized: {client._initialized}")
|
|
553
646
|
|
|
554
647
|
result = client.call_tool("codex-reply", {
|
|
555
648
|
"conversationId": conversation_id,
|
|
556
649
|
"prompt": message,
|
|
557
650
|
})
|
|
558
651
|
|
|
652
|
+
# Log the full result structure for debugging
|
|
653
|
+
logger.info(
|
|
654
|
+
f"codex-reply result: conversationId={result.get('conversationId')}, "
|
|
655
|
+
f"messages_count={len(result.get('messages', []))}, "
|
|
656
|
+
f"output_len={len(result.get('output', ''))}, "
|
|
657
|
+
f"usage={result.get('usage', {}).get('total_tokens', 0)} tokens"
|
|
658
|
+
)
|
|
659
|
+
|
|
559
660
|
# Check for conversation loss - MCP returns empty result when session not found
|
|
560
661
|
if not result.get("messages") and not result.get("output"):
|
|
561
662
|
logger.error(
|
zwarm/cli/main.py
CHANGED
|
@@ -672,7 +672,7 @@ def init(
|
|
|
672
672
|
# Gather settings
|
|
673
673
|
weave_project = ""
|
|
674
674
|
adapter = "codex_mcp"
|
|
675
|
-
watchers_enabled = ["progress", "budget", "delegation"]
|
|
675
|
+
watchers_enabled = ["progress", "budget", "delegation", "delegation_reminder"]
|
|
676
676
|
create_project_config = with_project
|
|
677
677
|
project_description = ""
|
|
678
678
|
project_context = ""
|
|
@@ -696,10 +696,10 @@ def init(
|
|
|
696
696
|
|
|
697
697
|
# Watchers
|
|
698
698
|
console.print("\n [bold]Watchers[/] (trajectory aligners)")
|
|
699
|
-
available_watchers = ["progress", "budget", "delegation", "scope", "pattern", "quality"]
|
|
699
|
+
available_watchers = ["progress", "budget", "delegation", "delegation_reminder", "scope", "pattern", "quality"]
|
|
700
700
|
watchers_enabled = []
|
|
701
701
|
for w in available_watchers:
|
|
702
|
-
default = w in ["progress", "budget", "delegation"]
|
|
702
|
+
default = w in ["progress", "budget", "delegation", "delegation_reminder"]
|
|
703
703
|
if typer.confirm(f" Enable {w}?", default=default):
|
|
704
704
|
watchers_enabled.append(w)
|
|
705
705
|
|
zwarm/core/config.py
CHANGED
|
@@ -86,7 +86,13 @@ class WatchersConfig:
|
|
|
86
86
|
watchers: list[WatcherConfigItem] = field(default_factory=lambda: [
|
|
87
87
|
WatcherConfigItem(name="progress"),
|
|
88
88
|
WatcherConfigItem(name="budget"),
|
|
89
|
+
WatcherConfigItem(name="delegation_reminder"),
|
|
89
90
|
])
|
|
91
|
+
# Role for watcher nudge messages: "user" | "assistant" | "system"
|
|
92
|
+
# "user" (default) - Appears as if user sent the message, strong nudge
|
|
93
|
+
# "assistant" - Appears as previous assistant thought, softer nudge
|
|
94
|
+
# "system" - Appears as system instruction, authoritative
|
|
95
|
+
message_role: str = "user"
|
|
90
96
|
|
|
91
97
|
|
|
92
98
|
@dataclass
|
|
@@ -122,13 +128,14 @@ class ZwarmConfig:
|
|
|
122
128
|
],
|
|
123
129
|
)
|
|
124
130
|
else:
|
|
125
|
-
# Full format: watchers: {enabled: true, watchers: [...]}
|
|
131
|
+
# Full format: watchers: {enabled: true, watchers: [...], message_role: "user"}
|
|
126
132
|
watchers_config = WatchersConfig(
|
|
127
133
|
enabled=watchers_data.get("enabled", True),
|
|
128
134
|
watchers=[
|
|
129
135
|
WatcherConfigItem(name=w) if isinstance(w, str) else WatcherConfigItem(**w)
|
|
130
136
|
for w in watchers_data.get("watchers", [])
|
|
131
137
|
] or WatchersConfig().watchers,
|
|
138
|
+
message_role=watchers_data.get("message_role", "user"),
|
|
132
139
|
)
|
|
133
140
|
|
|
134
141
|
# Build orchestrator config with nested compaction
|
|
@@ -180,6 +187,7 @@ class ZwarmConfig:
|
|
|
180
187
|
{"name": w.name, "enabled": w.enabled, "config": w.config}
|
|
181
188
|
for w in self.watchers.watchers
|
|
182
189
|
],
|
|
190
|
+
"message_role": self.watchers.message_role,
|
|
183
191
|
},
|
|
184
192
|
"state_dir": self.state_dir,
|
|
185
193
|
}
|
zwarm/orchestrator.py
CHANGED
|
@@ -352,10 +352,15 @@ Review what was accomplished in the previous session and delegate new tasks as n
|
|
|
352
352
|
|
|
353
353
|
# Handle watcher result
|
|
354
354
|
if result.action == WatcherAction.NUDGE and result.guidance:
|
|
355
|
-
# Inject guidance as a
|
|
355
|
+
# Inject guidance as a message with configurable role
|
|
356
|
+
message_role = self.config.watchers.message_role
|
|
357
|
+
# Validate role (default to user if invalid)
|
|
358
|
+
if message_role not in ("user", "assistant", "system"):
|
|
359
|
+
message_role = "user"
|
|
360
|
+
|
|
356
361
|
self.messages.append(
|
|
357
362
|
{
|
|
358
|
-
"role":
|
|
363
|
+
"role": message_role,
|
|
359
364
|
"content": f"[WATCHER: {result.metadata.get('triggered_by', 'unknown')}] {result.guidance}",
|
|
360
365
|
}
|
|
361
366
|
)
|
zwarm/prompts/orchestrator.py
CHANGED
|
@@ -43,6 +43,24 @@ Your primary tools are for delegation and verification:
|
|
|
43
43
|
|
|
44
44
|
---
|
|
45
45
|
|
|
46
|
+
# Watchers
|
|
47
|
+
|
|
48
|
+
Your execution is monitored by "watchers" - automated systems that observe your trajectory and provide guidance when you may be going off course. Watchers are designed to help you stay aligned with best practices and catch common pitfalls.
|
|
49
|
+
|
|
50
|
+
When you see a message prefixed with `[WATCHER: ...]`, pay attention. These are interventions from the watcher system indicating that your current approach may need adjustment. Watchers might notice:
|
|
51
|
+
|
|
52
|
+
- You're doing direct work (bash commands) when you should be delegating to executors
|
|
53
|
+
- You're spinning or repeating the same actions without making progress
|
|
54
|
+
- You're approaching resource limits (steps, sessions)
|
|
55
|
+
- You're drifting from the original task scope
|
|
56
|
+
- You're making changes without corresponding tests
|
|
57
|
+
|
|
58
|
+
Watcher guidance is not optional advice - treat it as an important course correction. If a watcher tells you to delegate instead of doing work directly, delegate. If a watcher says you're stuck, step back and try a different approach. If a watcher warns about budget limits, prioritize and wrap up.
|
|
59
|
+
|
|
60
|
+
The watchers are on your side. They exist to help you succeed, not to criticize. Heed their guidance promptly.
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
46
64
|
# Sync vs Async: Choosing the Right Mode
|
|
47
65
|
|
|
48
66
|
The mode you choose for delegation significantly affects how work proceeds.
|
zwarm/watchers/builtin.py
CHANGED
|
@@ -340,3 +340,85 @@ class QualityWatcher(Watcher):
|
|
|
340
340
|
)
|
|
341
341
|
|
|
342
342
|
return WatcherResult.ok()
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
@register_watcher("delegation_reminder")
|
|
346
|
+
class DelegationReminderWatcher(Watcher):
|
|
347
|
+
"""
|
|
348
|
+
Reminds the orchestrator to delegate work instead of doing it directly.
|
|
349
|
+
|
|
350
|
+
Counts consecutive non-delegation tool calls (bash commands that aren't
|
|
351
|
+
delegation-related). When the count exceeds a threshold, nudges the
|
|
352
|
+
orchestrator to consider delegating to executors instead.
|
|
353
|
+
|
|
354
|
+
This is a softer reminder than the DelegationWatcher - it doesn't detect
|
|
355
|
+
specific code-writing patterns, just notices when the orchestrator seems
|
|
356
|
+
to be doing a lot of direct work that could potentially be delegated.
|
|
357
|
+
"""
|
|
358
|
+
|
|
359
|
+
name = "delegation_reminder"
|
|
360
|
+
description = "Reminds orchestrator to delegate after many direct tool calls"
|
|
361
|
+
|
|
362
|
+
# Tools that count as delegation-related (don't count against threshold)
|
|
363
|
+
DELEGATION_TOOLS = {
|
|
364
|
+
"delegate",
|
|
365
|
+
"converse",
|
|
366
|
+
"check_session",
|
|
367
|
+
"end_session",
|
|
368
|
+
"list_sessions",
|
|
369
|
+
"chat", # Talking to user is not direct work
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
async def observe(self, ctx: WatcherContext) -> WatcherResult:
|
|
373
|
+
config = self.config
|
|
374
|
+
threshold = config.get("threshold", 10) # Max consecutive non-delegation calls
|
|
375
|
+
lookback = config.get("lookback", 30) # How many messages to check
|
|
376
|
+
|
|
377
|
+
# Count consecutive non-delegation tool calls from the end
|
|
378
|
+
consecutive_non_delegation = 0
|
|
379
|
+
|
|
380
|
+
# Look through recent messages in reverse order
|
|
381
|
+
for msg in reversed(ctx.messages[-lookback:]):
|
|
382
|
+
if msg.get("role") != "assistant":
|
|
383
|
+
continue
|
|
384
|
+
|
|
385
|
+
tool_calls = msg.get("tool_calls", [])
|
|
386
|
+
if not tool_calls:
|
|
387
|
+
# Text-only response doesn't reset counter, but doesn't add to it
|
|
388
|
+
continue
|
|
389
|
+
|
|
390
|
+
# Check each tool call in this message
|
|
391
|
+
has_delegation = False
|
|
392
|
+
has_non_delegation = False
|
|
393
|
+
|
|
394
|
+
for tc in tool_calls:
|
|
395
|
+
func = tc.get("function", {})
|
|
396
|
+
name = func.get("name", "")
|
|
397
|
+
|
|
398
|
+
if name in self.DELEGATION_TOOLS:
|
|
399
|
+
has_delegation = True
|
|
400
|
+
elif name: # Any other tool call
|
|
401
|
+
has_non_delegation = True
|
|
402
|
+
|
|
403
|
+
if has_delegation:
|
|
404
|
+
# Found a delegation tool - stop counting
|
|
405
|
+
break
|
|
406
|
+
elif has_non_delegation:
|
|
407
|
+
# Add to consecutive count (one per message, not per tool call)
|
|
408
|
+
consecutive_non_delegation += 1
|
|
409
|
+
|
|
410
|
+
# Check if threshold exceeded
|
|
411
|
+
if consecutive_non_delegation >= threshold:
|
|
412
|
+
return WatcherResult.nudge(
|
|
413
|
+
guidance=(
|
|
414
|
+
f"You've made {consecutive_non_delegation} consecutive direct tool calls "
|
|
415
|
+
"without delegating to an executor. Remember: as the orchestrator, your role "
|
|
416
|
+
"is to delegate coding work to executors, not do it yourself via bash. "
|
|
417
|
+
"Consider whether the work you're doing could be delegated to an executor "
|
|
418
|
+
"using delegate(). Executors can write code, run tests, and handle complex "
|
|
419
|
+
"file operations more effectively than direct bash commands."
|
|
420
|
+
),
|
|
421
|
+
reason=f"Consecutive non-delegation calls: {consecutive_non_delegation}",
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
return WatcherResult.ok()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: zwarm
|
|
3
|
-
Version: 1.3.
|
|
3
|
+
Version: 1.3.5
|
|
4
4
|
Summary: Multi-Agent CLI Orchestration Research Platform
|
|
5
5
|
Requires-Python: <3.14,>=3.13
|
|
6
6
|
Requires-Dist: python-dotenv>=1.0.0
|
|
@@ -136,12 +136,17 @@ state_dir: .zwarm # State directory for sessions/events
|
|
|
136
136
|
|
|
137
137
|
watchers:
|
|
138
138
|
enabled: true
|
|
139
|
+
message_role: user # Role for nudge messages: user | assistant | system
|
|
139
140
|
watchers:
|
|
140
141
|
- name: progress
|
|
141
142
|
- name: budget
|
|
142
143
|
config:
|
|
143
144
|
max_steps: 50
|
|
144
145
|
max_sessions: 10
|
|
146
|
+
- name: delegation_reminder
|
|
147
|
+
config:
|
|
148
|
+
threshold: 10 # Nudge after N consecutive non-delegation calls
|
|
149
|
+
lookback: 30 # How many messages to check
|
|
145
150
|
- name: scope
|
|
146
151
|
config:
|
|
147
152
|
keywords: []
|
|
@@ -217,28 +222,38 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
|
|
|
217
222
|
| `pattern` | Custom regex pattern matching |
|
|
218
223
|
| `quality` | Code quality checks |
|
|
219
224
|
| `delegation` | Ensures orchestrator delegates instead of writing code directly |
|
|
225
|
+
| `delegation_reminder` | Nudges after many consecutive non-delegation tool calls (default: 10) |
|
|
220
226
|
|
|
221
227
|
### Enabling Watchers
|
|
222
228
|
|
|
223
229
|
```yaml
|
|
224
230
|
# config.yaml
|
|
225
231
|
watchers:
|
|
226
|
-
enabled:
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
-
|
|
239
|
-
|
|
232
|
+
enabled: true
|
|
233
|
+
message_role: user # How nudges appear: user | assistant | system
|
|
234
|
+
watchers:
|
|
235
|
+
- name: progress
|
|
236
|
+
config:
|
|
237
|
+
max_same_calls: 3 # Flag after 3 identical tool calls
|
|
238
|
+
- name: budget
|
|
239
|
+
config:
|
|
240
|
+
max_steps: 50
|
|
241
|
+
max_sessions: 10
|
|
242
|
+
- name: delegation_reminder
|
|
243
|
+
config:
|
|
244
|
+
threshold: 10 # Nudge after 10 non-delegation calls
|
|
245
|
+
- name: scope
|
|
246
|
+
config:
|
|
247
|
+
avoid_keywords:
|
|
248
|
+
- "refactor everything"
|
|
249
|
+
- "rewrite"
|
|
240
250
|
```
|
|
241
251
|
|
|
252
|
+
The `message_role` setting controls how watcher nudges are injected:
|
|
253
|
+
- `user` (default): Appears as a user message - strong nudge, agent must respond
|
|
254
|
+
- `assistant`: Appears as a previous assistant thought - softer, agent can continue
|
|
255
|
+
- `system`: Appears as system instruction - authoritative guidance
|
|
256
|
+
|
|
242
257
|
### Watcher Actions
|
|
243
258
|
|
|
244
259
|
Watchers can return different actions:
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
zwarm/__init__.py,sha256=3i3LMjHwIzE-LFIS2aUrwv3EZmpkvVMe-xj1h97rcSM,837
|
|
2
|
-
zwarm/orchestrator.py,sha256=
|
|
2
|
+
zwarm/orchestrator.py,sha256=38PKWwT79U0VY9aqZe5atf9C5EAFGgMXE7PXPX5QJ0w,21359
|
|
3
3
|
zwarm/test_orchestrator_watchers.py,sha256=QpoaehPU7ekT4XshbTOWnJ2H0wRveV3QOZjxbgyJJLY,807
|
|
4
4
|
zwarm/adapters/__init__.py,sha256=O0b-SfZpb6txeNqFkXZ2aaf34yLFYreznyrAV25jF_Q,656
|
|
5
5
|
zwarm/adapters/base.py,sha256=fZlQviTgVvOcwnxduTla6WuM6FzQJ_yoHMW5SxwVgQg,2527
|
|
6
6
|
zwarm/adapters/claude_code.py,sha256=vAjsjD-_JjARmC4_FBSILQZmQCBrk_oNHo18a9ubuqk,11481
|
|
7
|
-
zwarm/adapters/codex_mcp.py,sha256=
|
|
7
|
+
zwarm/adapters/codex_mcp.py,sha256=qX6blFC_rZwl3JaS9TLIG1yxIvWhgX42Goq2DdcJjbU,38474
|
|
8
8
|
zwarm/adapters/registry.py,sha256=EdyHECaNA5Kv1od64pYFBJyA_r_6I1r_eJTNP1XYLr4,1781
|
|
9
9
|
zwarm/adapters/test_codex_mcp.py,sha256=0qhVzxn_KF-XUS30gXSJKwMdR3kWGsDY9iPk1Ihqn3w,10698
|
|
10
10
|
zwarm/adapters/test_registry.py,sha256=otxcVDONwFCMisyANToF3iy7Y8dSbCL8bTmZNhxNuF4,2383
|
|
11
11
|
zwarm/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
zwarm/cli/main.py,sha256=
|
|
12
|
+
zwarm/cli/main.py,sha256=GstswIKfmwD1thNBfP0vOzKHaOq3PjRuu3PYe5rLVnc,59082
|
|
13
13
|
zwarm/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
zwarm/core/compact.py,sha256=Y8C7Gs-5-WOU43WRvQ863Qzd5xtuEqR6Aw3r2p8_-i8,10907
|
|
15
|
-
zwarm/core/config.py,sha256=
|
|
15
|
+
zwarm/core/config.py,sha256=7mzxrWvHmTjwiUWAoE4NYS_1yWj85-vWkpT6X6kiMIg,11579
|
|
16
16
|
zwarm/core/environment.py,sha256=HVDpDZEpDSfyh9-wHZMzMKVUPKvioBkPVWeiME2JmFo,5435
|
|
17
17
|
zwarm/core/models.py,sha256=PrC3okRBVJxISUa1Fax4KkagqLT6Xub-kTxC9drN0sY,10083
|
|
18
18
|
zwarm/core/state.py,sha256=MzrvODKEiJovI7YI1jajW4uukineZ3ezmW5oQinMgjg,11563
|
|
@@ -20,16 +20,16 @@ zwarm/core/test_compact.py,sha256=WSdjCB5t4YMcknsrkmJIUsVOPY28s4y9GnDmu3Z4BFw,11
|
|
|
20
20
|
zwarm/core/test_config.py,sha256=26ozyiFOdjFF2c9Q-HDfFM6GOLfgw_5FZ55nTDMNYA8,4888
|
|
21
21
|
zwarm/core/test_models.py,sha256=sWTIhMZvuLP5AooGR6y8OR2EyWydqVfhmGrE7NPBBnk,8450
|
|
22
22
|
zwarm/prompts/__init__.py,sha256=FiaIOniLrIyfD3_osxT6I7FfyKjtctbf8jNs5QTPs_s,213
|
|
23
|
-
zwarm/prompts/orchestrator.py,sha256=
|
|
23
|
+
zwarm/prompts/orchestrator.py,sha256=af5547L2g6HwGz-PxlKCAXJzqXEdjcwSAIdVH7_4LSk,15412
|
|
24
24
|
zwarm/tools/__init__.py,sha256=FpqxwXJA6-fQ7C-oLj30jjK_0qqcE7MbI0dQuaB56kU,290
|
|
25
25
|
zwarm/tools/delegation.py,sha256=PeB0W7x2TcGCZ9rGBYDlIIFr7AT1TOdc1SLe7BKCUoM,15332
|
|
26
26
|
zwarm/watchers/__init__.py,sha256=yYGTbhuImQLESUdtfrYbHYBJNvCNX3B-Ei-vY5BizX8,760
|
|
27
27
|
zwarm/watchers/base.py,sha256=r1GoPlj06nOT2xp4fghfSjxbRyFFFQUB6HpZbEyO2OY,3834
|
|
28
|
-
zwarm/watchers/builtin.py,sha256=
|
|
28
|
+
zwarm/watchers/builtin.py,sha256=k1pCnQBEmLHeuCo8t6UXoenJUpfWY7AuGt_aEk8syew,15828
|
|
29
29
|
zwarm/watchers/manager.py,sha256=XZjBVeHjgCUlkTUeHqdvBvHoBC862U1ik0fG6nlRGog,5587
|
|
30
30
|
zwarm/watchers/registry.py,sha256=A9iBIVIFNtO7KPX0kLpUaP8dAK7ozqWLA44ocJGnOw4,1219
|
|
31
31
|
zwarm/watchers/test_watchers.py,sha256=zOsxumBqKfR5ZVGxrNlxz6KcWjkcdp0QhW9WB0_20zM,7855
|
|
32
|
-
zwarm-1.3.
|
|
33
|
-
zwarm-1.3.
|
|
34
|
-
zwarm-1.3.
|
|
35
|
-
zwarm-1.3.
|
|
32
|
+
zwarm-1.3.5.dist-info/METADATA,sha256=9boM7ysJGr-hfN_0YrNZfWD_Lwi8B9iF-BJWorrmNXs,16114
|
|
33
|
+
zwarm-1.3.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
34
|
+
zwarm-1.3.5.dist-info/entry_points.txt,sha256=u0OXq4q8d3yJ3EkUXwZfkS-Y8Lcy0F8cWrcQfoRxM6Q,46
|
|
35
|
+
zwarm-1.3.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|