zwarm 1.2.1__tar.gz → 1.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {zwarm-1.2.1 → zwarm-1.3.3}/.gitignore +2 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/PKG-INFO +30 -15
- {zwarm-1.2.1 → zwarm-1.3.3}/README.md +29 -14
- {zwarm-1.2.1 → zwarm-1.3.3}/pyproject.toml +1 -1
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/codex_mcp.py +42 -2
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/cli/main.py +100 -5
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/config.py +9 -1
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/state.py +143 -12
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/orchestrator.py +51 -9
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/prompts/orchestrator.py +18 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/tools/delegation.py +37 -2
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/builtin.py +82 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/__init__.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/__init__.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/base.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/claude_code.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/registry.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/test_codex_mcp.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/test_registry.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/cli/__init__.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/__init__.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/compact.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/environment.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/models.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/test_compact.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/test_config.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/test_models.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/prompts/__init__.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/test_orchestrator_watchers.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/tools/__init__.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/__init__.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/base.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/manager.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/registry.py +0 -0
- {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/test_watchers.py +0 -0
{zwarm-1.2.1 → zwarm-1.3.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: zwarm
-Version: 1.2.1
+Version: 1.3.3
 Summary: Multi-Agent CLI Orchestration Research Platform
 Requires-Python: <3.14,>=3.13
 Requires-Dist: python-dotenv>=1.0.0
@@ -136,12 +136,17 @@ state_dir: .zwarm  # State directory for sessions/events

 watchers:
   enabled: true
+  message_role: user  # Role for nudge messages: user | assistant | system
   watchers:
     - name: progress
     - name: budget
       config:
         max_steps: 50
         max_sessions: 10
+    - name: delegation_reminder
+      config:
+        threshold: 10  # Nudge after N consecutive non-delegation calls
+        lookback: 30   # How many messages to check
     - name: scope
       config:
         keywords: []
@@ -217,28 +222,38 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
 | `pattern` | Custom regex pattern matching |
 | `quality` | Code quality checks |
 | `delegation` | Ensures orchestrator delegates instead of writing code directly |
+| `delegation_reminder` | Nudges after many consecutive non-delegation tool calls (default: 10) |

 ### Enabling Watchers

 ```yaml
 # config.yaml
 watchers:
-  enabled:
-  ... (13 removed lines not captured in this diff view)
+  enabled: true
+  message_role: user  # How nudges appear: user | assistant | system
+  watchers:
+    - name: progress
+      config:
+        max_same_calls: 3  # Flag after 3 identical tool calls
+    - name: budget
+      config:
+        max_steps: 50
+        max_sessions: 10
+    - name: delegation_reminder
+      config:
+        threshold: 10  # Nudge after 10 non-delegation calls
+    - name: scope
+      config:
+        avoid_keywords:
+          - "refactor everything"
+          - "rewrite"
 ```

+The `message_role` setting controls how watcher nudges are injected:
+- `user` (default): Appears as a user message - strong nudge, agent must respond
+- `assistant`: Appears as a previous assistant thought - softer, agent can continue
+- `system`: Appears as system instruction - authoritative guidance
+
 ### Watcher Actions

 Watchers can return different actions:
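To make the `message_role` behavior concrete, here is a minimal illustrative sketch (not part of the package). It assumes only that watcher nudges are appended to a plain role/content message list with the `[WATCHER: ...]` prefix described above:

```python
# Minimal sketch of role-configurable nudge injection (illustrative only).
VALID_ROLES = ("user", "assistant", "system")

def inject_nudge(messages: list[dict], watcher: str, guidance: str, role: str = "user") -> None:
    if role not in VALID_ROLES:
        role = "user"  # fall back to the strongest nudge
    messages.append({
        "role": role,
        "content": f"[WATCHER: {watcher}] {guidance}",
    })

history: list[dict] = []
inject_nudge(history, "delegation_reminder", "Consider delegating this work.", role="assistant")
```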
{zwarm-1.2.1 → zwarm-1.3.3}/README.md

@@ -124,12 +124,17 @@ state_dir: .zwarm  # State directory for sessions/events

 watchers:
   enabled: true
+  message_role: user  # Role for nudge messages: user | assistant | system
   watchers:
     - name: progress
     - name: budget
       config:
         max_steps: 50
         max_sessions: 10
+    - name: delegation_reminder
+      config:
+        threshold: 10  # Nudge after N consecutive non-delegation calls
+        lookback: 30   # How many messages to check
     - name: scope
       config:
         keywords: []

@@ -205,28 +210,38 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
 | `pattern` | Custom regex pattern matching |
 | `quality` | Code quality checks |
 | `delegation` | Ensures orchestrator delegates instead of writing code directly |
+| `delegation_reminder` | Nudges after many consecutive non-delegation tool calls (default: 10) |

 ### Enabling Watchers

 ```yaml
 # config.yaml
 watchers:
-  enabled:
-  ... (13 removed lines not captured in this diff view)
+  enabled: true
+  message_role: user  # How nudges appear: user | assistant | system
+  watchers:
+    - name: progress
+      config:
+        max_same_calls: 3  # Flag after 3 identical tool calls
+    - name: budget
+      config:
+        max_steps: 50
+        max_sessions: 10
+    - name: delegation_reminder
+      config:
+        threshold: 10  # Nudge after 10 non-delegation calls
+    - name: scope
+      config:
+        avoid_keywords:
+          - "refactor everything"
+          - "rewrite"
 ```

+The `message_role` setting controls how watcher nudges are injected:
+- `user` (default): Appears as a user message - strong nudge, agent must respond
+- `assistant`: Appears as a previous assistant thought - softer, agent can continue
+- `system`: Appears as system instruction - authoritative guidance
+
 ### Watcher Actions

 Watchers can return different actions:
{zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/codex_mcp.py

@@ -549,20 +549,33 @@ class CodexMCPAdapter(ExecutorAdapter):
         """
         client = self._ensure_client()

+        logger.debug(f"Calling codex-reply with conversation_id={conversation_id}")
+
         result = client.call_tool("codex-reply", {
             "conversationId": conversation_id,
             "prompt": message,
         })

+        # Check for conversation loss - MCP returns empty result when session not found
+        if not result.get("messages") and not result.get("output"):
+            logger.error(
+                f"codex-reply returned empty result for conversation_id={conversation_id}. "
+                f"The MCP server may have lost the conversation state. Result: {result}"
+            )
+
         # Track usage
         usage = result.get("usage", {})
         self._accumulate_usage(usage)

+        response = self._extract_response(result)
+        logger.debug(f"codex-reply response length: {len(response)} chars")
+
         return {
-            "response": self._extract_response(result),
+            "response": response,
             "raw_messages": result.get("messages", []),
             "usage": usage,
             "total_usage": self.total_usage,
+            "conversation_lost": not result.get("messages") and not result.get("output"),
         }

     @weave.op()

@@ -598,6 +611,13 @@ class CodexMCPAdapter(ExecutorAdapter):
         session.conversation_id = result["conversation_id"]
         if session.conversation_id:
             self._sessions[session.id] = session.conversation_id
+            logger.debug(f"Session {session.id[:8]} mapped to conversation {session.conversation_id}")
+        else:
+            # This is bad - we won't be able to continue this conversation
+            logger.warning(
+                f"Session {session.id[:8]} started but MCP didn't return a conversation ID. "
+                "Further converse() calls will fail."
+            )

         session.add_message("user", task)
         session.add_message("assistant", result["response"])

@@ -652,6 +672,16 @@ class CodexMCPAdapter(ExecutorAdapter):
         )

         response_text = result["response"]
+
+        # Check if conversation was lost
+        if result.get("conversation_lost"):
+            logger.warning(
+                f"Conversation {session.conversation_id} was lost. "
+                f"Session {session.id} will be marked as needing re-delegation."
+            )
+            # Mark the session as having a lost conversation so orchestrator can handle it
+            session.conversation_id = None  # Clear the stale ID
+
         session.add_message("user", message)
         session.add_message("assistant", response_text)

@@ -797,6 +827,15 @@ class CodexMCPAdapter(ExecutorAdapter):

     def _extract_response(self, result: dict) -> str:
         """Extract response text from MCP result."""
+        # Check for error indicators - empty result suggests lost conversation
+        if (
+            result.get("conversationId") is None
+            and not result.get("messages")
+            and not result.get("output")
+        ):
+            logger.warning(f"MCP returned empty result - conversation may be lost: {result}")
+            return "[ERROR] Conversation lost - the MCP server no longer has this session. Please re-delegate the task."
+
         # First check for our collected output
         if result.get("output"):
             return result["output"]

@@ -823,5 +862,6 @@ class CodexMCPAdapter(ExecutorAdapter):
         if "text" in result:
             return result["text"]

-        # Fallback: stringify the result
+        # Fallback: stringify the result (but log it as unexpected)
+        logger.warning(f"Unexpected MCP result format, returning raw: {list(result.keys())}")
         return json.dumps(result, indent=2)
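As an aside (not part of the diff), a small sketch of how calling code might interpret the reply dict built above; only the keys visible in the hunks ("response", "usage", "conversation_lost") are assumed:

```python
# Illustrative consumer of the reply dict returned by the codex-reply path above.
def summarize_reply(reply: dict) -> str:
    if reply.get("conversation_lost"):
        # Empty "messages"/"output" from the MCP server means the conversation is
        # gone; the stale conversation ID should not be reused.
        return "conversation lost - end the session and re-delegate with full context"
    tokens = reply.get("usage", {}).get("total_tokens", 0)
    return f"ok: {len(reply.get('response', ''))} chars, {tokens} tokens"

print(summarize_reply({"response": "done", "usage": {"total_tokens": 42}, "conversation_lost": False}))
print(summarize_reply({"response": "", "usage": {}, "conversation_lost": True}))
```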
{zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/cli/main.py

@@ -141,6 +141,8 @@ def orchestrate(
     resume: Annotated[bool, typer.Option("--resume", help="Resume from previous state")] = False,
     max_steps: Annotated[Optional[int], typer.Option("--max-steps", help="Maximum orchestrator steps")] = None,
     verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Show detailed output")] = False,
+    instance: Annotated[Optional[str], typer.Option("--instance", "-i", help="Instance ID (for isolation/resume)")] = None,
+    instance_name: Annotated[Optional[str], typer.Option("--name", "-n", help="Human-readable instance name")] = None,
 ):
     """
     Start an orchestrator session.

@@ -149,6 +151,9 @@
     (Codex, Claude Code). It can have sync conversations or fire-and-forget
     async delegations.

+    Each run creates an isolated instance to prevent conflicts when running
+    multiple orchestrators in the same directory.
+
     [bold]Examples:[/]
         [dim]# Simple task[/]
         $ zwarm orchestrate --task "Add a logout button to the navbar"

@@ -166,8 +171,14 @@
         [dim]# Override settings[/]
         $ zwarm orchestrate --task "Fix bug" --set executor.adapter=claude_code

-        [dim]#
-        $ zwarm orchestrate --task "
+        [dim]# Named instance (easier to track)[/]
+        $ zwarm orchestrate --task "Add tests" --name test-work
+
+        [dim]# Resume a specific instance[/]
+        $ zwarm orchestrate --resume --instance abc123
+
+        [dim]# List all instances[/]
+        $ zwarm instances
     """
     from zwarm.orchestrator import build_orchestrator

@@ -187,6 +198,8 @@
     console.print(f"[bold]Starting orchestrator...[/]")
     console.print(f"  Task: {task}")
     console.print(f"  Working dir: {working_dir.absolute()}")
+    if instance:
+        console.print(f"  Instance: {instance}" + (f" ({instance_name})" if instance_name else ""))
     console.print()

     # Output handler to show orchestrator messages

@@ -203,11 +216,17 @@
         overrides=override_list,
         resume=resume,
         output_handler=output_handler,
+        instance_id=instance,
+        instance_name=instance_name,
     )

     if resume:
         console.print("  [dim]Resuming from previous state...[/]")

+    # Show instance ID if auto-generated
+    if orchestrator.instance_id and not instance:
+        console.print(f"  [dim]Instance: {orchestrator.instance_id[:8]}[/]")
+
     # Run the orchestrator loop
     console.print("[bold]--- Orchestrator running ---[/]\n")
     result = orchestrator.run(task=task)

@@ -223,16 +242,35 @@
         # Save state for potential resume
         orchestrator.save_state()

+        # Update instance status
+        if orchestrator.instance_id:
+            from zwarm.core.state import update_instance_status
+            update_instance_status(
+                orchestrator.instance_id,
+                "completed",
+                working_dir / ".zwarm",
+            )
+            console.print(f"  [dim]Instance {orchestrator.instance_id[:8]} marked completed[/]")
+
     except KeyboardInterrupt:
         console.print("\n\n[yellow]Interrupted.[/]")
         if orchestrator:
             orchestrator.save_state()
             console.print("[dim]State saved. Use --resume to continue.[/]")
+            # Keep instance as "active" so it can be resumed
         sys.exit(1)
     except Exception as e:
         console.print(f"\n[red]Error:[/] {e}")
         if verbose:
             console.print_exception()
+        # Update instance status to failed
+        if orchestrator and orchestrator.instance_id:
+            from zwarm.core.state import update_instance_status
+            update_instance_status(
+                orchestrator.instance_id,
+                "failed",
+                working_dir / ".zwarm",
+            )
         sys.exit(1)


@@ -384,6 +422,63 @@ def status(
         console.print("  [dim](none)[/]")


+@app.command()
+def instances(
+    working_dir: Annotated[Path, typer.Option("--working-dir", "-w", help="Working directory")] = Path("."),
+    all_instances: Annotated[bool, typer.Option("--all", "-a", help="Show all instances (including completed)")] = False,
+):
+    """
+    List all orchestrator instances.
+
+    Shows instances that have been run in this directory. Use --all to include
+    completed instances.
+
+    [bold]Examples:[/]
+        [dim]# List active instances[/]
+        $ zwarm instances
+
+        [dim]# List all instances[/]
+        $ zwarm instances --all
+    """
+    from zwarm.core.state import list_instances as get_instances
+
+    state_dir = working_dir / ".zwarm"
+    all_inst = get_instances(state_dir)
+
+    if not all_inst:
+        console.print("[dim]No instances found.[/]")
+        console.print("[dim]Run 'zwarm orchestrate' to start a new instance.[/]")
+        return
+
+    # Filter if not showing all
+    if not all_instances:
+        all_inst = [i for i in all_inst if i.get("status") == "active"]
+
+        if not all_inst:
+            console.print("[dim]No active instances. Use --all to see completed ones.[/]")
+            return
+
+    console.print(f"[bold]Instances[/] ({len(all_inst)} total)\n")
+
+    for inst in all_inst:
+        status = inst.get("status", "unknown")
+        status_icon = {"active": "[green]●[/]", "completed": "[dim]✓[/]", "failed": "[red]✗[/]"}.get(status, "[dim]?[/]")
+
+        inst_id = inst.get("id", "unknown")[:8]
+        name = inst.get("name", "")
+        task = (inst.get("task") or "")[:60]
+        updated = inst.get("updated_at", "")[:19] if inst.get("updated_at") else ""
+
+        console.print(f"  {status_icon} [bold]{inst_id}[/]" + (f" ({name})" if name and name != inst_id else ""))
+        if task:
+            console.print(f"      [dim]{task}[/]")
+        if updated:
+            console.print(f"      [dim]Updated: {updated}[/]")
+        console.print()
+
+    console.print("[dim]Use --instance <id> with 'orchestrate --resume' to resume an instance.[/]")
+
+
 @app.command()
 def history(
     working_dir: Annotated[Path, typer.Option("--working-dir", "-w", help="Working directory")] = Path("."),

@@ -577,7 +672,7 @@ def init(
     # Gather settings
     weave_project = ""
     adapter = "codex_mcp"
-    watchers_enabled = ["progress", "budget", "delegation"]
+    watchers_enabled = ["progress", "budget", "delegation", "delegation_reminder"]
     create_project_config = with_project
     project_description = ""
     project_context = ""

@@ -601,10 +696,10 @@

     # Watchers
     console.print("\n  [bold]Watchers[/] (trajectory aligners)")
-    available_watchers = ["progress", "budget", "delegation", "scope", "pattern", "quality"]
+    available_watchers = ["progress", "budget", "delegation", "delegation_reminder", "scope", "pattern", "quality"]
     watchers_enabled = []
     for w in available_watchers:
-        default = w in ["progress", "budget", "delegation"]
+        default = w in ["progress", "budget", "delegation", "delegation_reminder"]
         if typer.confirm(f"  Enable {w}?", default=default):
             watchers_enabled.append(w)

{zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/config.py

@@ -86,7 +86,13 @@ class WatchersConfig:
     watchers: list[WatcherConfigItem] = field(default_factory=lambda: [
         WatcherConfigItem(name="progress"),
         WatcherConfigItem(name="budget"),
+        WatcherConfigItem(name="delegation_reminder"),
     ])
+    # Role for watcher nudge messages: "user" | "assistant" | "system"
+    # "user" (default) - Appears as if user sent the message, strong nudge
+    # "assistant" - Appears as previous assistant thought, softer nudge
+    # "system" - Appears as system instruction, authoritative
+    message_role: str = "user"


 @dataclass

@@ -122,13 +128,14 @@ class ZwarmConfig:
             ],
         )
     else:
-        # Full format: watchers: {enabled: true, watchers: [...]}
+        # Full format: watchers: {enabled: true, watchers: [...], message_role: "user"}
         watchers_config = WatchersConfig(
             enabled=watchers_data.get("enabled", True),
             watchers=[
                 WatcherConfigItem(name=w) if isinstance(w, str) else WatcherConfigItem(**w)
                 for w in watchers_data.get("watchers", [])
             ] or WatchersConfig().watchers,
+            message_role=watchers_data.get("message_role", "user"),
         )

     # Build orchestrator config with nested compaction

@@ -180,6 +187,7 @@
                 {"name": w.name, "enabled": w.enabled, "config": w.config}
                 for w in self.watchers.watchers
             ],
+            "message_role": self.watchers.message_role,
         },
         "state_dir": self.state_dir,
     }
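For readers skimming the config changes, here is a standalone sketch of the accepted shape: watcher entries may be bare names or mappings, and `message_role` defaults to `user`. The dataclass below is illustrative only, not zwarm's real `WatcherConfigItem`:

```python
# Standalone sketch of the parsing rules shown in the hunk above.
from dataclasses import dataclass, field

@dataclass
class WatcherItem:
    name: str
    enabled: bool = True
    config: dict = field(default_factory=dict)

def parse_watchers(data: dict) -> tuple[list[WatcherItem], str]:
    items = [
        WatcherItem(name=w) if isinstance(w, str) else WatcherItem(**w)
        for w in data.get("watchers", [])
    ]
    return items, data.get("message_role", "user")

items, role = parse_watchers({
    "enabled": True,
    "message_role": "assistant",
    "watchers": ["progress", {"name": "delegation_reminder", "config": {"threshold": 10}}],
})
```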
{zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/state.py

@@ -1,16 +1,25 @@
 """
 Flat-file state management for zwarm.

-State structure:
+State structure (with instance isolation):
     .zwarm/
-    ├── ...
-    ... (old directory-tree lines not captured in this diff view)
-    └── ...
+    ├── instances.json       # Registry of all instances
+    └── instances/
+        └── <instance-id>/
+            ├── state.json       # Current state (sessions, tasks)
+            ├── events.jsonl     # Append-only event log
+            ├── sessions/
+            │   └── <session-id>/
+            │       ├── messages.json
+            │       └── output.log
+            └── orchestrator/
+                └── messages.json  # Orchestrator's message history (for resume)
+
+Legacy structure (single instance, for backwards compat):
+    .zwarm/
+    ├── state.json
+    ├── events.jsonl
+    └── ...
 """

 from __future__ import annotations
@@ -19,10 +28,116 @@ import json
 from datetime import datetime
 from pathlib import Path
 from typing import Any
+from uuid import uuid4

 from .models import ConversationSession, Event, Task


+# --- Instance Registry ---
+
+def get_instances_registry_path(base_dir: Path | str = ".zwarm") -> Path:
+    """Get path to the instances registry file."""
+    return Path(base_dir) / "instances.json"
+
+
+def list_instances(base_dir: Path | str = ".zwarm") -> list[dict[str, Any]]:
+    """List all registered instances."""
+    registry_path = get_instances_registry_path(base_dir)
+    if not registry_path.exists():
+        return []
+    try:
+        return json.loads(registry_path.read_text()).get("instances", [])
+    except (json.JSONDecodeError, KeyError):
+        return []
+
+
+def register_instance(
+    instance_id: str,
+    name: str | None = None,
+    task: str | None = None,
+    base_dir: Path | str = ".zwarm",
+) -> None:
+    """Register an instance in the global registry."""
+    base = Path(base_dir)
+    base.mkdir(parents=True, exist_ok=True)
+
+    registry_path = get_instances_registry_path(base_dir)
+
+    # Load existing registry
+    if registry_path.exists():
+        try:
+            registry = json.loads(registry_path.read_text())
+        except json.JSONDecodeError:
+            registry = {"instances": []}
+    else:
+        registry = {"instances": []}
+
+    # Check if instance already registered
+    existing_ids = {inst["id"] for inst in registry["instances"]}
+    if instance_id in existing_ids:
+        # Update existing entry
+        for inst in registry["instances"]:
+            if inst["id"] == instance_id:
+                inst["updated_at"] = datetime.now().isoformat()
+                inst["status"] = "active"
+                if name:
+                    inst["name"] = name
+                if task:
+                    inst["task"] = task[:100]  # Truncate
+                break
+    else:
+        # Add new entry
+        registry["instances"].append({
+            "id": instance_id,
+            "name": name or instance_id[:8],
+            "task": (task[:100] if task else None),
+            "created_at": datetime.now().isoformat(),
+            "updated_at": datetime.now().isoformat(),
+            "status": "active",
+        })
+
+    registry_path.write_text(json.dumps(registry, indent=2))
+
+
+def update_instance_status(
+    instance_id: str,
+    status: str,
+    base_dir: Path | str = ".zwarm",
+) -> None:
+    """Update an instance's status in the registry."""
+    registry_path = get_instances_registry_path(base_dir)
+    if not registry_path.exists():
+        return
+
+    try:
+        registry = json.loads(registry_path.read_text())
+    except json.JSONDecodeError:
+        return
+
+    for inst in registry.get("instances", []):
+        if inst["id"] == instance_id:
+            inst["status"] = status
+            inst["updated_at"] = datetime.now().isoformat()
+            break
+
+    registry_path.write_text(json.dumps(registry, indent=2))
+
+
+def get_instance_state_dir(
+    instance_id: str | None = None,
+    base_dir: Path | str = ".zwarm",
+) -> Path:
+    """
+    Get the state directory for an instance.
+
+    If instance_id is None, returns the legacy path for backwards compat.
+    """
+    base = Path(base_dir)
+    if instance_id is None:
+        return base  # Legacy: .zwarm/
+    return base / "instances" / instance_id
+
+
 def _json_serializer(obj: Any) -> Any:
     """Custom JSON serializer for non-standard types."""
     # Handle pydantic models
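A short usage sketch of these registry helpers (illustrative; the function names and argument order follow the hunk above):

```python
# Exercise the instance registry against a throwaway directory.
import tempfile
from uuid import uuid4

from zwarm.core.state import list_instances, register_instance, update_instance_status

with tempfile.TemporaryDirectory() as tmp:
    iid = str(uuid4())
    register_instance(iid, name="test-work", task="Add a logout button", base_dir=tmp)
    update_instance_status(iid, "completed", base_dir=tmp)
    for inst in list_instances(base_dir=tmp):
        print(inst["id"][:8], inst["status"], inst["task"])
```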
@@ -42,15 +157,31 @@ class StateManager:
     """
     Manages flat-file state for zwarm.

-    All state is stored as JSON files in a directory
+    All state is stored as JSON files in a directory.
+    With instance isolation: .zwarm/instances/<instance-id>/
+    Legacy (no instance):    .zwarm/
+
     This enables:
     - Git-backed history
     - Easy debugging (just read the files)
     - Resume from previous state
+    - Multiple concurrent orchestrators (with instance isolation)
     """

-    def __init__(
-        self
+    def __init__(
+        self,
+        state_dir: Path | str = ".zwarm",
+        instance_id: str | None = None,
+    ):
+        self.base_dir = Path(state_dir)
+        self.instance_id = instance_id
+
+        # Resolve actual state directory
+        if instance_id:
+            self.state_dir = get_instance_state_dir(instance_id, self.base_dir)
+        else:
+            self.state_dir = self.base_dir
+
         self._sessions: dict[str, ConversationSession] = {}
         self._tasks: dict[str, Task] = {}
         self._orchestrator_messages: list[dict[str, Any]] = []
{zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/orchestrator.py

@@ -52,6 +52,10 @@ class Orchestrator(YamlAgent):
     config: ZwarmConfig = Field(default_factory=ZwarmConfig)
     working_dir: Path = Field(default_factory=Path.cwd)

+    # Instance identification (for multi-orchestrator isolation)
+    instance_id: str | None = Field(default=None)
+    instance_name: str | None = Field(default=None)
+
     # Load tools from modules (delegation + bash for verification)
     agent_tool_modules: list[str] = Field(
         default=[

@@ -77,11 +81,25 @@ class Orchestrator(YamlAgent):
         """Initialize state and adapters after model creation."""
         super().model_post_init(__context)

-        # Initialize state manager
-        ... (removed line not captured in this diff view)
+        # Initialize state manager with instance isolation
+        base_state_dir = self.working_dir / self.config.state_dir
+        self._state = StateManager(
+            state_dir=base_state_dir,
+            instance_id=self.instance_id,
+        )
         self._state.init()
         self._state.load()

+        # Register instance if using instance isolation
+        if self.instance_id:
+            from zwarm.core.state import register_instance
+            register_instance(
+                instance_id=self.instance_id,
+                name=self.instance_name,
+                task=None,  # Will be updated when task is set
+                base_dir=base_state_dir,
+            )
+
         # Load existing sessions
         for session in self._state.list_sessions():
             self._sessions[session.id] = session

@@ -215,12 +233,18 @@ class Orchestrator(YamlAgent):
         if not self._resumed:
             return

-        # Build list of old sessions
+        # Build list of old sessions and INVALIDATE their conversation IDs
+        # The MCP server was restarted, so all conversation IDs are now stale
         old_sessions = []
+        invalidated_count = 0
         for sid, session in self._sessions.items():
             old_sessions.append(
                 f"  - {sid[:8]}... ({session.adapter}, {session.status.value})"
             )
+            # Clear stale conversation_id to prevent converse() from trying to use it
+            if session.conversation_id:
+                session.conversation_id = None
+                invalidated_count += 1

         session_info = "\n".join(old_sessions) if old_sessions else "  (none)"

@@ -228,14 +252,14 @@
             "role": "user",
             "content": f"""[SYSTEM NOTICE] You have been resumed from a previous session.

-... (removed line not captured in this diff view)
+CRITICAL: Your previous executor sessions are NO LONGER USABLE. The MCP server was restarted, so all conversation state was lost. {invalidated_count} conversation ID(s) have been invalidated.

-Previous sessions (
+Previous sessions (conversation IDs cleared):
 {session_info}

-You
+You MUST start NEW sessions with delegate() to continue any work. The converse() tool will fail on these old sessions because they have no active conversation.

-... (removed line not captured in this diff view)
+Review what was accomplished in the previous session and delegate new tasks as needed.""",
         }

         self.messages.append(resume_msg)

@@ -328,10 +352,15 @@ Continue with your task from where you left off.""",

         # Handle watcher result
         if result.action == WatcherAction.NUDGE and result.guidance:
-            # Inject guidance as a
+            # Inject guidance as a message with configurable role
+            message_role = self.config.watchers.message_role
+            # Validate role (default to user if invalid)
+            if message_role not in ("user", "assistant", "system"):
+                message_role = "user"
+
             self.messages.append(
                 {
-                    "role":
+                    "role": message_role,
                     "content": f"[WATCHER: {result.metadata.get('triggered_by', 'unknown')}] {result.guidance}",
                 }
             )
@@ -521,6 +550,8 @@ def build_orchestrator(
     overrides: list[str] | None = None,
     resume: bool = False,
     output_handler: Callable[[str], None] | None = None,
+    instance_id: str | None = None,
+    instance_name: str | None = None,
 ) -> Orchestrator:
     """
     Build an orchestrator from configuration.

@@ -532,10 +563,14 @@
         overrides: CLI overrides (--set key=value)
         resume: Whether to resume from previous state
         output_handler: Function to handle orchestrator output
+        instance_id: Unique ID for this instance (enables multi-orchestrator isolation)
+        instance_name: Human-readable name for this instance

     Returns:
         Configured Orchestrator instance
     """
+    from uuid import uuid4
+
     # Load configuration
     config = load_config(
         config_path=config_path,

@@ -545,6 +580,11 @@
     # Resolve working directory
     working_dir = working_dir or Path.cwd()

+    # Generate instance ID if not provided (enables isolation by default for new runs)
+    # For resume, instance_id should be provided explicitly
+    if instance_id is None and not resume:
+        instance_id = str(uuid4())
+
     # Build system prompt
     system_prompt = _build_system_prompt(config, working_dir)

@@ -565,6 +605,8 @@
         system_prompt=system_prompt,
         maxSteps=config.orchestrator.max_steps,
         env=env,
+        instance_id=instance_id,
+        instance_name=instance_name,
     )

     # Resume if requested
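Taken together, the new-run vs. resume behavior reduces to a small decision that can be sketched standalone (illustrative; not zwarm's actual code):

```python
# Standalone sketch of the instance-ID / state-directory decision added above.
from pathlib import Path
from uuid import uuid4

def resolve_instance(base: Path, instance_id: str | None, resume: bool) -> tuple[str | None, Path]:
    """New runs get a fresh UUID and an isolated directory under .zwarm/instances/;
    resumes reuse the caller-supplied ID; no ID at all falls back to legacy .zwarm/."""
    if instance_id is None and not resume:
        instance_id = str(uuid4())
    if instance_id is None:
        return None, base                      # legacy flat layout
    return instance_id, base / "instances" / instance_id

print(resolve_instance(Path(".zwarm"), None, resume=False))
```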
{zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/prompts/orchestrator.py

@@ -43,6 +43,24 @@ Your primary tools are for delegation and verification:

 ---

+# Watchers
+
+Your execution is monitored by "watchers" - automated systems that observe your trajectory and provide guidance when you may be going off course. Watchers are designed to help you stay aligned with best practices and catch common pitfalls.
+
+When you see a message prefixed with `[WATCHER: ...]`, pay attention. These are interventions from the watcher system indicating that your current approach may need adjustment. Watchers might notice:
+
+- You're doing direct work (bash commands) when you should be delegating to executors
+- You're spinning or repeating the same actions without making progress
+- You're approaching resource limits (steps, sessions)
+- You're drifting from the original task scope
+- You're making changes without corresponding tests
+
+Watcher guidance is not optional advice - treat it as an important course correction. If a watcher tells you to delegate instead of doing work directly, delegate. If a watcher says you're stuck, step back and try a different approach. If a watcher warns about budget limits, prioritize and wrap up.
+
+The watchers are on your side. They exist to help you succeed, not to criticize. Heed their guidance promptly.
+
+---
+
 # Sync vs Async: Choosing the Right Mode

 The mode you choose for delegation significantly affects how work proceeds.
{zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/tools/delegation.py

@@ -194,7 +194,7 @@ def delegate(
     header = _format_session_header(session.id, adapter_name, mode)

     if mode == "sync":
-        return {
+        result = {
             "success": True,
             "session": header,
             "session_id": session.id,

@@ -204,6 +204,14 @@ def delegate(
             "tokens": session.token_usage.get("total_tokens", 0),
             "hint": "Use converse(session_id, message) to continue this conversation",
         }
+        # Warn if no conversation ID - converse() won't work
+        if not session.conversation_id:
+            result["warning"] = "no_conversation_id"
+            result["hint"] = (
+                "WARNING: MCP didn't return a conversation ID. "
+                "You cannot use converse() - send all instructions upfront or use async mode."
+            )
+        return result
     else:
         return {
             "success": True,

@@ -263,6 +271,18 @@ def converse(
             "hint": "Start a new session with delegate()",
         }

+    # Check for stale/missing conversation_id (common after resume)
+    if not session.conversation_id:
+        return {
+            "success": False,
+            "error": "Session has no conversation ID (likely stale after resume)",
+            "hint": (
+                "This session's conversation was lost (MCP server restarted). "
+                "Use end_session() to close it, then delegate() a new task."
+            ),
+            "session_id": session_id,
+        }
+
     # Get adapter and send message
     executor = self._get_adapter(session.adapter)
     try:

@@ -288,7 +308,13 @@ def converse(
     turn = len([m for m in session.messages if m.role == "user"])
     header = _format_session_header(session.id, session.adapter, session.mode.value)

-    return {
+    # Check for conversation loss (indicated by error in response)
+    conversation_lost = (
+        "[ERROR] Conversation lost" in response
+        or session.conversation_id is None
+    )
+
+    result = {
         "success": True,
         "session": header,
         "session_id": session_id,

@@ -298,6 +324,15 @@ def converse(
         "tokens": session.token_usage.get("total_tokens", 0),
     }

+    if conversation_lost:
+        result["warning"] = "conversation_lost"
+        result["hint"] = (
+            "The MCP server lost this conversation. You should end_session() "
+            "and delegate() a new task with the full context."
+        )
+
+    return result
+

 @weaveTool
 def check_session(
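Not part of the diff: a sketch of how an orchestration loop might branch on these new warning fields. The dict keys come from the hunks above; the recovery policy (end the session, then delegate afresh) follows the hint text:

```python
# Illustrative handling of the warning fields added to delegate()/converse() results.
def next_action(result: dict) -> str:
    if not result.get("success"):
        return f"error: {result.get('error')} -> {result.get('hint')}"
    if result.get("warning") == "conversation_lost":
        return "end_session() then delegate() a new task with full context"
    if result.get("warning") == "no_conversation_id":
        return "do not converse(); send everything upfront or use async mode"
    return "continue the conversation"

print(next_action({"success": True, "warning": "conversation_lost"}))
```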
{zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/builtin.py

@@ -340,3 +340,85 @@ class QualityWatcher(Watcher):
         )

         return WatcherResult.ok()
+
+
+@register_watcher("delegation_reminder")
+class DelegationReminderWatcher(Watcher):
+    """
+    Reminds the orchestrator to delegate work instead of doing it directly.
+
+    Counts consecutive non-delegation tool calls (bash commands that aren't
+    delegation-related). When the count exceeds a threshold, nudges the
+    orchestrator to consider delegating to executors instead.
+
+    This is a softer reminder than the DelegationWatcher - it doesn't detect
+    specific code-writing patterns, just notices when the orchestrator seems
+    to be doing a lot of direct work that could potentially be delegated.
+    """
+
+    name = "delegation_reminder"
+    description = "Reminds orchestrator to delegate after many direct tool calls"
+
+    # Tools that count as delegation-related (don't count against threshold)
+    DELEGATION_TOOLS = {
+        "delegate",
+        "converse",
+        "check_session",
+        "end_session",
+        "list_sessions",
+        "chat",  # Talking to user is not direct work
+    }
+
+    async def observe(self, ctx: WatcherContext) -> WatcherResult:
+        config = self.config
+        threshold = config.get("threshold", 10)  # Max consecutive non-delegation calls
+        lookback = config.get("lookback", 30)  # How many messages to check
+
+        # Count consecutive non-delegation tool calls from the end
+        consecutive_non_delegation = 0
+
+        # Look through recent messages in reverse order
+        for msg in reversed(ctx.messages[-lookback:]):
+            if msg.get("role") != "assistant":
+                continue
+
+            tool_calls = msg.get("tool_calls", [])
+            if not tool_calls:
+                # Text-only response doesn't reset counter, but doesn't add to it
+                continue
+
+            # Check each tool call in this message
+            has_delegation = False
+            has_non_delegation = False
+
+            for tc in tool_calls:
+                func = tc.get("function", {})
+                name = func.get("name", "")
+
+                if name in self.DELEGATION_TOOLS:
+                    has_delegation = True
+                elif name:  # Any other tool call
+                    has_non_delegation = True
+
+            if has_delegation:
+                # Found a delegation tool - stop counting
+                break
+            elif has_non_delegation:
+                # Add to consecutive count (one per message, not per tool call)
+                consecutive_non_delegation += 1
+
+        # Check if threshold exceeded
+        if consecutive_non_delegation >= threshold:
+            return WatcherResult.nudge(
+                guidance=(
+                    f"You've made {consecutive_non_delegation} consecutive direct tool calls "
+                    "without delegating to an executor. Remember: as the orchestrator, your role "
+                    "is to delegate coding work to executors, not do it yourself via bash. "
+                    "Consider whether the work you're doing could be delegated to an executor "
+                    "using delegate(). Executors can write code, run tests, and handle complex "
+                    "file operations more effectively than direct bash commands."
+                ),
+                reason=f"Consecutive non-delegation calls: {consecutive_non_delegation}",
+            )
+
+        return WatcherResult.ok()
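To see the counting rule in isolation - one increment per assistant message that used only non-delegation tools, stopping at the most recent delegation call - here is a standalone sketch (not the watcher class itself):

```python
# Standalone sketch of the consecutive-call counting used by delegation_reminder.
DELEGATION_TOOLS = {"delegate", "converse", "check_session", "end_session", "list_sessions", "chat"}

def consecutive_non_delegation(messages: list[dict], lookback: int = 30) -> int:
    count = 0
    for msg in reversed(messages[-lookback:]):
        if msg.get("role") != "assistant":
            continue
        names = [tc.get("function", {}).get("name", "") for tc in msg.get("tool_calls", [])]
        if not any(names):
            continue  # text-only turn: neither resets nor increments the count
        if any(n in DELEGATION_TOOLS for n in names):
            break     # most recent delegation found - stop counting
        count += 1
    return count

msgs = [{"role": "assistant", "tool_calls": [{"function": {"name": "bash"}}]}] * 12
assert consecutive_non_delegation(msgs) == 12  # would trigger the default threshold of 10
```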
All other files listed above are unchanged between 1.2.1 and 1.3.3.