zwarm 1.2.1__tar.gz → 1.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {zwarm-1.2.1 → zwarm-1.3.3}/.gitignore +2 -0
  2. {zwarm-1.2.1 → zwarm-1.3.3}/PKG-INFO +30 -15
  3. {zwarm-1.2.1 → zwarm-1.3.3}/README.md +29 -14
  4. {zwarm-1.2.1 → zwarm-1.3.3}/pyproject.toml +1 -1
  5. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/codex_mcp.py +42 -2
  6. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/cli/main.py +100 -5
  7. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/config.py +9 -1
  8. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/state.py +143 -12
  9. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/orchestrator.py +51 -9
  10. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/prompts/orchestrator.py +18 -0
  11. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/tools/delegation.py +37 -2
  12. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/builtin.py +82 -0
  13. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/__init__.py +0 -0
  14. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/__init__.py +0 -0
  15. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/base.py +0 -0
  16. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/claude_code.py +0 -0
  17. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/registry.py +0 -0
  18. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/test_codex_mcp.py +0 -0
  19. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/adapters/test_registry.py +0 -0
  20. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/cli/__init__.py +0 -0
  21. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/__init__.py +0 -0
  22. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/compact.py +0 -0
  23. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/environment.py +0 -0
  24. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/models.py +0 -0
  25. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/test_compact.py +0 -0
  26. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/test_config.py +0 -0
  27. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/core/test_models.py +0 -0
  28. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/prompts/__init__.py +0 -0
  29. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/test_orchestrator_watchers.py +0 -0
  30. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/tools/__init__.py +0 -0
  31. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/__init__.py +0 -0
  32. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/base.py +0 -0
  33. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/manager.py +0 -0
  34. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/registry.py +0 -0
  35. {zwarm-1.2.1 → zwarm-1.3.3}/src/zwarm/watchers/test_watchers.py +0 -0
@@ -19,3 +19,5 @@ dist-ssr/
19
19
  *.local
20
20
 
21
21
  jobs/
22
+
23
+ .zwarm/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zwarm
3
- Version: 1.2.1
3
+ Version: 1.3.3
4
4
  Summary: Multi-Agent CLI Orchestration Research Platform
5
5
  Requires-Python: <3.14,>=3.13
6
6
  Requires-Dist: python-dotenv>=1.0.0
@@ -136,12 +136,17 @@ state_dir: .zwarm # State directory for sessions/events
136
136
 
137
137
  watchers:
138
138
  enabled: true
139
+ message_role: user # Role for nudge messages: user | assistant | system
139
140
  watchers:
140
141
  - name: progress
141
142
  - name: budget
142
143
  config:
143
144
  max_steps: 50
144
145
  max_sessions: 10
146
+ - name: delegation_reminder
147
+ config:
148
+ threshold: 10 # Nudge after N consecutive non-delegation calls
149
+ lookback: 30 # How many messages to check
145
150
  - name: scope
146
151
  config:
147
152
  keywords: []
@@ -217,28 +222,38 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
217
222
  | `pattern` | Custom regex pattern matching |
218
223
  | `quality` | Code quality checks |
219
224
  | `delegation` | Ensures orchestrator delegates instead of writing code directly |
225
+ | `delegation_reminder` | Nudges after many consecutive non-delegation tool calls (default: 10) |
220
226
 
221
227
  ### Enabling Watchers
222
228
 
223
229
  ```yaml
224
230
  # config.yaml
225
231
  watchers:
226
- enabled:
227
- - progress
228
- - budget
229
- - scope
230
- config:
231
- progress:
232
- stuck_threshold: 5 # Flag after 5 similar steps
233
- budget:
234
- max_steps: 50
235
- max_sessions: 10
236
- scope:
237
- keywords:
238
- - "refactor"
239
- - "rewrite"
232
+ enabled: true
233
+ message_role: user # How nudges appear: user | assistant | system
234
+ watchers:
235
+ - name: progress
236
+ config:
237
+ max_same_calls: 3 # Flag after 3 identical tool calls
238
+ - name: budget
239
+ config:
240
+ max_steps: 50
241
+ max_sessions: 10
242
+ - name: delegation_reminder
243
+ config:
244
+ threshold: 10 # Nudge after 10 non-delegation calls
245
+ - name: scope
246
+ config:
247
+ avoid_keywords:
248
+ - "refactor everything"
249
+ - "rewrite"
240
250
  ```
241
251
 
252
+ The `message_role` setting controls how watcher nudges are injected:
253
+ - `user` (default): Appears as a user message - strong nudge, agent must respond
254
+ - `assistant`: Appears as a previous assistant thought - softer, agent can continue
255
+ - `system`: Appears as system instruction - authoritative guidance
256
+
242
257
  ### Watcher Actions
243
258
 
244
259
  Watchers can return different actions:
@@ -124,12 +124,17 @@ state_dir: .zwarm # State directory for sessions/events
124
124
 
125
125
  watchers:
126
126
  enabled: true
127
+ message_role: user # Role for nudge messages: user | assistant | system
127
128
  watchers:
128
129
  - name: progress
129
130
  - name: budget
130
131
  config:
131
132
  max_steps: 50
132
133
  max_sessions: 10
134
+ - name: delegation_reminder
135
+ config:
136
+ threshold: 10 # Nudge after N consecutive non-delegation calls
137
+ lookback: 30 # How many messages to check
133
138
  - name: scope
134
139
  config:
135
140
  keywords: []
@@ -205,28 +210,38 @@ Watchers are composable guardrails that monitor agent behavior and can intervene
205
210
  | `pattern` | Custom regex pattern matching |
206
211
  | `quality` | Code quality checks |
207
212
  | `delegation` | Ensures orchestrator delegates instead of writing code directly |
213
+ | `delegation_reminder` | Nudges after many consecutive non-delegation tool calls (default: 10) |
208
214
 
209
215
  ### Enabling Watchers
210
216
 
211
217
  ```yaml
212
218
  # config.yaml
213
219
  watchers:
214
- enabled:
215
- - progress
216
- - budget
217
- - scope
218
- config:
219
- progress:
220
- stuck_threshold: 5 # Flag after 5 similar steps
221
- budget:
222
- max_steps: 50
223
- max_sessions: 10
224
- scope:
225
- keywords:
226
- - "refactor"
227
- - "rewrite"
220
+ enabled: true
221
+ message_role: user # How nudges appear: user | assistant | system
222
+ watchers:
223
+ - name: progress
224
+ config:
225
+ max_same_calls: 3 # Flag after 3 identical tool calls
226
+ - name: budget
227
+ config:
228
+ max_steps: 50
229
+ max_sessions: 10
230
+ - name: delegation_reminder
231
+ config:
232
+ threshold: 10 # Nudge after 10 non-delegation calls
233
+ - name: scope
234
+ config:
235
+ avoid_keywords:
236
+ - "refactor everything"
237
+ - "rewrite"
228
238
  ```
229
239
 
240
+ The `message_role` setting controls how watcher nudges are injected:
241
+ - `user` (default): Appears as a user message - strong nudge, agent must respond
242
+ - `assistant`: Appears as a previous assistant thought - softer, agent can continue
243
+ - `system`: Appears as system instruction - authoritative guidance
244
+
230
245
  ### Watcher Actions
231
246
 
232
247
  Watchers can return different actions:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "zwarm"
3
- version = "1.2.1"
3
+ version = "1.3.3"
4
4
  description = "Multi-Agent CLI Orchestration Research Platform"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13,<3.14"
@@ -549,20 +549,33 @@ class CodexMCPAdapter(ExecutorAdapter):
549
549
  """
550
550
  client = self._ensure_client()
551
551
 
552
+ logger.debug(f"Calling codex-reply with conversation_id={conversation_id}")
553
+
552
554
  result = client.call_tool("codex-reply", {
553
555
  "conversationId": conversation_id,
554
556
  "prompt": message,
555
557
  })
556
558
 
559
+ # Check for conversation loss - MCP returns empty result when session not found
560
+ if not result.get("messages") and not result.get("output"):
561
+ logger.error(
562
+ f"codex-reply returned empty result for conversation_id={conversation_id}. "
563
+ f"The MCP server may have lost the conversation state. Result: {result}"
564
+ )
565
+
557
566
  # Track usage
558
567
  usage = result.get("usage", {})
559
568
  self._accumulate_usage(usage)
560
569
 
570
+ response = self._extract_response(result)
571
+ logger.debug(f"codex-reply response length: {len(response)} chars")
572
+
561
573
  return {
562
- "response": self._extract_response(result),
574
+ "response": response,
563
575
  "raw_messages": result.get("messages", []),
564
576
  "usage": usage,
565
577
  "total_usage": self.total_usage,
578
+ "conversation_lost": not result.get("messages") and not result.get("output"),
566
579
  }
567
580
 
568
581
  @weave.op()
@@ -598,6 +611,13 @@ class CodexMCPAdapter(ExecutorAdapter):
598
611
  session.conversation_id = result["conversation_id"]
599
612
  if session.conversation_id:
600
613
  self._sessions[session.id] = session.conversation_id
614
+ logger.debug(f"Session {session.id[:8]} mapped to conversation {session.conversation_id}")
615
+ else:
616
+ # This is bad - we won't be able to continue this conversation
617
+ logger.warning(
618
+ f"Session {session.id[:8]} started but MCP didn't return a conversation ID. "
619
+ "Further converse() calls will fail."
620
+ )
601
621
 
602
622
  session.add_message("user", task)
603
623
  session.add_message("assistant", result["response"])
@@ -652,6 +672,16 @@ class CodexMCPAdapter(ExecutorAdapter):
652
672
  )
653
673
 
654
674
  response_text = result["response"]
675
+
676
+ # Check if conversation was lost
677
+ if result.get("conversation_lost"):
678
+ logger.warning(
679
+ f"Conversation {session.conversation_id} was lost. "
680
+ f"Session {session.id} will be marked as needing re-delegation."
681
+ )
682
+ # Mark the session as having a lost conversation so orchestrator can handle it
683
+ session.conversation_id = None # Clear the stale ID
684
+
655
685
  session.add_message("user", message)
656
686
  session.add_message("assistant", response_text)
657
687
 
@@ -797,6 +827,15 @@ class CodexMCPAdapter(ExecutorAdapter):
797
827
 
798
828
  def _extract_response(self, result: dict) -> str:
799
829
  """Extract response text from MCP result."""
830
+ # Check for error indicators - empty result suggests lost conversation
831
+ if (
832
+ result.get("conversationId") is None
833
+ and not result.get("messages")
834
+ and not result.get("output")
835
+ ):
836
+ logger.warning(f"MCP returned empty result - conversation may be lost: {result}")
837
+ return "[ERROR] Conversation lost - the MCP server no longer has this session. Please re-delegate the task."
838
+
800
839
  # First check for our collected output
801
840
  if result.get("output"):
802
841
  return result["output"]
@@ -823,5 +862,6 @@ class CodexMCPAdapter(ExecutorAdapter):
823
862
  if "text" in result:
824
863
  return result["text"]
825
864
 
826
- # Fallback: stringify the result
865
+ # Fallback: stringify the result (but log it as unexpected)
866
+ logger.warning(f"Unexpected MCP result format, returning raw: {list(result.keys())}")
827
867
  return json.dumps(result, indent=2)
@@ -141,6 +141,8 @@ def orchestrate(
141
141
  resume: Annotated[bool, typer.Option("--resume", help="Resume from previous state")] = False,
142
142
  max_steps: Annotated[Optional[int], typer.Option("--max-steps", help="Maximum orchestrator steps")] = None,
143
143
  verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Show detailed output")] = False,
144
+ instance: Annotated[Optional[str], typer.Option("--instance", "-i", help="Instance ID (for isolation/resume)")] = None,
145
+ instance_name: Annotated[Optional[str], typer.Option("--name", "-n", help="Human-readable instance name")] = None,
144
146
  ):
145
147
  """
146
148
  Start an orchestrator session.
@@ -149,6 +151,9 @@ def orchestrate(
149
151
  (Codex, Claude Code). It can have sync conversations or fire-and-forget
150
152
  async delegations.
151
153
 
154
+ Each run creates an isolated instance to prevent conflicts when running
155
+ multiple orchestrators in the same directory.
156
+
152
157
  [bold]Examples:[/]
153
158
  [dim]# Simple task[/]
154
159
  $ zwarm orchestrate --task "Add a logout button to the navbar"
@@ -166,8 +171,14 @@ def orchestrate(
166
171
  [dim]# Override settings[/]
167
172
  $ zwarm orchestrate --task "Fix bug" --set executor.adapter=claude_code
168
173
 
169
- [dim]# Resume interrupted session[/]
170
- $ zwarm orchestrate --task "Continue work" --resume
174
+ [dim]# Named instance (easier to track)[/]
175
+ $ zwarm orchestrate --task "Add tests" --name test-work
176
+
177
+ [dim]# Resume a specific instance[/]
178
+ $ zwarm orchestrate --resume --instance abc123
179
+
180
+ [dim]# List all instances[/]
181
+ $ zwarm instances
171
182
  """
172
183
  from zwarm.orchestrator import build_orchestrator
173
184
 
@@ -187,6 +198,8 @@ def orchestrate(
187
198
  console.print(f"[bold]Starting orchestrator...[/]")
188
199
  console.print(f" Task: {task}")
189
200
  console.print(f" Working dir: {working_dir.absolute()}")
201
+ if instance:
202
+ console.print(f" Instance: {instance}" + (f" ({instance_name})" if instance_name else ""))
190
203
  console.print()
191
204
 
192
205
  # Output handler to show orchestrator messages
@@ -203,11 +216,17 @@ def orchestrate(
203
216
  overrides=override_list,
204
217
  resume=resume,
205
218
  output_handler=output_handler,
219
+ instance_id=instance,
220
+ instance_name=instance_name,
206
221
  )
207
222
 
208
223
  if resume:
209
224
  console.print(" [dim]Resuming from previous state...[/]")
210
225
 
226
+ # Show instance ID if auto-generated
227
+ if orchestrator.instance_id and not instance:
228
+ console.print(f" [dim]Instance: {orchestrator.instance_id[:8]}[/]")
229
+
211
230
  # Run the orchestrator loop
212
231
  console.print("[bold]--- Orchestrator running ---[/]\n")
213
232
  result = orchestrator.run(task=task)
@@ -223,16 +242,35 @@ def orchestrate(
223
242
  # Save state for potential resume
224
243
  orchestrator.save_state()
225
244
 
245
+ # Update instance status
246
+ if orchestrator.instance_id:
247
+ from zwarm.core.state import update_instance_status
248
+ update_instance_status(
249
+ orchestrator.instance_id,
250
+ "completed",
251
+ working_dir / ".zwarm",
252
+ )
253
+ console.print(f" [dim]Instance {orchestrator.instance_id[:8]} marked completed[/]")
254
+
226
255
  except KeyboardInterrupt:
227
256
  console.print("\n\n[yellow]Interrupted.[/]")
228
257
  if orchestrator:
229
258
  orchestrator.save_state()
230
259
  console.print("[dim]State saved. Use --resume to continue.[/]")
260
+ # Keep instance as "active" so it can be resumed
231
261
  sys.exit(1)
232
262
  except Exception as e:
233
263
  console.print(f"\n[red]Error:[/] {e}")
234
264
  if verbose:
235
265
  console.print_exception()
266
+ # Update instance status to failed
267
+ if orchestrator and orchestrator.instance_id:
268
+ from zwarm.core.state import update_instance_status
269
+ update_instance_status(
270
+ orchestrator.instance_id,
271
+ "failed",
272
+ working_dir / ".zwarm",
273
+ )
236
274
  sys.exit(1)
237
275
 
238
276
 
@@ -384,6 +422,63 @@ def status(
384
422
  console.print(" [dim](none)[/]")
385
423
 
386
424
 
425
+ @app.command()
426
+ def instances(
427
+ working_dir: Annotated[Path, typer.Option("--working-dir", "-w", help="Working directory")] = Path("."),
428
+ all_instances: Annotated[bool, typer.Option("--all", "-a", help="Show all instances (including completed)")] = False,
429
+ ):
430
+ """
431
+ List all orchestrator instances.
432
+
433
+ Shows instances that have been run in this directory. Use --all to include
434
+ completed instances.
435
+
436
+ [bold]Examples:[/]
437
+ [dim]# List active instances[/]
438
+ $ zwarm instances
439
+
440
+ [dim]# List all instances[/]
441
+ $ zwarm instances --all
442
+ """
443
+ from zwarm.core.state import list_instances as get_instances
444
+
445
+ state_dir = working_dir / ".zwarm"
446
+ all_inst = get_instances(state_dir)
447
+
448
+ if not all_inst:
449
+ console.print("[dim]No instances found.[/]")
450
+ console.print("[dim]Run 'zwarm orchestrate' to start a new instance.[/]")
451
+ return
452
+
453
+ # Filter if not showing all
454
+ if not all_instances:
455
+ all_inst = [i for i in all_inst if i.get("status") == "active"]
456
+
457
+ if not all_inst:
458
+ console.print("[dim]No active instances. Use --all to see completed ones.[/]")
459
+ return
460
+
461
+ console.print(f"[bold]Instances[/] ({len(all_inst)} total)\n")
462
+
463
+ for inst in all_inst:
464
+ status = inst.get("status", "unknown")
465
+ status_icon = {"active": "[green]●[/]", "completed": "[dim]✓[/]", "failed": "[red]✗[/]"}.get(status, "[dim]?[/]")
466
+
467
+ inst_id = inst.get("id", "unknown")[:8]
468
+ name = inst.get("name", "")
469
+ task = (inst.get("task") or "")[:60]
470
+ updated = inst.get("updated_at", "")[:19] if inst.get("updated_at") else ""
471
+
472
+ console.print(f" {status_icon} [bold]{inst_id}[/]" + (f" ({name})" if name and name != inst_id else ""))
473
+ if task:
474
+ console.print(f" [dim]{task}[/]")
475
+ if updated:
476
+ console.print(f" [dim]Updated: {updated}[/]")
477
+ console.print()
478
+
479
+ console.print("[dim]Use --instance <id> with 'orchestrate --resume' to resume an instance.[/]")
480
+
481
+
387
482
  @app.command()
388
483
  def history(
389
484
  working_dir: Annotated[Path, typer.Option("--working-dir", "-w", help="Working directory")] = Path("."),
@@ -577,7 +672,7 @@ def init(
577
672
  # Gather settings
578
673
  weave_project = ""
579
674
  adapter = "codex_mcp"
580
- watchers_enabled = ["progress", "budget", "delegation"]
675
+ watchers_enabled = ["progress", "budget", "delegation", "delegation_reminder"]
581
676
  create_project_config = with_project
582
677
  project_description = ""
583
678
  project_context = ""
@@ -601,10 +696,10 @@ def init(
601
696
 
602
697
  # Watchers
603
698
  console.print("\n [bold]Watchers[/] (trajectory aligners)")
604
- available_watchers = ["progress", "budget", "delegation", "scope", "pattern", "quality"]
699
+ available_watchers = ["progress", "budget", "delegation", "delegation_reminder", "scope", "pattern", "quality"]
605
700
  watchers_enabled = []
606
701
  for w in available_watchers:
607
- default = w in ["progress", "budget", "delegation"]
702
+ default = w in ["progress", "budget", "delegation", "delegation_reminder"]
608
703
  if typer.confirm(f" Enable {w}?", default=default):
609
704
  watchers_enabled.append(w)
610
705
 
@@ -86,7 +86,13 @@ class WatchersConfig:
86
86
  watchers: list[WatcherConfigItem] = field(default_factory=lambda: [
87
87
  WatcherConfigItem(name="progress"),
88
88
  WatcherConfigItem(name="budget"),
89
+ WatcherConfigItem(name="delegation_reminder"),
89
90
  ])
91
+ # Role for watcher nudge messages: "user" | "assistant" | "system"
92
+ # "user" (default) - Appears as if user sent the message, strong nudge
93
+ # "assistant" - Appears as previous assistant thought, softer nudge
94
+ # "system" - Appears as system instruction, authoritative
95
+ message_role: str = "user"
90
96
 
91
97
 
92
98
  @dataclass
@@ -122,13 +128,14 @@ class ZwarmConfig:
122
128
  ],
123
129
  )
124
130
  else:
125
- # Full format: watchers: {enabled: true, watchers: [...]}
131
+ # Full format: watchers: {enabled: true, watchers: [...], message_role: "user"}
126
132
  watchers_config = WatchersConfig(
127
133
  enabled=watchers_data.get("enabled", True),
128
134
  watchers=[
129
135
  WatcherConfigItem(name=w) if isinstance(w, str) else WatcherConfigItem(**w)
130
136
  for w in watchers_data.get("watchers", [])
131
137
  ] or WatchersConfig().watchers,
138
+ message_role=watchers_data.get("message_role", "user"),
132
139
  )
133
140
 
134
141
  # Build orchestrator config with nested compaction
@@ -180,6 +187,7 @@ class ZwarmConfig:
180
187
  {"name": w.name, "enabled": w.enabled, "config": w.config}
181
188
  for w in self.watchers.watchers
182
189
  ],
190
+ "message_role": self.watchers.message_role,
183
191
  },
184
192
  "state_dir": self.state_dir,
185
193
  }
@@ -1,16 +1,25 @@
1
1
  """
2
2
  Flat-file state management for zwarm.
3
3
 
4
- State structure:
4
+ State structure (with instance isolation):
5
5
  .zwarm/
6
- ├── state.json # Current state (sessions, tasks)
7
- ├── events.jsonl # Append-only event log
8
- ├── sessions/
9
- │ └── <session-id>/
10
- ├── messages.json # Full conversation history
11
- │ └── output.log # Agent stdout/stderr
12
- └── orchestrator/
13
- └── messages.json # Orchestrator's message history (for resume)
6
+ ├── instances.json # Registry of all instances
7
+ └── instances/
8
+ └── <instance-id>/
9
+ ├── state.json # Current state (sessions, tasks)
10
+ ├── events.jsonl # Append-only event log
11
+ ├── sessions/
12
+ └── <session-id>/
13
+ │ ├── messages.json
14
+ │ └── output.log
15
+ └── orchestrator/
16
+ └── messages.json # Orchestrator's message history (for resume)
17
+
18
+ Legacy structure (single instance, for backwards compat):
19
+ .zwarm/
20
+ ├── state.json
21
+ ├── events.jsonl
22
+ └── ...
14
23
  """
15
24
 
16
25
  from __future__ import annotations
@@ -19,10 +28,116 @@ import json
19
28
  from datetime import datetime
20
29
  from pathlib import Path
21
30
  from typing import Any
31
+ from uuid import uuid4
22
32
 
23
33
  from .models import ConversationSession, Event, Task
24
34
 
25
35
 
36
+ # --- Instance Registry ---
37
+
38
+ def get_instances_registry_path(base_dir: Path | str = ".zwarm") -> Path:
39
+ """Get path to the instances registry file."""
40
+ return Path(base_dir) / "instances.json"
41
+
42
+
43
+ def list_instances(base_dir: Path | str = ".zwarm") -> list[dict[str, Any]]:
44
+ """List all registered instances."""
45
+ registry_path = get_instances_registry_path(base_dir)
46
+ if not registry_path.exists():
47
+ return []
48
+ try:
49
+ return json.loads(registry_path.read_text()).get("instances", [])
50
+ except (json.JSONDecodeError, KeyError):
51
+ return []
52
+
53
+
54
+ def register_instance(
55
+ instance_id: str,
56
+ name: str | None = None,
57
+ task: str | None = None,
58
+ base_dir: Path | str = ".zwarm",
59
+ ) -> None:
60
+ """Register an instance in the global registry."""
61
+ base = Path(base_dir)
62
+ base.mkdir(parents=True, exist_ok=True)
63
+
64
+ registry_path = get_instances_registry_path(base_dir)
65
+
66
+ # Load existing registry
67
+ if registry_path.exists():
68
+ try:
69
+ registry = json.loads(registry_path.read_text())
70
+ except json.JSONDecodeError:
71
+ registry = {"instances": []}
72
+ else:
73
+ registry = {"instances": []}
74
+
75
+ # Check if instance already registered
76
+ existing_ids = {inst["id"] for inst in registry["instances"]}
77
+ if instance_id in existing_ids:
78
+ # Update existing entry
79
+ for inst in registry["instances"]:
80
+ if inst["id"] == instance_id:
81
+ inst["updated_at"] = datetime.now().isoformat()
82
+ inst["status"] = "active"
83
+ if name:
84
+ inst["name"] = name
85
+ if task:
86
+ inst["task"] = task[:100] # Truncate
87
+ break
88
+ else:
89
+ # Add new entry
90
+ registry["instances"].append({
91
+ "id": instance_id,
92
+ "name": name or instance_id[:8],
93
+ "task": (task[:100] if task else None),
94
+ "created_at": datetime.now().isoformat(),
95
+ "updated_at": datetime.now().isoformat(),
96
+ "status": "active",
97
+ })
98
+
99
+ registry_path.write_text(json.dumps(registry, indent=2))
100
+
101
+
102
+ def update_instance_status(
103
+ instance_id: str,
104
+ status: str,
105
+ base_dir: Path | str = ".zwarm",
106
+ ) -> None:
107
+ """Update an instance's status in the registry."""
108
+ registry_path = get_instances_registry_path(base_dir)
109
+ if not registry_path.exists():
110
+ return
111
+
112
+ try:
113
+ registry = json.loads(registry_path.read_text())
114
+ except json.JSONDecodeError:
115
+ return
116
+
117
+ for inst in registry.get("instances", []):
118
+ if inst["id"] == instance_id:
119
+ inst["status"] = status
120
+ inst["updated_at"] = datetime.now().isoformat()
121
+ break
122
+
123
+ registry_path.write_text(json.dumps(registry, indent=2))
124
+
125
+
126
+ def get_instance_state_dir(
127
+ instance_id: str | None = None,
128
+ base_dir: Path | str = ".zwarm",
129
+ ) -> Path:
130
+ """
131
+ Get the state directory for an instance.
132
+
133
+ If instance_id is None, returns the legacy path for backwards compat.
134
+ """
135
+ base = Path(base_dir)
136
+ if instance_id is None:
137
+ return base # Legacy: .zwarm/
138
+ return base / "instances" / instance_id
139
+
140
+
26
141
  def _json_serializer(obj: Any) -> Any:
27
142
  """Custom JSON serializer for non-standard types."""
28
143
  # Handle pydantic models
@@ -42,15 +157,31 @@ class StateManager:
42
157
  """
43
158
  Manages flat-file state for zwarm.
44
159
 
45
- All state is stored as JSON files in a directory (default: .zwarm/).
160
+ All state is stored as JSON files in a directory.
161
+ With instance isolation: .zwarm/instances/<instance-id>/
162
+ Legacy (no instance): .zwarm/
163
+
46
164
  This enables:
47
165
  - Git-backed history
48
166
  - Easy debugging (just read the files)
49
167
  - Resume from previous state
168
+ - Multiple concurrent orchestrators (with instance isolation)
50
169
  """
51
170
 
52
- def __init__(self, state_dir: Path | str = ".zwarm"):
53
- self.state_dir = Path(state_dir)
171
+ def __init__(
172
+ self,
173
+ state_dir: Path | str = ".zwarm",
174
+ instance_id: str | None = None,
175
+ ):
176
+ self.base_dir = Path(state_dir)
177
+ self.instance_id = instance_id
178
+
179
+ # Resolve actual state directory
180
+ if instance_id:
181
+ self.state_dir = get_instance_state_dir(instance_id, self.base_dir)
182
+ else:
183
+ self.state_dir = self.base_dir
184
+
54
185
  self._sessions: dict[str, ConversationSession] = {}
55
186
  self._tasks: dict[str, Task] = {}
56
187
  self._orchestrator_messages: list[dict[str, Any]] = []
@@ -52,6 +52,10 @@ class Orchestrator(YamlAgent):
52
52
  config: ZwarmConfig = Field(default_factory=ZwarmConfig)
53
53
  working_dir: Path = Field(default_factory=Path.cwd)
54
54
 
55
+ # Instance identification (for multi-orchestrator isolation)
56
+ instance_id: str | None = Field(default=None)
57
+ instance_name: str | None = Field(default=None)
58
+
55
59
  # Load tools from modules (delegation + bash for verification)
56
60
  agent_tool_modules: list[str] = Field(
57
61
  default=[
@@ -77,11 +81,25 @@ class Orchestrator(YamlAgent):
77
81
  """Initialize state and adapters after model creation."""
78
82
  super().model_post_init(__context)
79
83
 
80
- # Initialize state manager
81
- self._state = StateManager(self.working_dir / self.config.state_dir)
84
+ # Initialize state manager with instance isolation
85
+ base_state_dir = self.working_dir / self.config.state_dir
86
+ self._state = StateManager(
87
+ state_dir=base_state_dir,
88
+ instance_id=self.instance_id,
89
+ )
82
90
  self._state.init()
83
91
  self._state.load()
84
92
 
93
+ # Register instance if using instance isolation
94
+ if self.instance_id:
95
+ from zwarm.core.state import register_instance
96
+ register_instance(
97
+ instance_id=self.instance_id,
98
+ name=self.instance_name,
99
+ task=None, # Will be updated when task is set
100
+ base_dir=base_state_dir,
101
+ )
102
+
85
103
  # Load existing sessions
86
104
  for session in self._state.list_sessions():
87
105
  self._sessions[session.id] = session
@@ -215,12 +233,18 @@ class Orchestrator(YamlAgent):
215
233
  if not self._resumed:
216
234
  return
217
235
 
218
- # Build list of old sessions
236
+ # Build list of old sessions and INVALIDATE their conversation IDs
237
+ # The MCP server was restarted, so all conversation IDs are now stale
219
238
  old_sessions = []
239
+ invalidated_count = 0
220
240
  for sid, session in self._sessions.items():
221
241
  old_sessions.append(
222
242
  f" - {sid[:8]}... ({session.adapter}, {session.status.value})"
223
243
  )
244
+ # Clear stale conversation_id to prevent converse() from trying to use it
245
+ if session.conversation_id:
246
+ session.conversation_id = None
247
+ invalidated_count += 1
224
248
 
225
249
  session_info = "\n".join(old_sessions) if old_sessions else " (none)"
226
250
 
@@ -228,14 +252,14 @@ class Orchestrator(YamlAgent):
228
252
  "role": "user",
229
253
  "content": f"""[SYSTEM NOTICE] You have been resumed from a previous session.
230
254
 
231
- IMPORTANT: Your previous executor sessions are NO LONGER ACTIVE. The MCP connections and subprocess handles were lost when the previous session ended.
255
+ CRITICAL: Your previous executor sessions are NO LONGER USABLE. The MCP server was restarted, so all conversation state was lost. {invalidated_count} conversation ID(s) have been invalidated.
232
256
 
233
- Previous sessions (now stale):
257
+ Previous sessions (conversation IDs cleared):
234
258
  {session_info}
235
259
 
236
- You must start NEW sessions with delegate() if you need to continue work. Do NOT try to use converse() or check_session() with the old session IDs - they will fail.
260
+ You MUST start NEW sessions with delegate() to continue any work. The converse() tool will fail on these old sessions because they have no active conversation.
237
261
 
238
- Continue with your task from where you left off.""",
262
+ Review what was accomplished in the previous session and delegate new tasks as needed.""",
239
263
  }
240
264
 
241
265
  self.messages.append(resume_msg)
@@ -328,10 +352,15 @@ Continue with your task from where you left off.""",
328
352
 
329
353
  # Handle watcher result
330
354
  if result.action == WatcherAction.NUDGE and result.guidance:
331
- # Inject guidance as a system message
355
+ # Inject guidance as a message with configurable role
356
+ message_role = self.config.watchers.message_role
357
+ # Validate role (default to user if invalid)
358
+ if message_role not in ("user", "assistant", "system"):
359
+ message_role = "user"
360
+
332
361
  self.messages.append(
333
362
  {
334
- "role": "user",
363
+ "role": message_role,
335
364
  "content": f"[WATCHER: {result.metadata.get('triggered_by', 'unknown')}] {result.guidance}",
336
365
  }
337
366
  )
@@ -521,6 +550,8 @@ def build_orchestrator(
521
550
  overrides: list[str] | None = None,
522
551
  resume: bool = False,
523
552
  output_handler: Callable[[str], None] | None = None,
553
+ instance_id: str | None = None,
554
+ instance_name: str | None = None,
524
555
  ) -> Orchestrator:
525
556
  """
526
557
  Build an orchestrator from configuration.
@@ -532,10 +563,14 @@ def build_orchestrator(
532
563
  overrides: CLI overrides (--set key=value)
533
564
  resume: Whether to resume from previous state
534
565
  output_handler: Function to handle orchestrator output
566
+ instance_id: Unique ID for this instance (enables multi-orchestrator isolation)
567
+ instance_name: Human-readable name for this instance
535
568
 
536
569
  Returns:
537
570
  Configured Orchestrator instance
538
571
  """
572
+ from uuid import uuid4
573
+
539
574
  # Load configuration
540
575
  config = load_config(
541
576
  config_path=config_path,
@@ -545,6 +580,11 @@ def build_orchestrator(
545
580
  # Resolve working directory
546
581
  working_dir = working_dir or Path.cwd()
547
582
 
583
+ # Generate instance ID if not provided (enables isolation by default for new runs)
584
+ # For resume, instance_id should be provided explicitly
585
+ if instance_id is None and not resume:
586
+ instance_id = str(uuid4())
587
+
548
588
  # Build system prompt
549
589
  system_prompt = _build_system_prompt(config, working_dir)
550
590
 
@@ -565,6 +605,8 @@ def build_orchestrator(
565
605
  system_prompt=system_prompt,
566
606
  maxSteps=config.orchestrator.max_steps,
567
607
  env=env,
608
+ instance_id=instance_id,
609
+ instance_name=instance_name,
568
610
  )
569
611
 
570
612
  # Resume if requested
@@ -43,6 +43,24 @@ Your primary tools are for delegation and verification:
43
43
 
44
44
  ---
45
45
 
46
+ # Watchers
47
+
48
+ Your execution is monitored by "watchers" - automated systems that observe your trajectory and provide guidance when you may be going off course. Watchers are designed to help you stay aligned with best practices and catch common pitfalls.
49
+
50
+ When you see a message prefixed with `[WATCHER: ...]`, pay attention. These are interventions from the watcher system indicating that your current approach may need adjustment. Watchers might notice:
51
+
52
+ - You're doing direct work (bash commands) when you should be delegating to executors
53
+ - You're spinning or repeating the same actions without making progress
54
+ - You're approaching resource limits (steps, sessions)
55
+ - You're drifting from the original task scope
56
+ - You're making changes without corresponding tests
57
+
58
+ Watcher guidance is not optional advice - treat it as an important course correction. If a watcher tells you to delegate instead of doing work directly, delegate. If a watcher says you're stuck, step back and try a different approach. If a watcher warns about budget limits, prioritize and wrap up.
59
+
60
+ The watchers are on your side. They exist to help you succeed, not to criticize. Heed their guidance promptly.
61
+
62
+ ---
63
+
46
64
  # Sync vs Async: Choosing the Right Mode
47
65
 
48
66
  The mode you choose for delegation significantly affects how work proceeds.
@@ -194,7 +194,7 @@ def delegate(
194
194
  header = _format_session_header(session.id, adapter_name, mode)
195
195
 
196
196
  if mode == "sync":
197
- return {
197
+ result = {
198
198
  "success": True,
199
199
  "session": header,
200
200
  "session_id": session.id,
@@ -204,6 +204,14 @@ def delegate(
204
204
  "tokens": session.token_usage.get("total_tokens", 0),
205
205
  "hint": "Use converse(session_id, message) to continue this conversation",
206
206
  }
207
+ # Warn if no conversation ID - converse() won't work
208
+ if not session.conversation_id:
209
+ result["warning"] = "no_conversation_id"
210
+ result["hint"] = (
211
+ "WARNING: MCP didn't return a conversation ID. "
212
+ "You cannot use converse() - send all instructions upfront or use async mode."
213
+ )
214
+ return result
207
215
  else:
208
216
  return {
209
217
  "success": True,
@@ -263,6 +271,18 @@ def converse(
263
271
  "hint": "Start a new session with delegate()",
264
272
  }
265
273
 
274
+ # Check for stale/missing conversation_id (common after resume)
275
+ if not session.conversation_id:
276
+ return {
277
+ "success": False,
278
+ "error": "Session has no conversation ID (likely stale after resume)",
279
+ "hint": (
280
+ "This session's conversation was lost (MCP server restarted). "
281
+ "Use end_session() to close it, then delegate() a new task."
282
+ ),
283
+ "session_id": session_id,
284
+ }
285
+
266
286
  # Get adapter and send message
267
287
  executor = self._get_adapter(session.adapter)
268
288
  try:
@@ -288,7 +308,13 @@ def converse(
288
308
  turn = len([m for m in session.messages if m.role == "user"])
289
309
  header = _format_session_header(session.id, session.adapter, session.mode.value)
290
310
 
291
- return {
311
+ # Check for conversation loss (error marker in the response, or the session no longer has a conversation ID)
312
+ conversation_lost = (
313
+ "[ERROR] Conversation lost" in response
314
+ or session.conversation_id is None
315
+ )
316
+
317
+ result = {
292
318
  "success": True,
293
319
  "session": header,
294
320
  "session_id": session_id,
@@ -298,6 +324,15 @@ def converse(
298
324
  "tokens": session.token_usage.get("total_tokens", 0),
299
325
  }
300
326
 
327
+ if conversation_lost:
328
+ result["warning"] = "conversation_lost"
329
+ result["hint"] = (
330
+ "The MCP server lost this conversation. You should end_session() "
331
+ "and delegate() a new task with the full context."
332
+ )
333
+
334
+ return result
335
+
301
336
 
302
337
  @weaveTool
303
338
  def check_session(
@@ -340,3 +340,85 @@ class QualityWatcher(Watcher):
340
340
  )
341
341
 
342
342
  return WatcherResult.ok()
343
+
344
+
345
+ @register_watcher("delegation_reminder")
346
+ class DelegationReminderWatcher(Watcher):
347
+ """
348
+ Reminds the orchestrator to delegate work instead of doing it directly.
349
+
350
+ Counts consecutive assistant messages that contain only non-delegation tool
351
+ calls (e.g. direct bash commands). When the count reaches the threshold,
352
+ nudges the orchestrator to consider delegating to executors instead.
353
+
354
+ This is a softer reminder than the DelegationWatcher - it doesn't detect
355
+ specific code-writing patterns, just notices when the orchestrator seems
356
+ to be doing a lot of direct work that could potentially be delegated.
357
+ """
358
+
359
+ name = "delegation_reminder"
360
+ description = "Reminds orchestrator to delegate after many direct tool calls"
361
+
362
+ # Tools that count as delegation-related (a message using any of them ends the streak)
363
+ DELEGATION_TOOLS = {
364
+ "delegate",
365
+ "converse",
366
+ "check_session",
367
+ "end_session",
368
+ "list_sessions",
369
+ "chat", # Talking to user is not direct work
370
+ }
371
+
372
+ async def observe(self, ctx: WatcherContext) -> WatcherResult:
373
+ config = self.config
374
+ threshold = config.get("threshold", 10) # Nudge once the streak reaches this many messages (compared with >=)
375
+ lookback = config.get("lookback", 30) # How many trailing messages to scan (sliced before the role filter, so all roles count)
376
+
377
+ # Count consecutive non-delegation assistant messages, walking back from the newest
378
+ consecutive_non_delegation = 0
379
+
380
+ # Look through recent messages in reverse order
381
+ for msg in reversed(ctx.messages[-lookback:]):
382
+ if msg.get("role") != "assistant":
383
+ continue
384
+
385
+ tool_calls = msg.get("tool_calls", [])
386
+ if not tool_calls:
387
+ # Text-only response doesn't reset counter, but doesn't add to it
388
+ continue
389
+
390
+ # Classify this message; a single delegation call outweighs any other calls in it
391
+ has_delegation = False
392
+ has_non_delegation = False
393
+
394
+ for tc in tool_calls:
395
+ func = tc.get("function", {})
396
+ name = func.get("name", "")
397
+
398
+ if name in self.DELEGATION_TOOLS:
399
+ has_delegation = True
400
+ elif name: # Any other named tool call (entries without a name are ignored)
401
+ has_non_delegation = True
402
+
403
+ if has_delegation:
404
+ # Found the most recent delegation - the streak ends here, stop counting
405
+ break
406
+ elif has_non_delegation:
407
+ # Add to consecutive count (one per message, not per tool call)
408
+ consecutive_non_delegation += 1
409
+
410
+ # Nudge once the streak has reached the threshold
411
+ if consecutive_non_delegation >= threshold:
412
+ return WatcherResult.nudge(
413
+ guidance=(
414
+ f"You've made {consecutive_non_delegation} consecutive direct tool calls "
415
+ "without delegating to an executor. Remember: as the orchestrator, your role "
416
+ "is to delegate coding work to executors, not do it yourself via bash. "
417
+ "Consider whether the work you're doing could be delegated to an executor "
418
+ "using delegate(). Executors can write code, run tests, and handle complex "
419
+ "file operations more effectively than direct bash commands."
420
+ ),
421
+ reason=f"Consecutive non-delegation calls: {consecutive_non_delegation}",
422
+ )
423
+
424
+ return WatcherResult.ok()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes