zwarm 2.3.5__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/cli/pilot.py ADDED
@@ -0,0 +1,1000 @@
1
+ """
2
+ Pilot: Conversational REPL for the zwarm orchestrator.
3
+
4
+ A chatty interface where you guide the orchestrator turn-by-turn,
5
+ with time travel, checkpoints, and streaming event display.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import copy
11
+ import json
12
+ import shlex
13
+ import sys
14
+ from dataclasses import dataclass, field
15
+ from pathlib import Path
16
+ from typing import Any, Callable, Dict, List, Optional
17
+ from uuid import uuid4
18
+
19
+ from rich.console import Console
20
+
21
+ console = Console()
22
+
23
+
24
+ # =============================================================================
25
+ # Build Pilot Orchestrator
26
+ # =============================================================================
27
+
28
+
29
def build_pilot_orchestrator(
    config_path: Path | None = None,
    working_dir: Path | None = None,
    overrides: list[str] | None = None,
    instance_id: str | None = None,
    instance_name: str | None = None,
    lm_choice: str = "gpt5-verbose",
) -> Any:
    """
    Build an orchestrator configured for pilot mode.

    Pilot mode differs from the regular orchestrator in three ways: it
    uses the conversational pilot system prompt (not the autonomous one),
    it exposes only the delegation tools (no bash, exit, list_agents, or
    run_agent), and the LM is selected from the user's ``lm_choice``.

    Args:
        config_path: Path to YAML config file.
        working_dir: Working directory (default: cwd).
        overrides: CLI overrides (--set key=value).
        instance_id: Unique ID for this instance (generated if omitted).
        instance_name: Human-readable name for this instance.
        lm_choice: LM to use (gpt5-mini, gpt5, gpt5-verbose).

    Returns:
        Configured Orchestrator instance for pilot mode.
    """
    from wbal.lm import GPT5Large, GPT5LargeVerbose, GPT5MiniTester

    from zwarm.core.config import load_config
    from zwarm.core.environment import OrchestratorEnv
    from zwarm.orchestrator import Orchestrator
    from zwarm.prompts import get_pilot_prompt

    # Resolve the LM class; unknown choices fall back to the verbose model.
    lm_classes = {
        "gpt5-mini": GPT5MiniTester,
        "gpt5": GPT5Large,
        "gpt5-verbose": GPT5LargeVerbose,
    }
    lm = lm_classes.get(lm_choice, GPT5LargeVerbose)()

    config = load_config(config_path=config_path, overrides=overrides)

    # Resolve working directory and instance identity.
    resolved_dir = working_dir or Path.cwd()
    resolved_id = instance_id if instance_id is not None else str(uuid4())

    # Conversational pilot prompt rather than the autonomous one.
    system_prompt = get_pilot_prompt(working_dir=str(resolved_dir))

    # Lean environment: pilot has no fixed task - the user drives it.
    env = OrchestratorEnv(
        task="",
        working_dir=resolved_dir,
    )

    orchestrator = Orchestrator(
        config=config,
        working_dir=resolved_dir,
        system_prompt=system_prompt,
        maxSteps=config.orchestrator.max_steps,
        env=env,
        instance_id=resolved_id,
        instance_name=instance_name,
        lm=lm,
        # Only delegation tools - no bash.
        agent_tool_modules=["zwarm.tools.delegation"],
    )

    # Strip exit/list_agents/run_agent inherited from YamlAgent/OpenAIWBAgent.
    _remove_unwanted_tools(orchestrator)

    return orchestrator
113
+
114
+
115
+ def _remove_unwanted_tools(orchestrator: Any) -> None:
116
+ """
117
+ Remove tools that aren't appropriate for pilot mode.
118
+
119
+ Removes:
120
+ - exit: Pilot doesn't auto-exit, user controls the session
121
+ - list_agents: No delegate subagents in pilot mode
122
+ - run_agent: No delegate subagents in pilot mode
123
+
124
+ This works by wrapping getToolDefinitions to filter out unwanted tools.
125
+ We use object.__setattr__ to bypass Pydantic's attribute checks.
126
+ """
127
+ import types
128
+
129
+ unwanted = {"exit", "list_agents", "run_agent"}
130
+
131
+ # Store original method
132
+ original_get_tools = orchestrator.getToolDefinitions
133
+
134
+ def filtered_get_tools(self):
135
+ """Wrapped getToolDefinitions that filters out unwanted tools."""
136
+ definitions, callables = original_get_tools()
137
+
138
+ # Filter definitions - handle both OpenAI formats
139
+ filtered_defs = []
140
+ for td in definitions:
141
+ # Check both possible name locations
142
+ name = td.get("name") or td.get("function", {}).get("name")
143
+ if name not in unwanted:
144
+ filtered_defs.append(td)
145
+
146
+ # Filter callables
147
+ filtered_callables = {
148
+ k: v for k, v in callables.items()
149
+ if k not in unwanted
150
+ }
151
+
152
+ return filtered_defs, filtered_callables
153
+
154
+ # Bind the new method to the instance, bypassing Pydantic
155
+ bound_method = types.MethodType(filtered_get_tools, orchestrator)
156
+ object.__setattr__(orchestrator, "getToolDefinitions", bound_method)
157
+
158
+
159
+ # =============================================================================
160
+ # Event Renderer (inspired by improver's run_agent.py)
161
+ # =============================================================================
162
+
163
+
164
class EventRenderer:
    """
    Streaming renderer for orchestrator events.

    Handles different event types with nice formatting:
    - Thinking/reasoning (dim italic, optional)
    - Tool calls (delegate, converse, check_session, etc.)
    - Tool results (compact, truncated)
    - Assistant messages
    - Status messages

    Writes raw ANSI escape codes to stdout (errors go to stderr).
    Internal flags track whether an assistant or thinking block is
    currently mid-stream so consecutive chunks join onto one line.
    """

    def __init__(self, *, show_reasoning: bool = True) -> None:
        # True while an assistant line is open (no trailing newline yet).
        self._assistant_open = False
        self._assistant_prefix = " "
        # True while a thinking (reasoning) block is open.
        self._thinking_open = False
        # Whether anything was rendered since the last reset_turn().
        self._had_output = False
        self._show_reasoning = show_reasoning

        # ANSI codes
        self._dim = "\x1b[2m"
        self._italic = "\x1b[3m"
        self._green = "\x1b[32m"
        self._yellow = "\x1b[33m"
        self._cyan = "\x1b[36m"
        self._reset = "\x1b[0m"
        self._bold = "\x1b[1m"

        # Tool call tracking: call_id -> name/args for matching results later.
        self._tool_names: Dict[str, str] = {}
        self._tool_args: Dict[str, str] = {}

    def _write(self, text: str) -> None:
        """Write to stdout and flush immediately (streaming display)."""
        sys.stdout.write(text)
        sys.stdout.flush()

    def _write_err(self, text: str) -> None:
        """Write to stderr and flush immediately."""
        sys.stderr.write(text)
        sys.stderr.flush()

    def _ensure_newline(self) -> None:
        """Close an open assistant line, if any, so new output starts fresh."""
        if self._assistant_open:
            self._write("\n")
            self._assistant_open = False

    def _finish_thinking(self) -> None:
        """Close an open thinking block, if any."""
        if self._thinking_open:
            self._write("\n")
            self._thinking_open = False

    def _line(self, text: str) -> None:
        """Write a full line, first closing any open assistant line."""
        self._ensure_newline()
        self._write(f"{text}\n")

    def _style(self, text: str, *, dim: bool = False, italic: bool = False,
               green: bool = False, yellow: bool = False, cyan: bool = False,
               bold: bool = False) -> str:
        """Wrap text in the requested ANSI codes; empty text passes through."""
        if not text:
            return text
        parts = []
        if dim:
            parts.append(self._dim)
        if italic:
            parts.append(self._italic)
        if green:
            parts.append(self._green)
        if yellow:
            parts.append(self._yellow)
        if cyan:
            parts.append(self._cyan)
        if bold:
            parts.append(self._bold)
        parts.append(text)
        parts.append(self._reset)
        return "".join(parts)

    def _truncate(self, text: str, max_len: int = 120) -> str:
        """Collapse whitespace and truncate to max_len with a '...' suffix."""
        trimmed = " ".join(text.split())
        if len(trimmed) <= max_len:
            return trimmed
        return trimmed[: max_len - 3].rstrip() + "..."

    # -------------------------------------------------------------------------
    # Event handlers
    # -------------------------------------------------------------------------

    def status(self, message: str) -> None:
        """Display a status message."""
        self._finish_thinking()
        self._line(message)

    def thinking(self, text: str) -> None:
        """Display thinking/reasoning (dim italic); no-op when disabled."""
        if not self._show_reasoning:
            return
        if not self._thinking_open:
            self._ensure_newline()
            self._write(self._style(" ", dim=True, italic=True))
            self._thinking_open = True
        # Indent continuation lines to align under the block prefix.
        # (Plain string literal - the original used a pointless f-string.)
        formatted = text.replace("\n", "\n ")
        self._write(self._style(formatted, dim=True, italic=True))
        self._had_output = True

    def thinking_done(self) -> None:
        """Finish thinking block."""
        self._finish_thinking()

    def assistant(self, text: str) -> None:
        """Display assistant message, streaming onto one open line."""
        self._finish_thinking()
        if not self._assistant_open:
            self._ensure_newline()
            self._write(self._style("• ", bold=True))
            self._assistant_open = True
        formatted = text.replace("\n", f"\n{self._assistant_prefix}")
        self._write(formatted)
        self._had_output = True

    def assistant_done(self) -> None:
        """Finish assistant block."""
        self._ensure_newline()

    def tool_call(self, name: str, args: Any, call_id: str = "") -> None:
        """Display a tool call (before it executes)."""
        self._finish_thinking()

        # Track for result matching
        if call_id:
            self._tool_names[call_id] = name
            self._tool_args[call_id] = str(args)

        # Format args based on tool type
        args_str = self._format_tool_args(name, args)

        prefix = self._style("→ ", green=True)
        tool_name = self._style(name, green=True, bold=True)

        if args_str:
            self._line(f"{prefix}{tool_name} {self._style(args_str, dim=True)}")
        else:
            self._line(f"{prefix}{tool_name}")

        self._had_output = True

    def tool_result(self, name: str, result: Any, call_id: str = "") -> None:
        """Display a tool result (compact: at most 3 lines of 200 chars total).

        ``name`` and ``call_id`` are accepted for interface symmetry with
        tool_call but are not currently used for rendering.
        """
        if result is None:
            return

        result_str = str(result)
        if len(result_str) > 200:
            result_str = result_str[:200] + "..."

        # Show first few lines
        lines = result_str.split("\n")
        if len(lines) > 3:
            lines = lines[:3] + ["..."]

        for i, line in enumerate(lines):
            prefix = " └ " if i == 0 else " "
            self._line(f"{prefix}{self._style(line, dim=True)}")

    def error(self, message: str) -> None:
        """Display an error on stderr."""
        self._ensure_newline()
        self._write_err(f"{self._style('[error]', yellow=True, bold=True)} {message}\n")

    def _format_tool_args(self, name: str, args: Any) -> str:
        """Format tool arguments based on tool type.

        Accepts a dict, a JSON string (decoded first), or any other value
        (stringified and truncated).
        """
        if args is None:
            return ""

        if isinstance(args, str):
            try:
                args = json.loads(args)
            except (json.JSONDecodeError, TypeError):
                return self._truncate(args)

        if not isinstance(args, dict):
            return self._truncate(str(args))

        # Tool-specific formatting
        if name == "delegate":
            task = args.get("task", "")[:60]
            mode = args.get("mode", "sync")
            return f"({mode}): {task}..."
        elif name == "converse":
            session_id = args.get("session_id", "")[:8]
            message = args.get("message", "")[:50]
            return f"[{session_id}]: {message}..."
        elif name == "check_session":
            session_id = args.get("session_id", "")[:8]
            return f"({session_id})"
        elif name == "end_session":
            session_id = args.get("session_id", "")[:8]
            return f"({session_id})"
        elif name == "list_sessions":
            return ""
        elif name == "bash":
            cmd = args.get("command", "")[:80]
            return f"$ {cmd}"
        else:
            # Generic: show first value
            first_val = next(iter(args.values()), "") if args else ""
            if isinstance(first_val, str) and len(first_val) > 40:
                first_val = first_val[:40] + "..."
            return str(first_val) if first_val else ""

    # -------------------------------------------------------------------------
    # State
    # -------------------------------------------------------------------------

    def reset_turn(self) -> None:
        """Clear the per-turn output flag (call at the start of a turn)."""
        self._had_output = False

    def had_output(self) -> bool:
        """Whether anything was rendered since the last reset_turn()."""
        return self._had_output

    def set_show_reasoning(self, value: bool) -> None:
        """Enable or disable the thinking/reasoning display."""
        self._show_reasoning = value

    def show_reasoning(self) -> bool:
        """Whether thinking/reasoning display is enabled."""
        return self._show_reasoning
387
+
388
+
389
+ # =============================================================================
390
+ # Pilot Session State (checkpoints and time travel)
391
+ # =============================================================================
392
+
393
+
394
@dataclass
class Checkpoint:
    """A snapshot of orchestrator state at a specific turn.

    Created by PilotSessionState.record_turn after each completed turn;
    restored by the :goto command, which copies messages and step_count
    back onto the orchestrator.
    """
    # 1-based turn number; 0 is reserved to mean "root" (before any turn).
    turn_id: int
    instruction: str  # User instruction that led to this state
    # Deep-copied orchestrator message history at the end of the turn.
    messages: List[Dict[str, Any]]
    sessions_snapshot: Dict[str, Any]  # Serialized session state
    # Orchestrator._step_count at checkpoint time.
    step_count: int
402
+
403
+
404
@dataclass
class PilotSessionState:
    """
    Manages checkpoints and time travel for the pilot REPL.

    Each turn (user instruction + orchestrator response) creates a checkpoint
    that can be returned to later. Jumping backwards and then recording a new
    turn discards the abandoned future (simple linear history, no tree).
    """

    checkpoints: List[Checkpoint] = field(default_factory=list)
    current_index: int = -1  # Index into checkpoints, -1 = root
    next_turn_id: int = 1

    def record_turn(
        self,
        instruction: str,
        messages: List[Dict[str, Any]],
        sessions_snapshot: Dict[str, Any],
        step_count: int,
    ) -> Checkpoint:
        """Record a new checkpoint after a turn."""
        # Recording while rewound means we're branching: drop the stale future.
        # (No-op when already at the end - the slice is empty.)
        del self.checkpoints[self.current_index + 1:]

        snapshot = Checkpoint(
            turn_id=self.next_turn_id,
            instruction=instruction,
            messages=copy.deepcopy(messages),
            sessions_snapshot=copy.deepcopy(sessions_snapshot),
            step_count=step_count,
        )
        self.checkpoints.append(snapshot)
        self.next_turn_id += 1
        self.current_index = len(self.checkpoints) - 1
        return snapshot

    def goto_turn(self, turn_id: int) -> Optional[Checkpoint]:
        """Jump to a specific turn. Returns the checkpoint or None if not found."""
        if turn_id == 0:
            # Root state - before any turns
            self.current_index = -1
            return None

        matches = [
            idx for idx, snapshot in enumerate(self.checkpoints)
            if snapshot.turn_id == turn_id
        ]
        if not matches:
            return None  # Not found
        self.current_index = matches[0]
        return self.checkpoints[self.current_index]

    def current_checkpoint(self) -> Optional[Checkpoint]:
        """Get the current checkpoint, or None if at root."""
        if 0 <= self.current_index < len(self.checkpoints):
            return self.checkpoints[self.current_index]
        return None

    def turn_label(self, turn_id: int) -> str:
        """Format turn ID as T1, T2, etc.; 0 (or less) is 'root'."""
        return "root" if turn_id <= 0 else f"T{turn_id}"

    def history_entries(
        self,
        *,
        show_all: bool = False,
        limit: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """Get history entries for display (most recent `limit` unless show_all)."""
        entries = [
            {"checkpoint": snapshot, "is_current": idx == self.current_index}
            for idx, snapshot in enumerate(self.checkpoints)
        ]
        if limit and not show_all:
            return entries[-limit:]
        return entries
486
+
487
+
488
+ # =============================================================================
489
+ # Command Parsing
490
+ # =============================================================================
491
+
492
+
493
def parse_command(text: str) -> Optional[List[str]]:
    """Parse a :command from user input. Returns None if not a command."""
    if not text.startswith(":"):
        return None
    body = text[1:].strip()
    if not body:
        return None
    try:
        tokens = shlex.split(body)
    except ValueError:
        # Unbalanced quotes etc. - treat as "not a command".
        return None
    return tokens
504
+
505
+
506
+ # =============================================================================
507
+ # Output Handler for Orchestrator Events
508
+ # =============================================================================
509
+
510
+
511
def make_event_handler(renderer: EventRenderer) -> Callable[[str], None]:
    """
    Create an output_handler that routes orchestrator output to the renderer.

    The orchestrator emits text through env.output_handler. We parse it
    to extract event types and route to appropriate renderer methods.
    """
    reasoning_prefix = "💭 "

    def handler(text: str) -> None:
        if not text:
            return
        # Reasoning marker emitted by OpenAIWBAgent.
        if text.startswith(reasoning_prefix):
            renderer.thinking(text[len(reasoning_prefix):])
            return
        # Everything else is treated as an assistant message.
        renderer.assistant(text)

    return handler
531
+
532
+
533
+ # =============================================================================
534
+ # Step Execution with Event Capture
535
+ # =============================================================================
536
+
537
+
538
def extract_events_from_response(response: Any) -> Dict[str, List[Any]]:
    """Extract structured events from an LLM response.

    Buckets output items by their ``type`` attribute; items with any other
    type (or a missing ``output``) are ignored.
    """
    buckets: Dict[str, List[Any]] = {
        "reasoning": [],
        "messages": [],
        "tool_calls": [],
    }
    # Map of response item type -> bucket name.
    routing = {
        "reasoning": "reasoning",
        "message": "messages",
        "function_call": "tool_calls",
    }

    for item in getattr(response, "output", None) or []:
        bucket = routing.get(getattr(item, "type", None))
        if bucket is not None:
            buckets[bucket].append(item)

    return buckets
560
+
561
+
562
def execute_step_with_events(
    orchestrator: Any,
    renderer: EventRenderer,
) -> tuple[List[tuple], bool]:
    """
    Execute one orchestrator step with event rendering.

    Runs the perceive/invoke/do cycle by hand so each phase's events can be
    streamed through the renderer as they occur.

    Returns:
        (tool_results, had_message) - tool call results and whether agent produced a message

    Note: Watchers are not run in pilot mode - the user is the watcher,
    actively guiding the orchestrator turn-by-turn.
    """
    produced_message = False

    # Phase 1: refresh the environment observation.
    orchestrator.perceive()

    # Phase 2: one LLM call.
    response = orchestrator.invoke()

    if response:
        events = extract_events_from_response(response)

        # Reasoning summaries -> dim italic "thinking" blocks.
        for reasoning_item in events["reasoning"]:
            for summary_part in getattr(reasoning_item, "summary", None) or []:
                summary_text = getattr(summary_part, "text", "")
                if summary_text:
                    renderer.thinking(summary_text)
        renderer.thinking_done()

        # Assistant text messages.
        for message_item in events["messages"]:
            for content_part in getattr(message_item, "content", []):
                message_text = getattr(content_part, "text", "")
                if message_text:
                    renderer.assistant(message_text)
                    produced_message = True
        renderer.assistant_done()

        # Tool calls are displayed before they execute.
        for call in events["tool_calls"]:
            renderer.tool_call(
                getattr(call, "name", "?"),
                getattr(call, "arguments", ""),
                getattr(call, "call_id", ""),
            )

    # Phase 3: execute the queued tool calls.
    results = orchestrator.do()

    # perceive/invoke/do bypass step(), so bump the counter manually.
    orchestrator._step_count += 1

    # Compact display of each tool's result.
    for tool_info, result in results:
        renderer.tool_result(
            tool_info.get("name", "?"),
            result,
            tool_info.get("call_id", ""),
        )

    return results, produced_message
627
+
628
+
629
def run_until_response(
    orchestrator: Any,
    renderer: EventRenderer,
    max_steps: int = 20,
) -> List[tuple]:
    """
    Run the orchestrator until it produces a message response.

    Keeps stepping while the agent only produces tool calls.
    Stops when:
    - Agent produces a text message (returns to user)
    - Max steps reached
    - Stop condition triggered
    - A step yields no tool results (agent is done but didn't message)

    This is wrapped as a weave.op to group all child calls per turn.

    Args:
        orchestrator: The orchestrator instance
        renderer: Event renderer for output
        max_steps: Safety limit on steps per turn

    Returns:
        All tool results from the turn
    """
    import weave

    @weave.op(name="pilot_turn")
    def _run_turn():
        collected: List[tuple] = []
        for _ in range(max_steps):
            step_results, got_message = execute_step_with_events(orchestrator, renderer)
            collected.extend(step_results)

            # Checked in order: message to the user, orchestrator stop
            # condition, then "no tool calls left".
            finished = (
                got_message
                or bool(getattr(orchestrator, "stopCondition", False))
                or not step_results
            )
            if finished:
                break
        return collected

    return _run_turn()
677
+
678
+
679
+ # =============================================================================
680
+ # Main REPL
681
+ # =============================================================================
682
+
683
+
684
def print_help(renderer: EventRenderer) -> None:
    """Print help for pilot commands."""
    help_lines = (
        "",
        "Commands:",
        " :help Show this help",
        " :history [N|all] Show turn checkpoints",
        " :goto <turn|root> Jump to a prior turn (e.g., :goto T1)",
        " :state Show current orchestrator state",
        " :sessions Show active sessions",
        " :reasoning [on|off] Toggle reasoning display",
        " :quit / :exit Exit the pilot",
        "",
    )
    for entry in help_lines:
        renderer.status(entry)
700
+
701
+
702
def get_sessions_snapshot(orchestrator: Any) -> Dict[str, Any]:
    """Get a serializable snapshot of session state.

    Returns ``{"sessions": []}`` when the orchestrator has no
    ``_session_manager`` attribute.
    """
    if not hasattr(orchestrator, "_session_manager"):
        return {"sessions": []}

    summaries = []
    for session in orchestrator._session_manager.list_sessions():
        summaries.append(
            {
                "id": session.id,
                "status": session.status.value,
                # Task text is clipped to keep the snapshot compact.
                "task": session.task[:100] if session.task else "",
                "turns": session.turn,
            }
        )
    return {"sessions": summaries}
718
+
719
+
720
def run_pilot(
    orchestrator: Any,
    *,
    initial_task: Optional[str] = None,
) -> None:
    """
    Run the pilot REPL.

    Args:
        orchestrator: A built orchestrator instance
        initial_task: Optional initial task to start with
    """
    import weave

    @weave.op(name="pilot_session")
    def _session():
        """Inner function wrapped with weave.op so the whole session logs as one trace."""
        _run_pilot_repl(orchestrator, initial_task)

    _session()
740
+
741
+
742
def _run_pilot_repl(
    orchestrator: Any,
    initial_task: Optional[str] = None,
) -> None:
    """
    The actual REPL implementation.

    Loop shape: read a line; if it starts with ':' dispatch it as a pilot
    command (:help/:history/:goto/:state/:sessions/:reasoning/:quit),
    otherwise append it as a user message and step the orchestrator until
    it replies, then record a time-travel checkpoint.
    """
    renderer = EventRenderer(show_reasoning=True)
    state = PilotSessionState()

    # Silence the default output_handler - we render events directly in execute_step_with_events
    # (Otherwise messages would be rendered twice)
    if hasattr(orchestrator, "env") and hasattr(orchestrator.env, "output_handler"):
        orchestrator.env.output_handler = lambda x: None

    # Welcome message
    renderer.status("")
    renderer.status("╭─────────────────────────────────────────╮")
    renderer.status("│ zwarm pilot │")
    renderer.status("│ Conversational orchestrator REPL │")
    renderer.status("╰─────────────────────────────────────────╯")
    renderer.status("")
    renderer.status("Type :help for commands, :quit to exit.")
    renderer.status("")

    # Handle initial task if provided: inject it as the first user message
    # and run a full turn (with checkpoint) before entering the loop.
    if initial_task:
        renderer.status(f"Initial task: {initial_task[:80]}...")
        orchestrator.messages.append({
            "role": "user",
            "content": initial_task,
        })

        renderer.reset_turn()
        results = run_until_response(orchestrator, renderer)

        # Record checkpoint
        state.record_turn(
            instruction=initial_task,
            messages=orchestrator.messages,
            sessions_snapshot=get_sessions_snapshot(orchestrator),
            step_count=orchestrator._step_count,
        )

        cp = state.current_checkpoint()
        if cp:
            renderer.status("")
            renderer.status(
                f"[{state.turn_label(cp.turn_id)}] "
                f"step={cp.step_count} "
                f"messages={len(cp.messages)}"
            )
            renderer.status(f":goto {state.turn_label(cp.turn_id)} to return here")

    # Main REPL loop
    while True:
        try:
            user_input = input("> ").strip()
        except EOFError:
            # Ctrl+D / closed stdin ends the session.
            sys.stdout.write("\n")
            break
        except KeyboardInterrupt:
            # Ctrl+C does not exit; the user must :quit explicitly.
            sys.stdout.write("\n")
            renderer.status("(Ctrl+C - type :quit to exit)")
            continue

        if not user_input:
            continue

        # Parse command
        cmd_parts = parse_command(user_input)
        if cmd_parts:
            cmd = cmd_parts[0].lower()
            args = cmd_parts[1:]

            # :quit / :exit
            if cmd in ("quit", "exit", "q"):
                renderer.status("Goodbye!")
                break

            # :help
            if cmd == "help":
                print_help(renderer)
                continue

            # :history — list checkpoints; optional arg is "all" or a count.
            if cmd == "history":
                limit = None
                show_all = False
                if args:
                    token = args[0].lower()
                    if token == "all":
                        show_all = True
                    elif token.isdigit():
                        limit = int(token)

                # Default window is the 10 most recent checkpoints.
                entries = state.history_entries(show_all=show_all, limit=limit or 10)
                if not entries:
                    renderer.status("No checkpoints yet.")
                else:
                    renderer.status("")
                    for entry in entries:
                        cp = entry["checkpoint"]
                        marker = "*" if entry["is_current"] else " "
                        instruction_preview = cp.instruction[:60] + "..." if len(cp.instruction) > 60 else cp.instruction
                        renderer.status(
                            f"{marker}[{state.turn_label(cp.turn_id)}] "
                            f"step={cp.step_count} "
                            f"msgs={len(cp.messages)} "
                            f"| {instruction_preview}"
                        )
                    renderer.status("")
                continue

            # :goto — time travel to "root" or a turn id ("T1" or "1").
            if cmd == "goto":
                if not args:
                    renderer.error("Usage: :goto <turn|root> (e.g., :goto T1)")
                    continue

                token = args[0]
                if token.lower() == "root":
                    # Go to root (before any turns)
                    state.goto_turn(0)
                    # Reset orchestrator to initial state
                    if hasattr(orchestrator, "messages"):
                        # Keep only system messages
                        # (at most one — the original system prompt).
                        orchestrator.messages = [
                            m for m in orchestrator.messages
                            if m.get("role") == "system"
                        ][:1]
                    renderer.status("Switched to root (initial state).")
                    continue

                # Parse T1, T2, etc. or just numbers
                turn_id = None
                token_upper = token.upper()
                if token_upper.startswith("T") and token_upper[1:].isdigit():
                    turn_id = int(token_upper[1:])
                elif token.isdigit():
                    turn_id = int(token)

                if turn_id is None:
                    renderer.error(f"Invalid turn: {token}")
                    continue

                cp = state.goto_turn(turn_id)
                if cp is None:
                    renderer.error(f"Turn T{turn_id} not found.")
                    continue

                # Restore orchestrator state from the checkpoint.
                # NOTE(review): only messages and step count are restored
                # here; delegate session state is snapshotted but not
                # rewound — confirm that is intentional.
                orchestrator.messages = copy.deepcopy(cp.messages)
                orchestrator._step_count = cp.step_count
                renderer.status(f"Switched to {state.turn_label(turn_id)}.")
                renderer.status(f" instruction: {cp.instruction[:60]}...")
                renderer.status(f" messages: {len(cp.messages)}")
                continue

            # :state — summary of counters and the current checkpoint.
            if cmd == "state":
                renderer.status("")
                renderer.status(f"Step count: {orchestrator._step_count}")
                renderer.status(f"Messages: {len(orchestrator.messages)}")
                if hasattr(orchestrator, "_total_tokens"):
                    renderer.status(f"Total tokens: {orchestrator._total_tokens}")
                cp = state.current_checkpoint()
                if cp:
                    renderer.status(f"Current turn: {state.turn_label(cp.turn_id)}")
                else:
                    renderer.status("Current turn: root")
                renderer.status("")
                continue

            # :sessions — list delegate sessions from the snapshot helper.
            if cmd == "sessions":
                snapshot = get_sessions_snapshot(orchestrator)
                sessions = snapshot.get("sessions", [])
                if not sessions:
                    renderer.status("No sessions.")
                else:
                    renderer.status("")
                    for s in sessions:
                        renderer.status(
                            f" [{s['id'][:8]}] {s['status']} "
                            f"turns={s['turns']} | {s['task'][:50]}"
                        )
                    renderer.status("")
                continue

            # :reasoning — show or toggle the thinking display.
            if cmd == "reasoning":
                if not args:
                    current = "on" if renderer.show_reasoning() else "off"
                    renderer.status(f"Reasoning display: {current}")
                    continue

                value = args[0].lower()
                if value in ("on", "true", "yes", "1"):
                    renderer.set_show_reasoning(True)
                elif value in ("off", "false", "no", "0"):
                    renderer.set_show_reasoning(False)
                else:
                    renderer.error("Usage: :reasoning [on|off]")
                    continue

                current = "on" if renderer.show_reasoning() else "off"
                renderer.status(f"Reasoning display: {current}")
                continue

            # Unknown command
            renderer.error(f"Unknown command: {cmd}")
            renderer.status("Type :help for available commands.")
            continue

        # Not a command - send to orchestrator as instruction
        renderer.status("")

        # Inject user message
        orchestrator.messages.append({
            "role": "user",
            "content": user_input,
        })

        # Execute steps until agent responds with a message
        renderer.reset_turn()
        try:
            results = run_until_response(orchestrator, renderer)
        except Exception as e:
            renderer.error(f"Step failed: {e}")
            # Remove the user message on failure so the history stays
            # consistent with what the model actually processed.
            if orchestrator.messages and orchestrator.messages[-1].get("role") == "user":
                orchestrator.messages.pop()
            continue

        # Record checkpoint (deep copies happen inside record_turn).
        state.record_turn(
            instruction=user_input,
            messages=orchestrator.messages,
            sessions_snapshot=get_sessions_snapshot(orchestrator),
            step_count=orchestrator._step_count,
        )

        # Show turn info
        cp = state.current_checkpoint()
        if cp:
            renderer.status("")
            renderer.status(
                f"[{state.turn_label(cp.turn_id)}] "
                f"step={cp.step_count} "
                f"messages={len(cp.messages)}"
            )
            renderer.status(f":goto {state.turn_label(cp.turn_id)} to return here, :history for timeline")

        # Check stop condition
        if hasattr(orchestrator, "stopCondition") and orchestrator.stopCondition:
            renderer.status("")
            renderer.status("Orchestrator signaled completion.")
            break