zwarm 2.3.5__py3-none-any.whl → 3.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/cli/pilot.py ADDED
@@ -0,0 +1,1142 @@
1
+ """
2
+ Pilot: Conversational REPL for the zwarm orchestrator.
3
+
4
+ A chatty interface where you guide the orchestrator turn-by-turn,
5
+ with time travel, checkpoints, and streaming event display.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import copy
11
+ import json
12
+ import shlex
13
+ import sys
14
+ import threading
15
+ import time
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+ from typing import Any, Callable, Dict, List, Optional
19
+ from uuid import uuid4
20
+
21
+ from rich.console import Console
22
+
23
+ from zwarm.core.checkpoints import CheckpointManager
24
+ from zwarm.core.costs import estimate_session_cost, format_cost, get_pricing
25
+
26
# Module-level Rich console shared by pilot CLI helpers.
console = Console()
27
+
28
+
29
class ChoogingSpinner:
    """
    Terminal spinner that prints "Chooching" and stretches it each second.

    The displayed word gains one extra 'o' per tick:
    Chooching -> Choooching -> Chooooching -> ...

    Runs on a daemon thread; use via start()/stop() or as a context manager.
    """

    def __init__(self, base_word: str = "Chooching"):
        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None
        self._base = base_word
        # The animated word is rebuilt each tick as prefix + 'o' * n + suffix
        # ("Ch" + "oo..." + "ching").
        self._prefix = "Ch"
        self._suffix = "ching"
        self._min_o = 2  # The initial word carries "oo".

    def _spin(self):
        """Animation loop: redraw the stretched word once per second until stopped."""
        o_count = self._min_o
        while not self._stop_event.is_set():
            stretched = self._prefix + "o" * o_count + self._suffix
            # Carriage return overwrites the current line; \033[2m dims the text.
            sys.stdout.write("\r\033[2m" + stretched + "\033[0m")
            sys.stdout.flush()
            o_count += 1
            # Sleep ~1 second total while polling the stop flag every 100ms
            # so stop() is responsive.
            for _ in range(10):
                if self._stop_event.is_set():
                    break
                time.sleep(0.1)

    def start(self):
        """Launch the spinner on a background daemon thread."""
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._spin, daemon=True)
        self._thread.start()

    def stop(self):
        """Signal the spinner to stop, wait briefly for it, and erase its line."""
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=0.5)
        # \033[K clears from the cursor to the end of the line.
        sys.stdout.write("\r\033[K")
        sys.stdout.flush()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, *args):
        self.stop()
81
+
82
+
83
# Context window sizes for different models (in tokens).
# Keys are model-name prefixes; lookup is longest-prefix-wins so that a more
# specific entry (e.g. "gpt-5.1-codex-max") beats a shorter one it extends
# (e.g. "gpt-5.1-codex").
MODEL_CONTEXT_WINDOWS = {
    "gpt-5.1-codex": 200_000,
    "gpt-5.1-codex-mini": 200_000,
    "gpt-5.1-codex-max": 400_000,
    "gpt-5": 200_000,
    "gpt-5-mini": 200_000,
    "claude-sonnet-4": 200_000,
    "claude-opus-4": 200_000,
    # Fallback
    "default": 128_000,
}


def get_context_window(model: str) -> int:
    """
    Get the context window size (in tokens) for a model.

    Matching is case-insensitive by prefix, and the *longest* matching
    prefix wins. (Plain insertion-order iteration would be wrong here:
    "gpt-5.1-codex-max" starts with "gpt-5.1-codex", so first-match-wins
    would return 200_000 instead of 400_000.) The "default" key is a
    fallback, never a prefix candidate.

    Args:
        model: Model name, e.g. "gpt-5.1-codex-max".

    Returns:
        Context window size in tokens; MODEL_CONTEXT_WINDOWS["default"]
        when no prefix matches.
    """
    model_lower = model.lower()
    best_size = None
    best_len = -1
    for prefix, size in MODEL_CONTEXT_WINDOWS.items():
        if prefix == "default":
            continue  # Fallback entry, not a real model prefix.
        if model_lower.startswith(prefix) and len(prefix) > best_len:
            best_len = len(prefix)
            best_size = size
    return best_size if best_size is not None else MODEL_CONTEXT_WINDOWS["default"]
104
+
105
+
106
def render_context_bar(used: int, total: int, width: int = 30) -> str:
    """
    Render a visual context window usage bar.

    Args:
        used: Tokens used
        total: Total context window
        width: Bar width in characters

    Returns:
        Colored bar string like: [████████░░░░░░░░░░░░] 40%
    """
    if total <= 0:
        return "[dim]?[/]"

    fraction = min(used / total, 1.0)
    filled_cells = int(fraction * width)
    empty_cells = width - filled_cells

    # Color escalates with usage: green -> yellow -> red -> red bold.
    if fraction >= 0.9:
        color = "red bold"
    elif fraction >= 0.75:
        color = "red"
    elif fraction >= 0.5:
        color = "yellow"
    else:
        color = "green"

    pieces = [
        f"[{color}]",
        "█" * filled_cells,
        "[/]",
        "[dim]",
        "░" * empty_cells,
        "[/]",
        f" {fraction * 100:.0f}%",
    ]
    return "".join(pieces)
139
+
140
+
141
+ # =============================================================================
142
+ # Build Pilot Orchestrator
143
+ # =============================================================================
144
+
145
+
146
def build_pilot_orchestrator(
    config_path: Path | None = None,
    working_dir: Path | None = None,
    overrides: list[str] | None = None,
    instance_id: str | None = None,
    instance_name: str | None = None,
    lm_choice: str = "gpt5-verbose",
) -> Any:
    """
    Build an orchestrator configured for pilot mode.

    Pilot mode differences from regular orchestrator:
    - Uses pilot system prompt (conversational, not autonomous)
    - Only delegation tools (no bash, exit, list_agents, run_agent)
    - LM selection based on user choice

    Args:
        config_path: Path to YAML config file
        working_dir: Working directory (default: cwd)
        overrides: CLI overrides (--set key=value)
        instance_id: Unique ID for this instance
        instance_name: Human-readable name for this instance
        lm_choice: LM to use (gpt5-mini, gpt5, gpt5-verbose)

    Returns:
        Configured Orchestrator instance for pilot mode
    """
    from wbal.lm import GPT5Large, GPT5LargeVerbose, GPT5MiniTester

    from zwarm.core.config import load_config
    from zwarm.core.environment import OrchestratorEnv
    from zwarm.orchestrator import Orchestrator
    from zwarm.prompts import get_pilot_prompt

    # Map the user's choice to an LM class; unknown choices fall back to verbose.
    lm_classes = {
        "gpt5-mini": GPT5MiniTester,
        "gpt5": GPT5Large,
        "gpt5-verbose": GPT5LargeVerbose,
    }
    lm = lm_classes.get(lm_choice, GPT5LargeVerbose)()

    config = load_config(config_path=config_path, overrides=overrides)

    # Default to the current directory when no working dir was given.
    working_dir = working_dir or Path.cwd()

    # Every pilot instance gets a unique id unless the caller supplied one.
    if instance_id is None:
        instance_id = str(uuid4())

    system_prompt = get_pilot_prompt(working_dir=str(working_dir))

    # Lean environment: pilot is conversational, so there is no upfront task.
    env = OrchestratorEnv(
        task="",  # No task - pilot is conversational
        working_dir=working_dir,
    )

    orchestrator = Orchestrator(
        config=config,
        working_dir=working_dir,
        system_prompt=system_prompt,
        maxSteps=config.orchestrator.max_steps,
        env=env,
        instance_id=instance_id,
        instance_name=instance_name,
        lm=lm,
        # Only delegation tools - no bash
        agent_tool_modules=["zwarm.tools.delegation"],
    )

    # Strip the exit/list_agents/run_agent tools inherited from the base agent.
    _remove_unwanted_tools(orchestrator)

    return orchestrator
230
+
231
+
232
+ def _remove_unwanted_tools(orchestrator: Any) -> None:
233
+ """
234
+ Remove tools that aren't appropriate for pilot mode.
235
+
236
+ Removes:
237
+ - exit: Pilot doesn't auto-exit, user controls the session
238
+ - list_agents: No delegate subagents in pilot mode
239
+ - run_agent: No delegate subagents in pilot mode
240
+
241
+ This works by wrapping getToolDefinitions to filter out unwanted tools.
242
+ We use object.__setattr__ to bypass Pydantic's attribute checks.
243
+ """
244
+ import types
245
+
246
+ unwanted = {"exit", "list_agents", "run_agent"}
247
+
248
+ # Store original method
249
+ original_get_tools = orchestrator.getToolDefinitions
250
+
251
+ def filtered_get_tools(self):
252
+ """Wrapped getToolDefinitions that filters out unwanted tools."""
253
+ definitions, callables = original_get_tools()
254
+
255
+ # Filter definitions - handle both OpenAI formats
256
+ filtered_defs = []
257
+ for td in definitions:
258
+ # Check both possible name locations
259
+ name = td.get("name") or td.get("function", {}).get("name")
260
+ if name not in unwanted:
261
+ filtered_defs.append(td)
262
+
263
+ # Filter callables
264
+ filtered_callables = {
265
+ k: v for k, v in callables.items()
266
+ if k not in unwanted
267
+ }
268
+
269
+ return filtered_defs, filtered_callables
270
+
271
+ # Bind the new method to the instance, bypassing Pydantic
272
+ bound_method = types.MethodType(filtered_get_tools, orchestrator)
273
+ object.__setattr__(orchestrator, "getToolDefinitions", bound_method)
274
+
275
+
276
+ # =============================================================================
277
+ # Event Renderer (inspired by improver's run_agent.py)
278
+ # =============================================================================
279
+
280
+
281
class EventRenderer:
    """
    Streaming renderer for orchestrator events.

    Handles different event types with nice formatting:
    - Thinking/reasoning
    - Tool calls (delegate, converse, check_session, etc.)
    - Tool results
    - Assistant messages
    - Status messages

    Output goes to stdout (stderr for errors) using raw ANSI escape codes,
    not Rich markup. Streaming state (open assistant/thinking lines) is
    tracked so fragments written without trailing newlines are closed
    correctly before the next block starts.
    """

    def __init__(self, *, show_reasoning: bool = True) -> None:
        # True while an assistant line is open (written without a trailing newline).
        self._assistant_open = False
        self._assistant_prefix = " "
        # True while a thinking (reasoning) block is open.
        self._thinking_open = False
        # Set when anything was rendered this turn; see reset_turn()/had_output().
        self._had_output = False
        self._show_reasoning = show_reasoning

        # ANSI codes
        self._dim = "\x1b[2m"
        self._italic = "\x1b[3m"
        self._green = "\x1b[32m"
        self._yellow = "\x1b[33m"
        self._cyan = "\x1b[36m"
        self._reset = "\x1b[0m"
        self._bold = "\x1b[1m"

        # Tool call tracking: call_id -> name / raw args, kept for result matching.
        # NOTE(review): _tool_args is populated but never read in this class —
        # possibly kept for debugging; confirm before removing.
        self._tool_names: Dict[str, str] = {}
        self._tool_args: Dict[str, str] = {}

    def _write(self, text: str) -> None:
        """Write raw text to stdout and flush immediately (streaming output)."""
        sys.stdout.write(text)
        sys.stdout.flush()

    def _write_err(self, text: str) -> None:
        """Write raw text to stderr and flush immediately."""
        sys.stderr.write(text)
        sys.stderr.flush()

    def _ensure_newline(self) -> None:
        """Close an open assistant line with a newline, if one is open."""
        if self._assistant_open:
            self._write("\n")
            self._assistant_open = False

    def _finish_thinking(self) -> None:
        """Close an open thinking block with a newline, if one is open."""
        if self._thinking_open:
            self._write("\n")
            self._thinking_open = False

    def _line(self, text: str) -> None:
        """Write a complete line, first closing any open assistant line."""
        self._ensure_newline()
        self._write(f"{text}\n")

    def _style(self, text: str, *, dim: bool = False, italic: bool = False,
               green: bool = False, yellow: bool = False, cyan: bool = False,
               bold: bool = False) -> str:
        """Wrap text in the requested ANSI codes; empty text passes through unchanged."""
        if not text:
            return text
        parts = []
        if dim:
            parts.append(self._dim)
        if italic:
            parts.append(self._italic)
        if green:
            parts.append(self._green)
        if yellow:
            parts.append(self._yellow)
        if cyan:
            parts.append(self._cyan)
        if bold:
            parts.append(self._bold)
        parts.append(text)
        parts.append(self._reset)
        return "".join(parts)

    def _truncate(self, text: str, max_len: int = 120) -> str:
        """Collapse runs of whitespace and cap the result at max_len with '...'."""
        trimmed = " ".join(text.split())
        if len(trimmed) <= max_len:
            return trimmed
        return trimmed[: max_len - 3].rstrip() + "..."

    # -------------------------------------------------------------------------
    # Event handlers
    # -------------------------------------------------------------------------

    def status(self, message: str) -> None:
        """Display a status message."""
        self._finish_thinking()
        self._line(message)

    def thinking(self, text: str) -> None:
        """Display thinking/reasoning (dim italic)."""
        if not self._show_reasoning:
            return
        if not self._thinking_open:
            self._ensure_newline()
            self._write(self._style(" ", dim=True, italic=True))
            self._thinking_open = True
        # Indent continuation lines to line up under the block's opening.
        formatted = text.replace("\n", f"\n ")
        self._write(self._style(formatted, dim=True, italic=True))
        self._had_output = True

    def thinking_done(self) -> None:
        """Finish thinking block."""
        self._finish_thinking()

    def assistant(self, text: str) -> None:
        """Display assistant message."""
        self._finish_thinking()
        if not self._assistant_open:
            self._ensure_newline()
            self._write(self._style("• ", bold=True))
            self._assistant_open = True
        # Indent continuation lines under the bullet.
        formatted = text.replace("\n", f"\n{self._assistant_prefix}")
        self._write(formatted)
        self._had_output = True

    def assistant_done(self) -> None:
        """Finish assistant block."""
        self._ensure_newline()

    def tool_call(self, name: str, args: Any, call_id: str = "") -> None:
        """Display a tool call."""
        self._finish_thinking()

        # Track for result matching
        if call_id:
            self._tool_names[call_id] = name
            self._tool_args[call_id] = str(args)

        # Format args based on tool type
        args_str = self._format_tool_args(name, args)

        prefix = self._style("→ ", green=True)
        tool_name = self._style(name, green=True, bold=True)

        if args_str:
            self._line(f"{prefix}{tool_name} {self._style(args_str, dim=True)}")
        else:
            self._line(f"{prefix}{tool_name}")

        self._had_output = True

    def tool_result(self, name: str, result: Any, call_id: str = "") -> None:
        """Display a tool result (compact: at most 200 chars / 3 lines)."""
        if result is None:
            return

        result_str = str(result)
        if len(result_str) > 200:
            result_str = result_str[:200] + "..."

        # Show first few lines
        lines = result_str.split("\n")
        if len(lines) > 3:
            lines = lines[:3] + ["..."]

        for i, line in enumerate(lines):
            # First line gets the branch marker; continuation lines are indented.
            prefix = " └ " if i == 0 else " "
            self._line(f"{prefix}{self._style(line, dim=True)}")

    def error(self, message: str) -> None:
        """Display an error on stderr."""
        self._ensure_newline()
        self._write_err(f"{self._style('[error]', yellow=True, bold=True)} {message}\n")

    def _format_tool_args(self, name: str, args: Any) -> str:
        """Format tool arguments for display, with per-tool compact shapes.

        Accepts a dict, a JSON string (decoded if possible), or any other
        value (truncated via _truncate).
        """
        if args is None:
            return ""

        if isinstance(args, str):
            try:
                args = json.loads(args)
            except (json.JSONDecodeError, TypeError):
                return self._truncate(args)

        if not isinstance(args, dict):
            return self._truncate(str(args))

        # Tool-specific formatting
        # NOTE(review): the slices below assume the values are strings; a
        # non-string "task"/"message"/etc. would raise — confirm upstream.
        if name == "delegate":
            task = args.get("task", "")[:60]
            mode = args.get("mode", "sync")
            return f"({mode}): {task}..."
        elif name == "converse":
            session_id = args.get("session_id", "")[:8]
            message = args.get("message", "")[:50]
            return f"[{session_id}]: {message}..."
        elif name == "check_session":
            session_id = args.get("session_id", "")[:8]
            return f"({session_id})"
        elif name == "end_session":
            session_id = args.get("session_id", "")[:8]
            return f"({session_id})"
        elif name == "list_sessions":
            return ""
        elif name == "bash":
            cmd = args.get("command", "")[:80]
            return f"$ {cmd}"
        else:
            # Generic: show first value
            first_val = next(iter(args.values()), "") if args else ""
            if isinstance(first_val, str) and len(first_val) > 40:
                first_val = first_val[:40] + "..."
            return str(first_val) if first_val else ""

    # -------------------------------------------------------------------------
    # State
    # -------------------------------------------------------------------------

    def reset_turn(self) -> None:
        """Clear the had-output flag at the start of a REPL turn."""
        self._had_output = False

    def had_output(self) -> bool:
        """True if anything was rendered since the last reset_turn()."""
        return self._had_output

    def set_show_reasoning(self, value: bool) -> None:
        """Enable or disable the display of thinking/reasoning blocks."""
        self._show_reasoning = value

    def show_reasoning(self) -> bool:
        """Whether thinking/reasoning blocks are currently displayed."""
        return self._show_reasoning
504
+
505
+
506
+ # =============================================================================
507
+ # Command Parsing
508
+ # =============================================================================
509
+
510
+
511
def parse_command(text: str) -> Optional[List[str]]:
    """
    Parse a ":command" line into its tokens.

    Returns the shlex-split tokens (command name first) when *text* starts
    with ":"; returns None when the text is not a command, is empty after
    the colon, or contains unbalanced quoting.
    """
    if not text.startswith(":"):
        return None
    body = text[1:].strip()
    if not body:
        return None
    try:
        tokens = shlex.split(body)
    except ValueError:
        # Unbalanced quotes and similar — treat as "not a command".
        return None
    return tokens
522
+
523
+
524
+ # =============================================================================
525
+ # Output Handler for Orchestrator Events
526
+ # =============================================================================
527
+
528
+
529
def make_event_handler(renderer: EventRenderer) -> Callable[[str], None]:
    """
    Create an output_handler that routes orchestrator output to the renderer.

    Text carrying the "💭 " prefix (emitted by OpenAIWBAgent for reasoning)
    is routed to renderer.thinking(); any other non-empty text is treated
    as an assistant message. Empty strings are ignored.
    """
    reasoning_prefix = "💭 "

    def handler(text: str) -> None:
        if not text:
            return
        if text.startswith(reasoning_prefix):
            renderer.thinking(text[len(reasoning_prefix):])
        else:
            renderer.assistant(text)

    return handler
549
+
550
+
551
+ # =============================================================================
552
+ # Step Execution with Event Capture
553
+ # =============================================================================
554
+
555
+
556
def extract_events_from_response(response: Any) -> Dict[str, List[Any]]:
    """
    Extract structured events from an LLM response.

    Buckets the items of response.output by their "type" attribute:
    "reasoning" items, "message" items (bucket "messages"), and
    "function_call" items (bucket "tool_calls"). Items of any other type
    are ignored; a missing or empty output yields empty buckets.
    """
    buckets: Dict[str, List[Any]] = {
        "reasoning": [],
        "messages": [],
        "tool_calls": [],
    }

    # Response item type -> bucket name.
    routing = {
        "reasoning": "reasoning",
        "message": "messages",
        "function_call": "tool_calls",
    }

    for item in getattr(response, "output", None) or []:
        bucket = routing.get(getattr(item, "type", None))
        if bucket is not None:
            buckets[bucket].append(item)

    return buckets
578
+
579
+
580
def execute_step_with_events(
    orchestrator: Any,
    renderer: EventRenderer,
) -> tuple[List[tuple], bool]:
    """
    Execute one orchestrator step with event rendering.

    Runs the orchestrator's perceive -> invoke -> do cycle manually (rather
    than calling step()) so that reasoning, messages, and tool calls from
    the LLM response can be rendered between invoke() and do().

    Returns:
        (tool_results, had_message) - tool call results and whether agent produced a message

    Note: Watchers are not run in pilot mode - the user is the watcher,
    actively guiding the orchestrator turn-by-turn.
    """
    had_message = False

    # Execute perceive (updates environment observation)
    orchestrator.perceive()

    # Execute invoke (calls LLM)
    response = orchestrator.invoke()

    # Extract and render events from response
    if response:
        events = extract_events_from_response(response)

        # Render reasoning (each item's summary texts, if any)
        for reasoning in events["reasoning"]:
            summary = getattr(reasoning, "summary", None)
            if summary:
                for item in summary:
                    text = getattr(item, "text", "")
                    if text:
                        renderer.thinking(text)
        renderer.thinking_done()

        # Render messages; any non-empty text counts as "agent responded".
        for msg in events["messages"]:
            content = getattr(msg, "content", [])
            for part in content:
                text = getattr(part, "text", "")
                if text:
                    renderer.assistant(text)
                    had_message = True
        renderer.assistant_done()

        # Render tool calls (before execution)
        for tc in events["tool_calls"]:
            name = getattr(tc, "name", "?")
            args = getattr(tc, "arguments", "")
            call_id = getattr(tc, "call_id", "")
            renderer.tool_call(name, args, call_id)

    # Execute do (runs tool calls)
    results = orchestrator.do()

    # Increment step count (normally done by step() but we call perceive/invoke/do separately)
    orchestrator._step_count += 1

    # Render tool results.
    # NOTE(review): assumes do() returns (tool_info_dict, result) pairs — confirm
    # against the Orchestrator implementation.
    for tool_info, result in results:
        name = tool_info.get("name", "?")
        call_id = tool_info.get("call_id", "")
        renderer.tool_result(name, result, call_id)

    return results, had_message
645
+
646
+
647
def run_until_response(
    orchestrator: Any,
    renderer: EventRenderer,
    max_steps: int = 20,
) -> List[tuple]:
    """
    Run the orchestrator until it produces a message response.

    Keeps stepping while the agent only produces tool calls.
    Stops when:
    - Agent produces a text message (returns to user)
    - Max steps reached
    - Stop condition triggered

    This is wrapped as a weave.op to group all child calls per turn.

    Args:
        orchestrator: The orchestrator instance
        renderer: Event renderer for output
        max_steps: Safety limit on steps per turn

    Returns:
        All tool results from the turn
    """
    import weave

    @weave.op(name="pilot_turn")
    def _run_turn():
        all_results = []
        spinner = ChoogingSpinner()

        for step in range(max_steps):
            # Show spinner only for the first step (initial LLM call after user message)
            # Subsequent steps have visible tool activity so no spinner needed
            if step == 0:
                spinner.start()

            try:
                results, had_message = execute_step_with_events(orchestrator, renderer)
            finally:
                # Always clear the spinner, even if the step raised.
                if step == 0:
                    spinner.stop()

            all_results.extend(results)

            # Stop if agent produced a message
            if had_message:
                break

            # Stop if orchestrator signals completion
            if hasattr(orchestrator, "stopCondition") and orchestrator.stopCondition:
                break

            # Stop if no tool calls (agent is done but didn't message)
            if not results:
                break

        return all_results

    return _run_turn()
707
+
708
+
709
+ # =============================================================================
710
+ # Main REPL
711
+ # =============================================================================
712
+
713
+
714
def print_help(renderer: EventRenderer) -> None:
    """Print help for pilot commands via the renderer's status channel."""
    help_text = (
        "",
        "Commands:",
        " :help Show this help",
        " :status Show pilot status (tokens, cost, context)",
        " :history [N|all] Show turn checkpoints",
        " :goto <turn|root> Jump to a prior turn (e.g., :goto T1)",
        " :sessions Show executor sessions",
        " :reasoning [on|off] Toggle reasoning display",
        " :quit / :exit Exit the pilot",
        "",
        "Multiline input:",
        ' Start with """ and end with """ to enter multiple lines.',
        ' Example: """',
        " paste your",
        " content here",
        ' """',
        "",
    )
    for entry in help_text:
        renderer.status(entry)
737
+
738
+
739
def get_sessions_snapshot(orchestrator: Any) -> Dict[str, Any]:
    """
    Build a JSON-serializable snapshot of executor session state.

    Returns {"sessions": [...]} with one summary dict per session
    (id, status, truncated task, turns, tokens, model). An orchestrator
    without a _session_manager attribute yields an empty list.
    """
    if not hasattr(orchestrator, "_session_manager"):
        return {"sessions": []}

    summaries = []
    for session in orchestrator._session_manager.list_sessions():
        summaries.append(
            {
                "id": session.id,
                "status": session.status.value,
                # Task text is capped at 100 chars; a falsy task becomes "".
                "task": session.task[:100] if session.task else "",
                "turns": session.turn,
                "tokens": session.token_usage.get("total_tokens", 0),
                "model": session.model,
            }
        )
    return {"sessions": summaries}
757
+
758
+
759
def run_pilot(
    orchestrator: Any,
    *,
    initial_task: Optional[str] = None,
) -> None:
    """
    Run the pilot REPL.

    Thin wrapper that runs the real REPL inside a weave.op so the whole
    session is grouped as a single trace.

    Args:
        orchestrator: A built orchestrator instance
        initial_task: Optional initial task to start with
    """
    import weave

    @weave.op(name="pilot_session")
    def _run_pilot_session():
        """Inner function wrapped with weave.op for clean logging."""
        _run_pilot_repl(orchestrator, initial_task)

    _run_pilot_session()
779
+
780
+
781
def _run_pilot_repl(
    orchestrator: Any,
    initial_task: Optional[str] = None,
) -> None:
    """
    The actual REPL implementation.

    Loops over user input: lines starting with ":" are commands (help,
    history, goto, status, sessions, reasoning, quit); anything else is
    appended as a user message and run through the orchestrator until it
    responds. Each turn is recorded as a checkpoint so :goto can time-travel.
    """
    renderer = EventRenderer(show_reasoning=True)
    state = CheckpointManager()

    # Silence the default output_handler - we render events directly in execute_step_with_events
    # (Otherwise messages would be rendered twice)
    if hasattr(orchestrator, "env") and hasattr(orchestrator.env, "output_handler"):
        orchestrator.env.output_handler = lambda x: None

    # Welcome message
    renderer.status("")
    renderer.status("╭─────────────────────────────────────────╮")
    renderer.status("│ zwarm pilot │")
    renderer.status("│ Conversational orchestrator REPL │")
    renderer.status("╰─────────────────────────────────────────╯")
    renderer.status("")
    renderer.status("Type :help for commands, :quit to exit.")
    renderer.status("")

    # Handle initial task if provided: run one full turn before the loop.
    if initial_task:
        renderer.status(f"Initial task: {initial_task[:80]}...")
        orchestrator.messages.append({
            "role": "user",
            "content": initial_task,
        })

        renderer.reset_turn()
        # NOTE(review): the returned results are unused here — intentional?
        results = run_until_response(orchestrator, renderer)

        # Record checkpoint
        state.record(
            description=initial_task,
            state={
                "messages": orchestrator.messages,
                "sessions_snapshot": get_sessions_snapshot(orchestrator),
                "step_count": orchestrator._step_count,
            },
            metadata={
                "step_count": orchestrator._step_count,
                "message_count": len(orchestrator.messages),
            },
        )

        cp = state.current()
        if cp:
            renderer.status("")
            renderer.status(
                f"[{cp.label}] "
                f"step={cp.state['step_count']} "
                f"messages={len(cp.state['messages'])}"
            )
            renderer.status(f":goto {cp.label} to return here")

    # Main REPL loop
    while True:
        try:
            user_input = input("> ").strip()
        except EOFError:
            # Ctrl+D ends the session.
            sys.stdout.write("\n")
            break
        except KeyboardInterrupt:
            # Ctrl+C at the prompt is non-fatal; require an explicit :quit.
            sys.stdout.write("\n")
            renderer.status("(Ctrl+C - type :quit to exit)")
            continue

        if not user_input:
            continue

        # Multiline input: if starts with """, collect until closing """
        if user_input.startswith('"""'):
            # Check if closing """ is on the same line (e.g., """hello""")
            rest = user_input[3:]
            if '"""' in rest:
                # Single line with both opening and closing
                user_input = rest[: rest.index('"""')]
            else:
                # Multiline mode - collect until we see """
                lines = [rest] if rest else []
                try:
                    while True:
                        line = input("... ")
                        if '"""' in line:
                            # Found closing quotes; keep any text before them,
                            # discard anything after.
                            idx = line.index('"""')
                            if idx > 0:
                                lines.append(line[:idx])
                            break
                        lines.append(line)
                except EOFError:
                    renderer.error("Multiline input interrupted (EOF)")
                    continue
                except KeyboardInterrupt:
                    sys.stdout.write("\n")
                    renderer.status("(Multiline cancelled)")
                    continue
                user_input = "\n".join(lines)

        if not user_input:
            continue

        # Parse command
        cmd_parts = parse_command(user_input)
        if cmd_parts:
            cmd = cmd_parts[0].lower()
            args = cmd_parts[1:]

            # :quit / :exit
            if cmd in ("quit", "exit", "q"):
                renderer.status("Goodbye!")
                break

            # :help
            if cmd == "help":
                print_help(renderer)
                continue

            # :history — no argument shows all; a number limits; anything else -> 10.
            if cmd == "history":
                limit = None
                if args:
                    token = args[0].lower()
                    if token == "all":
                        limit = None  # Show all
                    elif token.isdigit():
                        limit = int(token)
                    else:
                        limit = 10

                entries = state.history(limit=limit)
                if not entries:
                    renderer.status("No checkpoints yet.")
                else:
                    renderer.status("")
                    for entry in entries:
                        # "*" marks the checkpoint the REPL is currently on.
                        marker = "*" if entry["is_current"] else " "
                        desc = entry["description"]
                        desc_preview = desc[:60] + "..." if len(desc) > 60 else desc
                        renderer.status(
                            f"{marker}[{entry['label']}] "
                            f"step={entry['metadata'].get('step_count', '?')} "
                            f"msgs={entry['metadata'].get('message_count', '?')} "
                            f"| {desc_preview}"
                        )
                    renderer.status("")
                continue

            # :goto — time travel to a recorded checkpoint (or the pre-turn root).
            if cmd == "goto":
                if not args:
                    renderer.error("Usage: :goto <turn|root> (e.g., :goto T1)")
                    continue

                token = args[0]
                if token.lower() == "root":
                    # Go to root (before any turns)
                    state.goto(0)
                    # Reset orchestrator to initial state
                    if hasattr(orchestrator, "messages"):
                        # Keep only system messages (at most the first one).
                        orchestrator.messages = [
                            m for m in orchestrator.messages
                            if m.get("role") == "system"
                        ][:1]
                    renderer.status("Switched to root (initial state).")
                    continue

                # Parse T1, T2, etc. or just numbers
                turn_id = None
                token_upper = token.upper()
                if token_upper.startswith("T") and token_upper[1:].isdigit():
                    turn_id = int(token_upper[1:])
                elif token.isdigit():
                    turn_id = int(token)

                if turn_id is None:
                    renderer.error(f"Invalid turn: {token}")
                    continue

                cp = state.goto(turn_id)
                if cp is None:
                    renderer.error(f"Turn T{turn_id} not found.")
                    continue

                # Restore orchestrator state (deep copy so later turns don't
                # mutate the stored checkpoint).
                orchestrator.messages = copy.deepcopy(cp.state["messages"])
                orchestrator._step_count = cp.state["step_count"]
                renderer.status(f"Switched to {cp.label}.")
                renderer.status(f" instruction: {cp.description[:60]}...")
                renderer.status(f" messages: {len(cp.state['messages'])}")
                continue

            # :state / :status
            if cmd in ("state", "status"):
                renderer.status("")
                # NOTE(review): renderer.status() writes raw ANSI text, so this
                # "[bold]...[/]" Rich markup prints literally — confirm intended.
                renderer.status("[bold]Pilot Status[/]")
                renderer.status("")

                # Basic stats
                step_count = getattr(orchestrator, "_step_count", 0)
                msg_count = len(orchestrator.messages)
                total_tokens = getattr(orchestrator, "_total_tokens", 0)

                renderer.status(f" Steps: {step_count}")
                renderer.status(f" Messages: {msg_count}")

                # Checkpoint
                cp = state.current()
                turn_label = cp.label if cp else "root"
                renderer.status(f" Turn: {turn_label}")

                # Token usage and context
                renderer.status("")
                renderer.status("[bold]Token Usage[/]")
                renderer.status("")

                # Get model from orchestrator if available
                model = "gpt-5.1-codex"  # Default
                if hasattr(orchestrator, "lm") and hasattr(orchestrator.lm, "model"):
                    model = orchestrator.lm.model
                elif hasattr(orchestrator, "config"):
                    model = getattr(orchestrator.config, "model", model)

                context_window = get_context_window(model)
                context_bar = render_context_bar(total_tokens, context_window)

                renderer.status(f" Model: {model}")
                renderer.status(f" Tokens: {total_tokens:,} / {context_window:,}")
                renderer.status(f" Context: {context_bar}")

                # Cost estimate for orchestrator
                pricing = get_pricing(model)
                if pricing and total_tokens > 0:
                    # Estimate assuming 30% input, 70% output (typical for agentic)
                    est_input = int(total_tokens * 0.3)
                    est_output = total_tokens - est_input
                    cost = pricing.estimate_cost(est_input, est_output)
                    renderer.status(f" Est Cost: [green]{format_cost(cost)}[/] (pilot LLM)")

                # Executor sessions summary
                snapshot = get_sessions_snapshot(orchestrator)
                sessions = snapshot.get("sessions", [])
                if sessions:
                    renderer.status("")
                    renderer.status("[bold]Executor Sessions[/]")
                    renderer.status("")

                    exec_tokens = 0
                    # NOTE(review): exec_cost is initialized but never updated
                    # or displayed below — dead as written.
                    exec_cost = 0.0
                    running = 0
                    completed = 0

                    for s in sessions:
                        exec_tokens += s.get("tokens", 0)
                        if s.get("status") == "running":
                            running += 1
                        elif s.get("status") == "completed":
                            completed += 1

                    renderer.status(f" Sessions: {len(sessions)} ({running} running, {completed} done)")
                    renderer.status(f" Tokens: {exec_tokens:,}")

                renderer.status("")
                continue

            # :sessions
            if cmd == "sessions":
                snapshot = get_sessions_snapshot(orchestrator)
                sessions = snapshot.get("sessions", [])
                if not sessions:
                    renderer.status("No sessions.")
                else:
                    renderer.status("")
                    for s in sessions:
                        renderer.status(
                            f" [{s['id'][:8]}] {s['status']} "
                            f"turns={s['turns']} | {s['task'][:50]}"
                        )
                    renderer.status("")
                continue

            # :reasoning — no argument shows the current setting.
            if cmd == "reasoning":
                if not args:
                    current = "on" if renderer.show_reasoning() else "off"
                    renderer.status(f"Reasoning display: {current}")
                    continue

                value = args[0].lower()
                if value in ("on", "true", "yes", "1"):
                    renderer.set_show_reasoning(True)
                elif value in ("off", "false", "no", "0"):
                    renderer.set_show_reasoning(False)
                else:
                    renderer.error("Usage: :reasoning [on|off]")
                    continue

                current = "on" if renderer.show_reasoning() else "off"
                renderer.status(f"Reasoning display: {current}")
                continue

            # Unknown command
            renderer.error(f"Unknown command: {cmd}")
            renderer.status("Type :help for available commands.")
            continue

        # Not a command - send to orchestrator as instruction
        renderer.status("")

        # Inject user message
        orchestrator.messages.append({
            "role": "user",
            "content": user_input,
        })

        # Execute steps until agent responds with a message
        renderer.reset_turn()
        try:
            # NOTE(review): the returned results are unused here — intentional?
            results = run_until_response(orchestrator, renderer)
        except Exception as e:
            renderer.error(f"Step failed: {e}")
            # Remove the user message on failure so a retry starts clean.
            if orchestrator.messages and orchestrator.messages[-1].get("role") == "user":
                orchestrator.messages.pop()
            continue

        # Record checkpoint
        state.record(
            description=user_input,
            state={
                "messages": orchestrator.messages,
                "sessions_snapshot": get_sessions_snapshot(orchestrator),
                "step_count": orchestrator._step_count,
            },
            metadata={
                "step_count": orchestrator._step_count,
                "message_count": len(orchestrator.messages),
            },
        )

        # Show turn info
        cp = state.current()
        if cp:
            renderer.status("")
            renderer.status(
                f"[{cp.label}] "
                f"step={cp.state['step_count']} "
                f"messages={len(cp.state['messages'])}"
            )
            renderer.status(f":goto {cp.label} to return here, :history for timeline")

        # Check stop condition
        if hasattr(orchestrator, "stopCondition") and orchestrator.stopCondition:
            renderer.status("")
            renderer.status("Orchestrator signaled completion.")
            break