zwarm 2.3.5__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/cli/pilot.py ADDED
@@ -0,0 +1,1240 @@
1
+ """
2
+ Pilot: Conversational REPL for the zwarm orchestrator.
3
+
4
+ A chatty interface where you guide the orchestrator turn-by-turn,
5
+ with time travel, checkpoints, and streaming event display.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import copy
11
+ import json
12
+ import shlex
13
+ import sys
14
+ import threading
15
+ import time
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+ from typing import Any, Callable, Dict, List, Optional
19
+ from uuid import uuid4
20
+
21
+ from rich.console import Console
22
+
23
+ from zwarm.core.checkpoints import CheckpointManager
24
+ from zwarm.core.costs import estimate_session_cost, format_cost, get_pricing
25
+
26
+ console = Console()
27
+
28
+
29
class ChoogingSpinner:
    """
    Animated waiting indicator that prints "Chooching" and grows it by one
    'o' every second while work is in flight:

    Chooching → Choooching → Chooooching → ...
    """

    def __init__(self, base_word: str = "Chooching"):
        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None
        self._base = base_word
        # Extra 'o's are inserted between "Ch" and "ching".
        # "Chooching" -> insertion point is after index 2.
        self._prefix = "Ch"
        self._suffix = "ching"
        self._min_o = 2  # seed with "oo"

    def _spin(self):
        count = self._min_o
        while not self._stop_event.is_set():
            # Carriage return overwrites the previous (shorter) word;
            # \033[2m / \033[0m render it dim.
            sys.stdout.write(f"\r\033[2m{self._prefix}{'o' * count}{self._suffix}\033[0m")
            sys.stdout.flush()
            count += 1
            # Sleep ~1 second total, polling the stop flag every 100ms
            # so stop() stays responsive.
            for _ in range(10):
                if self._stop_event.is_set():
                    break
                time.sleep(0.1)

    def start(self):
        """Launch the spinner on a background daemon thread."""
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._spin, daemon=True)
        self._thread.start()

    def stop(self):
        """Signal the spinner to stop, wait briefly, and erase the line."""
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=0.5)
        # \033[K clears from the cursor to end of line.
        sys.stdout.write("\r\033[K")
        sys.stdout.flush()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, *args):
        self.stop()
81
+
82
+
83
# Context window sizes for different models (in tokens)
# These are for the ORCHESTRATOR LLM, not the executors
MODEL_CONTEXT_WINDOWS = {
    # OpenAI models
    "gpt-5.1-codex": 200_000,
    "gpt-5.1-codex-mini": 200_000,
    "gpt-5.1-codex-max": 400_000,
    "gpt-5": 200_000,
    "gpt-5-mini": 200_000,
    "o3": 200_000,
    "o3-mini": 200_000,
    # Claude models (if used as orchestrator)
    "claude-sonnet": 200_000,
    "claude-opus": 200_000,
    "claude-haiku": 200_000,
    "sonnet": 200_000,
    "opus": 200_000,
    "haiku": 200_000,
    # Fallback
    "default": 128_000,
}


def get_context_window(model: str) -> int:
    """
    Return the context window size (in tokens) for *model*.

    Matching is case-insensitive and by model-name prefix. Prefixes are
    tried longest-first so that more specific entries win: with plain
    dict-order scanning, "gpt-5.1-codex" shadowed "gpt-5.1-codex-max"
    and the 400k-context model was misreported as 200k.

    Falls back to MODEL_CONTEXT_WINDOWS["default"] when nothing matches.
    """
    model_lower = model.lower()
    # Longest prefix first; "default" is a sentinel, not a real prefix.
    for prefix in sorted(MODEL_CONTEXT_WINDOWS, key=len, reverse=True):
        if prefix != "default" and model_lower.startswith(prefix):
            return MODEL_CONTEXT_WINDOWS[prefix]
    return MODEL_CONTEXT_WINDOWS["default"]
113
+
114
+
115
def render_context_bar(used: int, total: int, width: int = 30) -> str:
    """
    Build a colored context-window usage bar in Rich markup.

    Args:
        used: Tokens consumed so far.
        total: Full context window size.
        width: Bar width in characters.

    Returns:
        Markup string like: [████████░░░░░░░░░░░░] 40%
        (color escalates green → yellow → red → red bold with usage).
    """
    if total <= 0:
        return "[dim]?[/]"

    fraction = min(used / total, 1.0)
    filled_cells = int(fraction * width)

    # Pick a tint reflecting pressure on the window.
    if fraction < 0.5:
        tint = "green"
    elif fraction < 0.75:
        tint = "yellow"
    elif fraction < 0.9:
        tint = "red"
    else:
        tint = "red bold"

    filled_part = "█" * filled_cells
    empty_part = "░" * (width - filled_cells)
    return f"[{tint}]{filled_part}[/][dim]{empty_part}[/] {fraction * 100:.0f}%"
148
+
149
+
150
+ # =============================================================================
151
+ # Build Pilot Orchestrator
152
+ # =============================================================================
153
+
154
+
155
def build_pilot_orchestrator(
    config_path: Path | None = None,
    working_dir: Path | None = None,
    overrides: list[str] | None = None,
    instance_id: str | None = None,
    instance_name: str | None = None,
    lm_choice: str = "gpt5-verbose",
) -> Any:
    """
    Construct an Orchestrator wired up for pilot (conversational) mode.

    Compared to the regular orchestrator, this variant:
    - uses the pilot system prompt (turn-by-turn, not autonomous)
    - exposes only delegation tools (no bash, exit, list_agents, run_agent)
    - selects the orchestrator LM from the user's choice

    Args:
        config_path: Path to YAML config file.
        working_dir: Working directory (default: cwd).
        overrides: CLI overrides (--set key=value).
        instance_id: Unique ID for this instance (generated if omitted).
        instance_name: Human-readable name for this instance.
        lm_choice: LM to use (gpt5-mini, gpt5, gpt5-verbose).

    Returns:
        Configured Orchestrator instance for pilot mode.
    """
    from wbal.lm import GPT5Large, GPT5LargeVerbose, GPT5MiniTester

    from zwarm.core.config import load_config
    from zwarm.core.environment import OrchestratorEnv
    from zwarm.orchestrator import Orchestrator
    from zwarm.prompts import get_pilot_prompt

    # Map the user's choice onto an LM class; unknown choices fall back
    # to the verbose model.
    available_lms = {
        "gpt5-mini": GPT5MiniTester,
        "gpt5": GPT5Large,
        "gpt5-verbose": GPT5LargeVerbose,
    }
    lm = available_lms.get(lm_choice, GPT5LargeVerbose)()

    # Config must be loaded relative to working_dir (not cwd!) so the
    # target project's config.toml and .env are the ones picked up.
    config = load_config(
        config_path=config_path,
        overrides=overrides,
        working_dir=working_dir,
    )

    # Resolve the working directory after config loading (load_config
    # handles a None working_dir itself).
    working_dir = working_dir or Path.cwd()

    if instance_id is None:
        instance_id = str(uuid4())

    pilot_prompt = get_pilot_prompt(working_dir=str(working_dir))

    # Pilot is conversational — the environment carries no fixed task.
    env = OrchestratorEnv(
        task="",
        working_dir=working_dir,
    )

    orchestrator = Orchestrator(
        config=config,
        working_dir=working_dir,
        system_prompt=pilot_prompt,
        maxSteps=config.orchestrator.max_steps,
        env=env,
        instance_id=instance_id,
        instance_name=instance_name,
        lm=lm,
        # Delegation only — deliberately no bash module here.
        agent_tool_modules=["zwarm.tools.delegation"],
    )

    # Strip tools inherited from YamlAgent/OpenAIWBAgent that don't fit
    # pilot mode: exit, list_agents, run_agent.
    _remove_unwanted_tools(orchestrator)

    return orchestrator
241
+
242
+
243
+ def _remove_unwanted_tools(orchestrator: Any) -> None:
244
+ """
245
+ Remove tools that aren't appropriate for pilot mode.
246
+
247
+ Removes:
248
+ - exit: Pilot doesn't auto-exit, user controls the session
249
+ - list_agents: No delegate subagents in pilot mode
250
+ - run_agent: No delegate subagents in pilot mode
251
+
252
+ This works by wrapping getToolDefinitions to filter out unwanted tools.
253
+ We use object.__setattr__ to bypass Pydantic's attribute checks.
254
+ """
255
+ import types
256
+
257
+ unwanted = {"exit", "list_agents", "run_agent"}
258
+
259
+ # Store original method
260
+ original_get_tools = orchestrator.getToolDefinitions
261
+
262
+ def filtered_get_tools(self):
263
+ """Wrapped getToolDefinitions that filters out unwanted tools."""
264
+ definitions, callables = original_get_tools()
265
+
266
+ # Filter definitions - handle both OpenAI formats
267
+ filtered_defs = []
268
+ for td in definitions:
269
+ # Check both possible name locations
270
+ name = td.get("name") or td.get("function", {}).get("name")
271
+ if name not in unwanted:
272
+ filtered_defs.append(td)
273
+
274
+ # Filter callables
275
+ filtered_callables = {
276
+ k: v for k, v in callables.items()
277
+ if k not in unwanted
278
+ }
279
+
280
+ return filtered_defs, filtered_callables
281
+
282
+ # Bind the new method to the instance, bypassing Pydantic
283
+ bound_method = types.MethodType(filtered_get_tools, orchestrator)
284
+ object.__setattr__(orchestrator, "getToolDefinitions", bound_method)
285
+
286
+
287
+ # =============================================================================
288
+ # Event Renderer (inspired by improver's run_agent.py)
289
+ # =============================================================================
290
+
291
+
292
class EventRenderer:
    """
    Streaming renderer for orchestrator events.

    Handles different event types with nice formatting:
    - Thinking/reasoning
    - Tool calls (delegate, converse, check_session, etc.)
    - Tool results
    - Assistant messages
    - Status messages

    Output is written directly to stdout/stderr with raw ANSI escapes (not
    Rich). A small amount of state tracks whether an assistant or thinking
    line is currently "open" so streamed chunks continue the same line and
    are closed with a newline before anything else prints.
    """

    def __init__(self, *, show_reasoning: bool = True) -> None:
        # Line-state flags: an "open" block has been started but not yet
        # terminated with a newline.
        self._assistant_open = False
        self._assistant_prefix = " "
        self._thinking_open = False
        self._had_output = False
        self._show_reasoning = show_reasoning

        # ANSI codes
        self._dim = "\x1b[2m"
        self._italic = "\x1b[3m"
        self._green = "\x1b[32m"
        self._yellow = "\x1b[33m"
        self._cyan = "\x1b[36m"
        self._reset = "\x1b[0m"
        self._bold = "\x1b[1m"

        # Tool call tracking: call_id -> name / stringified args, kept so
        # results can be paired with the call that produced them.
        self._tool_names: Dict[str, str] = {}
        self._tool_args: Dict[str, str] = {}

    def _write(self, text: str) -> None:
        # Flushed stdout write so streaming output appears immediately.
        sys.stdout.write(text)
        sys.stdout.flush()

    def _write_err(self, text: str) -> None:
        # Errors go to stderr so they survive stdout redirection.
        sys.stderr.write(text)
        sys.stderr.flush()

    def _ensure_newline(self) -> None:
        # Terminate an open assistant line before printing anything else.
        if self._assistant_open:
            self._write("\n")
            self._assistant_open = False

    def _finish_thinking(self) -> None:
        # Terminate an open thinking line, if any.
        if self._thinking_open:
            self._write("\n")
            self._thinking_open = False

    def _line(self, text: str) -> None:
        # Print a complete line, closing any open assistant output first.
        self._ensure_newline()
        self._write(f"{text}\n")

    def _style(self, text: str, *, dim: bool = False, italic: bool = False,
               green: bool = False, yellow: bool = False, cyan: bool = False,
               bold: bool = False) -> str:
        """Wrap *text* in the requested ANSI codes; no-op for empty text."""
        if not text:
            return text
        parts = []
        if dim:
            parts.append(self._dim)
        if italic:
            parts.append(self._italic)
        if green:
            parts.append(self._green)
        if yellow:
            parts.append(self._yellow)
        if cyan:
            parts.append(self._cyan)
        if bold:
            parts.append(self._bold)
        parts.append(text)
        parts.append(self._reset)
        return "".join(parts)

    def _truncate(self, text: str, max_len: int = 120) -> str:
        """Collapse internal whitespace and clip to *max_len* chars with '...'."""
        trimmed = " ".join(text.split())
        if len(trimmed) <= max_len:
            return trimmed
        return trimmed[: max_len - 3].rstrip() + "..."

    # -------------------------------------------------------------------------
    # Event handlers
    # -------------------------------------------------------------------------

    def status(self, message: str) -> None:
        """Display a status message."""
        self._finish_thinking()
        self._line(message)

    def thinking(self, text: str) -> None:
        """Display thinking/reasoning (dim italic); suppressed when
        show_reasoning is off."""
        if not self._show_reasoning:
            return
        if not self._thinking_open:
            self._ensure_newline()
            self._write(self._style(" ", dim=True, italic=True))
            self._thinking_open = True
        # Indent continuation lines so they align under the block.
        formatted = text.replace("\n", f"\n ")
        self._write(self._style(formatted, dim=True, italic=True))
        self._had_output = True

    def thinking_done(self) -> None:
        """Finish thinking block."""
        self._finish_thinking()

    def assistant(self, text: str) -> None:
        """Display assistant message, opening a bullet ("• ") line if needed."""
        self._finish_thinking()
        if not self._assistant_open:
            self._ensure_newline()
            self._write(self._style("• ", bold=True))
            self._assistant_open = True
        formatted = text.replace("\n", f"\n{self._assistant_prefix}")
        self._write(formatted)
        self._had_output = True

    def assistant_done(self) -> None:
        """Finish assistant block."""
        self._ensure_newline()

    def tool_call(self, name: str, args: Any, call_id: str = "") -> None:
        """Display a tool call (green arrow + tool name + dim arg summary)."""
        self._finish_thinking()

        # Track for result matching
        if call_id:
            self._tool_names[call_id] = name
            self._tool_args[call_id] = str(args)

        # Format args based on tool type
        args_str = self._format_tool_args(name, args)

        prefix = self._style("→ ", green=True)
        tool_name = self._style(name, green=True, bold=True)

        if args_str:
            self._line(f"{prefix}{tool_name} {self._style(args_str, dim=True)}")
        else:
            self._line(f"{prefix}{tool_name}")

        self._had_output = True

    def tool_result(self, name: str, result: Any, call_id: str = "") -> None:
        """Display a tool result (compact: at most 200 chars / 3 lines)."""
        if result is None:
            return

        result_str = str(result)
        if len(result_str) > 200:
            result_str = result_str[:200] + "..."

        # Show first few lines
        lines = result_str.split("\n")
        if len(lines) > 3:
            lines = lines[:3] + ["..."]

        for i, line in enumerate(lines):
            # First line gets the elbow connector; continuations are indented.
            prefix = " └ " if i == 0 else " "
            self._line(f"{prefix}{self._style(line, dim=True)}")

    def error(self, message: str) -> None:
        """Display an error on stderr."""
        self._ensure_newline()
        self._write_err(f"{self._style('[error]', yellow=True, bold=True)} {message}\n")

    def _format_tool_args(self, name: str, args: Any) -> str:
        """Produce a short, tool-aware summary of call arguments.

        Accepts None, a JSON string, a dict, or anything else; strings that
        fail to parse as JSON are simply truncated and shown raw.
        """
        if args is None:
            return ""

        if isinstance(args, str):
            try:
                args = json.loads(args)
            except (json.JSONDecodeError, TypeError):
                # Not JSON — show the raw string, truncated.
                return self._truncate(args)

        if not isinstance(args, dict):
            return self._truncate(str(args))

        # Tool-specific formatting
        if name == "delegate":
            task = args.get("task", "")[:60]
            mode = args.get("mode", "sync")
            return f"({mode}): {task}..."
        elif name == "converse":
            session_id = args.get("session_id", "")[:8]
            message = args.get("message", "")[:50]
            return f"[{session_id}]: {message}..."
        elif name == "check_session":
            session_id = args.get("session_id", "")[:8]
            return f"({session_id})"
        elif name == "end_session":
            session_id = args.get("session_id", "")[:8]
            return f"({session_id})"
        elif name == "list_sessions":
            return ""
        elif name == "bash":
            cmd = args.get("command", "")[:80]
            return f"$ {cmd}"
        else:
            # Generic: show first value
            first_val = next(iter(args.values()), "") if args else ""
            if isinstance(first_val, str) and len(first_val) > 40:
                first_val = first_val[:40] + "..."
            return str(first_val) if first_val else ""

    # -------------------------------------------------------------------------
    # State
    # -------------------------------------------------------------------------

    def reset_turn(self) -> None:
        # Clear the per-turn output flag before a new turn starts.
        self._had_output = False

    def had_output(self) -> bool:
        # True if anything was rendered since the last reset_turn().
        return self._had_output

    def set_show_reasoning(self, value: bool) -> None:
        # Toggle rendering of thinking/reasoning blocks.
        self._show_reasoning = value

    def show_reasoning(self) -> bool:
        # Current reasoning-display setting.
        return self._show_reasoning
515
+
516
+
517
+ # =============================================================================
518
+ # Command Parsing
519
+ # =============================================================================
520
+
521
+
522
def parse_command(text: str) -> Optional[List[str]]:
    """
    Interpret user input as a ``:command``.

    Returns the shlex-split tokens (command name first) when *text* starts
    with ':' and parses cleanly. Returns None for plain chat input, an
    empty command (":" alone or only whitespace), or unbalanced quoting.
    """
    if not text.startswith(":"):
        return None
    body = text[1:].strip()
    if not body:
        return None
    try:
        tokens = shlex.split(body)
    except ValueError:
        # Unbalanced quotes etc. — treat as not-a-command.
        return None
    return tokens
533
+
534
+
535
+ # =============================================================================
536
+ # Output Handler for Orchestrator Events
537
+ # =============================================================================
538
+
539
+
540
def make_event_handler(renderer: EventRenderer) -> Callable[[str], None]:
    """
    Build an output_handler that forwards orchestrator text to *renderer*.

    The orchestrator emits plain text through env.output_handler. Chunks
    tagged with the reasoning marker ("💭 ", as emitted by OpenAIWBAgent)
    are routed to renderer.thinking(); everything else is treated as an
    assistant message. Empty strings are ignored.
    """
    def handler(chunk: str) -> None:
        if not chunk:
            return
        if chunk.startswith("💭 "):
            # Drop the two-character marker (emoji + space).
            renderer.thinking(chunk[2:])
        else:
            renderer.assistant(chunk)

    return handler
560
+
561
+
562
+ # =============================================================================
563
+ # Step Execution with Event Capture
564
+ # =============================================================================
565
+
566
+
567
def extract_events_from_response(response: Any) -> Dict[str, List[Any]]:
    """
    Bucket the items of an LLM response's ``output`` list by event type.

    Returns a dict with keys "reasoning", "messages", and "tool_calls".
    Items whose ``type`` attribute is anything else are ignored; a missing
    or empty ``output`` yields all-empty buckets.
    """
    buckets: Dict[str, List[Any]] = {
        "reasoning": [],
        "messages": [],
        "tool_calls": [],
    }

    output = getattr(response, "output", None)
    if not output:
        return buckets

    # Response item type -> bucket name.
    routing = {
        "reasoning": "reasoning",
        "message": "messages",
        "function_call": "tool_calls",
    }
    for item in output:
        bucket = routing.get(getattr(item, "type", None))
        if bucket is not None:
            buckets[bucket].append(item)

    return buckets
589
+
590
+
591
def execute_step_with_events(
    orchestrator: Any,
    renderer: EventRenderer,
) -> tuple[List[tuple], bool]:
    """
    Execute one orchestrator step (perceive → invoke → do) with event rendering.

    Returns:
        (tool_results, had_message) - tool call results and whether agent produced a message

    Note: Watchers are not run in pilot mode - the user is the watcher,
    actively guiding the orchestrator turn-by-turn.
    """
    had_message = False

    # Update environment with current progress before perceive
    # This ensures the observation has fresh step/token counts
    if hasattr(orchestrator, "env") and hasattr(orchestrator.env, "update_progress"):
        total_tokens = getattr(orchestrator, "_total_tokens", 0)
        executor_usage = orchestrator.get_executor_usage() if hasattr(orchestrator, "get_executor_usage") else {}
        orchestrator.env.update_progress(
            step_count=getattr(orchestrator, "_step_count", 0),
            max_steps=getattr(orchestrator, "maxSteps", 50),
            total_tokens=total_tokens,
            executor_tokens=executor_usage.get("total_tokens", 0),
        )

    # Execute perceive (updates environment observation)
    orchestrator.perceive()

    # Execute invoke (calls LLM)
    response = orchestrator.invoke()

    # Track cumulative token usage from the API response
    # (This mirrors what step() does in orchestrator.py — keep in sync.)
    if hasattr(orchestrator, "_last_response") and orchestrator._last_response:
        last_response = orchestrator._last_response
        if hasattr(last_response, "usage") and last_response.usage:
            usage = last_response.usage
            tokens_this_call = getattr(usage, "total_tokens", 0)
            orchestrator._total_tokens = getattr(orchestrator, "_total_tokens", 0) + tokens_this_call

    # Extract and render events from response (before tools run, so the
    # user sees reasoning/messages/tool-call lines in order)
    if response:
        events = extract_events_from_response(response)

        # Render reasoning summaries (if the model provided any)
        for reasoning in events["reasoning"]:
            summary = getattr(reasoning, "summary", None)
            if summary:
                for item in summary:
                    text = getattr(item, "text", "")
                    if text:
                        renderer.thinking(text)
        renderer.thinking_done()

        # Render assistant messages; a non-empty message ends the turn
        for msg in events["messages"]:
            content = getattr(msg, "content", [])
            for part in content:
                text = getattr(part, "text", "")
                if text:
                    renderer.assistant(text)
                    had_message = True
        renderer.assistant_done()

        # Render tool calls (before execution)
        for tc in events["tool_calls"]:
            name = getattr(tc, "name", "?")
            args = getattr(tc, "arguments", "")
            call_id = getattr(tc, "call_id", "")
            renderer.tool_call(name, args, call_id)

    # Execute do (runs tool calls)
    results = orchestrator.do()

    # Increment step count (normally done by step() but we call
    # perceive/invoke/do separately here)
    orchestrator._step_count += 1

    # Render tool results
    for tool_info, result in results:
        name = tool_info.get("name", "?")
        call_id = tool_info.get("call_id", "")
        renderer.tool_result(name, result, call_id)

    return results, had_message
677
+
678
+
679
def run_until_response(
    orchestrator: Any,
    renderer: EventRenderer,
    max_steps: int = 60,
) -> List[tuple]:
    """
    Step the orchestrator until it speaks to the user (or hits a limit).

    The loop keeps going while the agent only produces tool calls and
    stops when any of these happen:
    - the agent produces a text message (control returns to the user)
    - the per-turn step limit is reached
    - the orchestrator's stop condition fires
    - a step yields no tool calls at all

    The whole turn is wrapped in a weave.op so all child calls are grouped
    together in tracing.

    Args:
        orchestrator: The orchestrator instance.
        renderer: Event renderer for output.
        max_steps: Safety limit on steps per turn (default: 60).

    Returns:
        All tool results accumulated during the turn.
    """
    import weave

    @weave.op(name="pilot_turn")
    def _run_turn():
        collected = []
        spinner = ChoogingSpinner()

        for step_idx in range(max_steps):
            # The spinner covers only the first LLM call of the turn;
            # later steps already show visible tool activity.
            first_step = step_idx == 0
            if first_step:
                spinner.start()

            try:
                results, had_message = execute_step_with_events(orchestrator, renderer)
            finally:
                if first_step:
                    spinner.stop()

            collected.extend(results)

            if had_message:
                # Agent addressed the user — hand control back.
                break
            if hasattr(orchestrator, "stopCondition") and orchestrator.stopCondition:
                # Orchestrator signalled completion.
                break
            if not results:
                # No tool calls and no message: nothing left to do.
                break

        # End-of-turn session summary (only prints if sessions exist).
        render_session_status(orchestrator, renderer)

        return collected

    return _run_turn()
742
+
743
+
744
+ # =============================================================================
745
+ # Main REPL
746
+ # =============================================================================
747
+
748
+
749
+ def print_help(renderer: EventRenderer) -> None:
750
+ """Print help for pilot commands."""
751
+ lines = [
752
+ "",
753
+ "Commands:",
754
+ " :help Show this help",
755
+ " :status Show pilot status (tokens, cost, context)",
756
+ " :history [N|all] Show turn checkpoints",
757
+ " :goto <turn|root> Jump to a prior turn (e.g., :goto T1)",
758
+ " :sessions Show executor sessions",
759
+ " :reasoning [on|off] Toggle reasoning display",
760
+ " :save Save state (for later resume)",
761
+ " :quit / :exit Exit the pilot (auto-saves)",
762
+ "",
763
+ "Resume:",
764
+ " State is auto-saved after each turn. To resume a session:",
765
+ " $ zwarm pilot --resume --instance <instance_id>",
766
+ "",
767
+ "Multiline input:",
768
+ ' Start with """ and end with """ to enter multiple lines.',
769
+ ' Example: """',
770
+ " paste your",
771
+ " content here",
772
+ ' """',
773
+ "",
774
+ ]
775
+ for line in lines:
776
+ renderer.status(line)
777
+
778
+
779
def get_sessions_snapshot(orchestrator: Any) -> Dict[str, Any]:
    """
    Build a JSON-serializable summary of executor session state.

    Returns {"sessions": [...]} with one compact record per session
    (id, status, truncated task, turn count, token usage, model). An
    orchestrator without a session manager yields an empty list.
    """
    if not hasattr(orchestrator, "_session_manager"):
        return {"sessions": []}

    records = []
    for session in orchestrator._session_manager.list_sessions():
        records.append({
            "id": session.id,
            "status": session.status.value,
            # Cap the stored task text; missing tasks become "".
            "task": session.task[:100] if session.task else "",
            "turns": session.turn,
            "tokens": session.token_usage.get("total_tokens", 0),
            "model": session.model,
        })
    return {"sessions": records}
797
+
798
+
799
def render_session_status(orchestrator: Any, renderer: EventRenderer) -> None:
    """
    Print a one-line colored summary of executor sessions, e.g.:

        Sessions: 2 running, 1 done, 0 failed

    Prints nothing when the orchestrator has no session manager or when
    there are no sessions at all; counts of zero are omitted from the line.
    """
    if not hasattr(orchestrator, "_session_manager"):
        return

    sessions = orchestrator._session_manager.list_sessions()
    if not sessions:
        return

    # Tally sessions by status value.
    tally = {"running": 0, "completed": 0, "failed": 0}
    for session in sessions:
        key = session.status.value
        if key in tally:
            tally[key] += 1

    # Assemble the colored fragments, skipping zero counts.
    parts = []
    if tally["running"] > 0:
        parts.append(f"[cyan]{tally['running']} running[/]")
    if tally["completed"] > 0:
        parts.append(f"[green]{tally['completed']} done[/]")
    if tally["failed"] > 0:
        parts.append(f"[red]{tally['failed']} failed[/]")

    if parts:
        status_line = ", ".join(parts)
        console.print(f"[dim]Sessions:[/] {status_line}")
829
+
830
+
831
def run_pilot(
    orchestrator: Any,
    *,
    initial_task: Optional[str] = None,
) -> None:
    """
    Entry point for the pilot REPL.

    Wraps the REPL loop in a weave op named "pilot_session" so every
    orchestrator call made during the session is grouped under one trace.

    Args:
        orchestrator: A built orchestrator instance.
        initial_task: Optional task to submit as the first turn.
    """
    import weave

    @weave.op(name="pilot_session")
    def _session():
        """Traced wrapper around the REPL loop."""
        _run_pilot_repl(orchestrator, initial_task)

    _session()
851
+
852
+
853
+ def _run_pilot_repl(
854
+ orchestrator: Any,
855
+ initial_task: Optional[str] = None,
856
+ ) -> None:
857
+ """
858
+ The actual REPL implementation.
859
+ """
860
+ renderer = EventRenderer(show_reasoning=True)
861
+ state = CheckpointManager()
862
+
863
+ # Silence the default output_handler - we render events directly in execute_step_with_events
864
+ # (Otherwise messages would be rendered twice)
865
+ if hasattr(orchestrator, "env") and hasattr(orchestrator.env, "output_handler"):
866
+ orchestrator.env.output_handler = lambda x: None
867
+
868
+ # Welcome message
869
+ renderer.status("")
870
+ renderer.status("╭─────────────────────────────────────────╮")
871
+ renderer.status("│ zwarm pilot │")
872
+ renderer.status("│ Conversational orchestrator REPL │")
873
+ renderer.status("╰─────────────────────────────────────────╯")
874
+ renderer.status("")
875
+ renderer.status("Type :help for commands, :quit to exit.")
876
+ renderer.status("")
877
+
878
+ # Handle initial task if provided
879
+ if initial_task:
880
+ renderer.status(f"Initial task: {initial_task[:80]}...")
881
+ orchestrator.messages.append({
882
+ "role": "user",
883
+ "content": initial_task,
884
+ })
885
+
886
+ renderer.reset_turn()
887
+ max_steps = getattr(orchestrator.config.orchestrator, "max_steps_per_turn", 60)
888
+ results = run_until_response(orchestrator, renderer, max_steps=max_steps)
889
+
890
+ # Record checkpoint
891
+ state.record(
892
+ description=initial_task,
893
+ state={
894
+ "messages": orchestrator.messages,
895
+ "sessions_snapshot": get_sessions_snapshot(orchestrator),
896
+ "step_count": orchestrator._step_count,
897
+ },
898
+ metadata={
899
+ "step_count": orchestrator._step_count,
900
+ "message_count": len(orchestrator.messages),
901
+ },
902
+ )
903
+
904
+ cp = state.current()
905
+ if cp:
906
+ renderer.status("")
907
+ renderer.status(
908
+ f"[{cp.label}] "
909
+ f"step={cp.state['step_count']} "
910
+ f"messages={len(cp.state['messages'])}"
911
+ )
912
+ renderer.status(f":goto {cp.label} to return here")
913
+
914
+ # Main REPL loop
915
+ while True:
916
+ try:
917
+ user_input = input("> ").strip()
918
+ except EOFError:
919
+ sys.stdout.write("\n")
920
+ break
921
+ except KeyboardInterrupt:
922
+ sys.stdout.write("\n")
923
+ renderer.status("(Ctrl+C - type :quit to exit)")
924
+ continue
925
+
926
+ if not user_input:
927
+ continue
928
+
929
+ # Multiline input: if starts with """, collect until closing """
930
+ if user_input.startswith('"""'):
931
+ # Check if closing """ is on the same line (e.g., """hello""")
932
+ rest = user_input[3:]
933
+ if '"""' in rest:
934
+ # Single line with both opening and closing
935
+ user_input = rest[: rest.index('"""')]
936
+ else:
937
+ # Multiline mode - collect until we see """
938
+ lines = [rest] if rest else []
939
+ try:
940
+ while True:
941
+ line = input("... ")
942
+ if '"""' in line:
943
+ # Found closing quotes
944
+ idx = line.index('"""')
945
+ if idx > 0:
946
+ lines.append(line[:idx])
947
+ break
948
+ lines.append(line)
949
+ except EOFError:
950
+ renderer.error("Multiline input interrupted (EOF)")
951
+ continue
952
+ except KeyboardInterrupt:
953
+ sys.stdout.write("\n")
954
+ renderer.status("(Multiline cancelled)")
955
+ continue
956
+ user_input = "\n".join(lines)
957
+
958
+ if not user_input:
959
+ continue
960
+
961
+ # Parse command
962
+ cmd_parts = parse_command(user_input)
963
+ if cmd_parts:
964
+ cmd = cmd_parts[0].lower()
965
+ args = cmd_parts[1:]
966
+
967
+ # :quit / :exit
968
+ if cmd in ("quit", "exit", "q"):
969
+ # Save state before exiting
970
+ if hasattr(orchestrator, "save_state"):
971
+ orchestrator.save_state()
972
+ renderer.status("[dim]State saved.[/]")
973
+ renderer.status("Goodbye!")
974
+ break
975
+
976
+ # :help
977
+ if cmd == "help":
978
+ print_help(renderer)
979
+ continue
980
+
981
+ # :history
982
+ if cmd == "history":
983
+ limit = None
984
+ if args:
985
+ token = args[0].lower()
986
+ if token == "all":
987
+ limit = None # Show all
988
+ elif token.isdigit():
989
+ limit = int(token)
990
+ else:
991
+ limit = 10
992
+
993
+ entries = state.history(limit=limit)
994
+ if not entries:
995
+ renderer.status("No checkpoints yet.")
996
+ else:
997
+ renderer.status("")
998
+ for entry in entries:
999
+ marker = "*" if entry["is_current"] else " "
1000
+ desc = entry["description"]
1001
+ desc_preview = desc[:60] + "..." if len(desc) > 60 else desc
1002
+ renderer.status(
1003
+ f"{marker}[{entry['label']}] "
1004
+ f"step={entry['metadata'].get('step_count', '?')} "
1005
+ f"msgs={entry['metadata'].get('message_count', '?')} "
1006
+ f"| {desc_preview}"
1007
+ )
1008
+ renderer.status("")
1009
+ continue
1010
+
1011
+ # :goto
1012
+ if cmd == "goto":
1013
+ if not args:
1014
+ renderer.error("Usage: :goto <turn|root> (e.g., :goto T1)")
1015
+ continue
1016
+
1017
+ token = args[0]
1018
+ if token.lower() == "root":
1019
+ # Go to root (before any turns)
1020
+ state.goto(0)
1021
+ # Reset orchestrator to initial state
1022
+ if hasattr(orchestrator, "messages"):
1023
+ # Keep only system messages
1024
+ orchestrator.messages = [
1025
+ m for m in orchestrator.messages
1026
+ if m.get("role") == "system"
1027
+ ][:1]
1028
+ renderer.status("Switched to root (initial state).")
1029
+ continue
1030
+
1031
+ # Parse T1, T2, etc. or just numbers
1032
+ turn_id = None
1033
+ token_upper = token.upper()
1034
+ if token_upper.startswith("T") and token_upper[1:].isdigit():
1035
+ turn_id = int(token_upper[1:])
1036
+ elif token.isdigit():
1037
+ turn_id = int(token)
1038
+
1039
+ if turn_id is None:
1040
+ renderer.error(f"Invalid turn: {token}")
1041
+ continue
1042
+
1043
+ cp = state.goto(turn_id)
1044
+ if cp is None:
1045
+ renderer.error(f"Turn T{turn_id} not found.")
1046
+ continue
1047
+
1048
+ # Restore orchestrator state
1049
+ orchestrator.messages = copy.deepcopy(cp.state["messages"])
1050
+ orchestrator._step_count = cp.state["step_count"]
1051
+ renderer.status(f"Switched to {cp.label}.")
1052
+ renderer.status(f" instruction: {cp.description[:60]}...")
1053
+ renderer.status(f" messages: {len(cp.state['messages'])}")
1054
+ continue
1055
+
1056
+ # :state / :status
1057
+ if cmd in ("state", "status"):
1058
+ renderer.status("")
1059
+ renderer.status("[bold]Pilot Status[/]")
1060
+ renderer.status("")
1061
+
1062
+ # Basic stats
1063
+ step_count = getattr(orchestrator, "_step_count", 0)
1064
+ msg_count = len(orchestrator.messages)
1065
+ total_tokens = getattr(orchestrator, "_total_tokens", 0)
1066
+
1067
+ renderer.status(f" Steps: {step_count}")
1068
+ renderer.status(f" Messages: {msg_count}")
1069
+
1070
+ # Checkpoint
1071
+ cp = state.current()
1072
+ turn_label = cp.label if cp else "root"
1073
+ renderer.status(f" Turn: {turn_label}")
1074
+
1075
+ # Token usage and context
1076
+ renderer.status("")
1077
+ renderer.status("[bold]Token Usage[/]")
1078
+ renderer.status("")
1079
+
1080
+ # Get model from orchestrator if available
1081
+ model = "gpt-5.1-codex" # Default
1082
+ if hasattr(orchestrator, "lm") and hasattr(orchestrator.lm, "model"):
1083
+ model = orchestrator.lm.model
1084
+ elif hasattr(orchestrator, "config"):
1085
+ model = getattr(orchestrator.config, "model", model)
1086
+
1087
+ context_window = get_context_window(model)
1088
+ context_bar = render_context_bar(total_tokens, context_window)
1089
+
1090
+ renderer.status(f" Model: {model}")
1091
+ renderer.status(f" Tokens: {total_tokens:,} / {context_window:,}")
1092
+ renderer.status(f" Context: {context_bar}")
1093
+
1094
+ # Cost estimate for orchestrator
1095
+ pricing = get_pricing(model)
1096
+ if pricing and total_tokens > 0:
1097
+ # Estimate assuming 30% input, 70% output (typical for agentic)
1098
+ est_input = int(total_tokens * 0.3)
1099
+ est_output = total_tokens - est_input
1100
+ cost = pricing.estimate_cost(est_input, est_output)
1101
+ renderer.status(f" Est Cost: [green]{format_cost(cost)}[/] (pilot LLM)")
1102
+
1103
+ # Executor sessions summary
1104
+ snapshot = get_sessions_snapshot(orchestrator)
1105
+ sessions = snapshot.get("sessions", [])
1106
+ if sessions:
1107
+ renderer.status("")
1108
+ renderer.status("[bold]Executor Sessions[/]")
1109
+ renderer.status("")
1110
+
1111
+ exec_tokens = 0
1112
+ exec_cost = 0.0
1113
+ running = 0
1114
+ completed = 0
1115
+
1116
+ for s in sessions:
1117
+ exec_tokens += s.get("tokens", 0)
1118
+ if s.get("status") == "running":
1119
+ running += 1
1120
+ elif s.get("status") == "completed":
1121
+ completed += 1
1122
+
1123
+ renderer.status(f" Sessions: {len(sessions)} ({running} running, {completed} done)")
1124
+ renderer.status(f" Tokens: {exec_tokens:,}")
1125
+
1126
+ renderer.status("")
1127
+ continue
1128
+
1129
+ # :sessions
1130
+ if cmd == "sessions":
1131
+ snapshot = get_sessions_snapshot(orchestrator)
1132
+ sessions = snapshot.get("sessions", [])
1133
+ if not sessions:
1134
+ renderer.status("No sessions.")
1135
+ else:
1136
+ renderer.status("")
1137
+ for s in sessions:
1138
+ renderer.status(
1139
+ f" [{s['id'][:8]}] {s['status']} "
1140
+ f"turns={s['turns']} | {s['task'][:50]}"
1141
+ )
1142
+ renderer.status("")
1143
+ continue
1144
+
1145
+ # :reasoning
1146
+ if cmd == "reasoning":
1147
+ if not args:
1148
+ current = "on" if renderer.show_reasoning() else "off"
1149
+ renderer.status(f"Reasoning display: {current}")
1150
+ continue
1151
+
1152
+ value = args[0].lower()
1153
+ if value in ("on", "true", "yes", "1"):
1154
+ renderer.set_show_reasoning(True)
1155
+ elif value in ("off", "false", "no", "0"):
1156
+ renderer.set_show_reasoning(False)
1157
+ else:
1158
+ renderer.error("Usage: :reasoning [on|off]")
1159
+ continue
1160
+
1161
+ current = "on" if renderer.show_reasoning() else "off"
1162
+ renderer.status(f"Reasoning display: {current}")
1163
+ continue
1164
+
1165
+ # :save
1166
+ if cmd == "save":
1167
+ if hasattr(orchestrator, "save_state"):
1168
+ orchestrator.save_state()
1169
+ instance_id = getattr(orchestrator, "instance_id", None)
1170
+ if instance_id:
1171
+ renderer.status(f"[green]✓[/] State saved (instance: {instance_id[:8]})")
1172
+ renderer.status(f" [dim]Resume with: zwarm pilot --resume --instance {instance_id[:8]}[/]")
1173
+ else:
1174
+ renderer.status("[green]✓[/] State saved")
1175
+ else:
1176
+ renderer.error("State saving not available")
1177
+ continue
1178
+
1179
+ # Unknown command
1180
+ renderer.error(f"Unknown command: {cmd}")
1181
+ renderer.status("Type :help for available commands.")
1182
+ continue
1183
+
1184
+ # Not a command - send to orchestrator as instruction
1185
+ renderer.status("")
1186
+
1187
+ # Inject user message
1188
+ orchestrator.messages.append({
1189
+ "role": "user",
1190
+ "content": user_input,
1191
+ })
1192
+
1193
+ # Execute steps until agent responds with a message
1194
+ renderer.reset_turn()
1195
+ max_steps = getattr(orchestrator.config.orchestrator, "max_steps_per_turn", 60)
1196
+ try:
1197
+ results = run_until_response(orchestrator, renderer, max_steps=max_steps)
1198
+ except Exception as e:
1199
+ renderer.error(f"Step failed: {e}")
1200
+ # Remove the user message on failure
1201
+ if orchestrator.messages and orchestrator.messages[-1].get("role") == "user":
1202
+ orchestrator.messages.pop()
1203
+ continue
1204
+
1205
+ # Record checkpoint
1206
+ state.record(
1207
+ description=user_input,
1208
+ state={
1209
+ "messages": orchestrator.messages,
1210
+ "sessions_snapshot": get_sessions_snapshot(orchestrator),
1211
+ "step_count": orchestrator._step_count,
1212
+ },
1213
+ metadata={
1214
+ "step_count": orchestrator._step_count,
1215
+ "message_count": len(orchestrator.messages),
1216
+ },
1217
+ )
1218
+
1219
+ # Save state for resume capability
1220
+ if hasattr(orchestrator, "save_state"):
1221
+ orchestrator.save_state()
1222
+
1223
+ # Show turn info
1224
+ cp = state.current()
1225
+ if cp:
1226
+ renderer.status("")
1227
+ renderer.status(
1228
+ f"[{cp.label}] "
1229
+ f"step={cp.state['step_count']} "
1230
+ f"messages={len(cp.state['messages'])}"
1231
+ )
1232
+ renderer.status(f":goto {cp.label} to return here, :history for timeline")
1233
+
1234
+ # Check stop condition
1235
+ if hasattr(orchestrator, "stopCondition") and orchestrator.stopCondition:
1236
+ renderer.status("")
1237
+ renderer.status("Orchestrator signaled completion.")
1238
+ if hasattr(orchestrator, "save_state"):
1239
+ orchestrator.save_state()
1240
+ break