zwarm 3.4.0__py3-none-any.whl → 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/cli/main.py CHANGED
@@ -299,7 +299,8 @@ def pilot(
299
299
  config: Annotated[Optional[Path], typer.Option("--config", "-c", help="Path to config YAML")] = None,
300
300
  overrides: Annotated[Optional[list[str]], typer.Option("--set", help="Override config (key=value)")] = None,
301
301
  working_dir: Annotated[Path, typer.Option("--working-dir", "-w", help="Working directory")] = Path("."),
302
- instance: Annotated[Optional[str], typer.Option("--instance", "-i", help="Instance ID (for isolation)")] = None,
302
+ resume: Annotated[bool, typer.Option("--resume", help="Resume from previous state")] = False,
303
+ instance: Annotated[Optional[str], typer.Option("--instance", "-i", help="Instance ID (for isolation/resume)")] = None,
303
304
  instance_name: Annotated[Optional[str], typer.Option("--name", "-n", help="Human-readable instance name")] = None,
304
305
  model: Annotated[PilotLM, typer.Option("--model", "-m", help="LM to use")] = PilotLM.gpt5_verbose,
305
306
  ):
@@ -341,19 +342,30 @@ def pilot(
341
342
 
342
343
  [dim]# Named instance[/]
343
344
  $ zwarm pilot --name my-feature
345
+
346
+ [dim]# Resume a previous session[/]
347
+ $ zwarm pilot --resume --instance abc123
344
348
  """
345
349
  from zwarm.cli.pilot import run_pilot, build_pilot_orchestrator
346
350
 
347
351
  # Resolve task (optional for pilot)
348
352
  resolved_task = _resolve_task(task, task_file)
349
353
 
350
- console.print(f"[bold]Starting pilot session...[/]")
354
+ # Validate resume requirements
355
+ if resume and not instance:
356
+ console.print("[red]Error:[/] --resume requires --instance to specify which session to resume")
357
+ console.print(" [dim]Use 'zwarm instances' to list available instances[/]")
358
+ raise typer.Exit(1)
359
+
360
+ console.print(f"[bold]{'Resuming' if resume else 'Starting'} pilot session...[/]")
351
361
  console.print(f" Working dir: {working_dir.absolute()}")
352
362
  console.print(f" Model: {model.value}")
353
363
  if resolved_task:
354
364
  console.print(f" Initial task: {resolved_task[:60]}...")
355
365
  if instance:
356
366
  console.print(f" Instance: {instance}" + (f" ({instance_name})" if instance_name else ""))
367
+ if resume:
368
+ console.print(f" [yellow]Resuming from saved state...[/]")
357
369
  console.print()
358
370
 
359
371
  orchestrator = None
@@ -371,6 +383,12 @@ def pilot(
371
383
  if orchestrator.instance_id and not instance:
372
384
  console.print(f" [dim]Instance: {orchestrator.instance_id[:8]}[/]")
373
385
 
386
+ # Resume from saved state if requested
387
+ if resume:
388
+ orchestrator.load_state()
389
+ msg_count = len(orchestrator.messages)
390
+ console.print(f" [green]✓[/] Resumed with {msg_count} messages")
391
+
374
392
  # Run the pilot REPL
375
393
  run_pilot(orchestrator, initial_task=resolved_task)
376
394
 
@@ -917,6 +935,23 @@ def init(
917
935
  codex_toml_path.write_text(codex_content)
918
936
  console.print(f" [green]✓[/] Created .zwarm/codex.toml")
919
937
 
938
+ # Create claude.toml for isolated Claude Code configuration
939
+ claude_toml_path = state_dir / "claude.toml"
940
+ write_claude_toml = True
941
+ if claude_toml_path.exists():
942
+ if not non_interactive:
943
+ overwrite_claude = typer.confirm(" .zwarm/claude.toml exists. Overwrite?", default=False)
944
+ if not overwrite_claude:
945
+ write_claude_toml = False
946
+ console.print(" [dim]Skipping claude.toml[/]")
947
+ else:
948
+ write_claude_toml = False # Don't overwrite in non-interactive mode
949
+
950
+ if write_claude_toml:
951
+ claude_content = _generate_claude_toml(model="sonnet")
952
+ claude_toml_path.write_text(claude_content)
953
+ console.print(f" [green]✓[/] Created .zwarm/claude.toml")
954
+
920
955
  # Create zwarm.yaml
921
956
  if create_project_config:
922
957
  if zwarm_yaml_path.exists() and not non_interactive:
@@ -959,41 +994,87 @@ def _generate_config_toml(
959
994
  adapter: str = "codex_mcp",
960
995
  watchers: list[str] | None = None,
961
996
  ) -> str:
962
- """Generate config.toml content."""
997
+ """Generate config.toml content with all options at their defaults."""
963
998
  watchers = watchers or []
964
999
 
965
1000
  lines = [
966
1001
  "# zwarm configuration",
967
1002
  "# Generated by 'zwarm init'",
1003
+ "# All values shown are defaults - uncomment and modify as needed",
968
1004
  "",
1005
+ "# ============================================================================",
1006
+ "# Weave Integration (optional tracing/observability)",
1007
+ "# ============================================================================",
969
1008
  "[weave]",
970
1009
  ]
971
1010
 
972
1011
  if weave_project:
973
1012
  lines.append(f'project = "{weave_project}"')
974
1013
  else:
975
- lines.append("# project = \"your-entity/your-project\" # Uncomment to enable Weave tracing")
1014
+ lines.append('# project = "your-entity/your-project" # Uncomment to enable Weave tracing')
976
1015
 
977
1016
  lines.extend([
1017
+ "enabled = true",
978
1018
  "",
1019
+ "# ============================================================================",
1020
+ "# Orchestrator Settings",
1021
+ "# ============================================================================",
979
1022
  "[orchestrator]",
980
- "max_steps = 50",
1023
+ '# lm = "gpt-5-mini" # LLM for orchestrator (gpt-5-mini, gpt-5, claude-sonnet-4)',
1024
+ "max_steps = 50 # Max steps for orchestrate command",
1025
+ "max_steps_per_turn = 60 # Max steps per turn in pilot mode",
1026
+ "parallel_delegations = 4 # Max concurrent delegations",
1027
+ '# prompt = "path/to/prompt.yaml" # Custom prompt file (optional)',
1028
+ '# allowed_dirs = ["*"] # Directories agent can delegate to (default: working_dir only)',
1029
+ "",
1030
+ "# Context window compaction (prevents overflow on long tasks)",
1031
+ "[orchestrator.compaction]",
1032
+ "enabled = true",
1033
+ "max_tokens = 100000 # Trigger compaction above this",
1034
+ "threshold_pct = 0.85 # Compact when at this % of max_tokens",
1035
+ "target_pct = 0.7 # Target this % after compaction",
1036
+ "keep_first_n = 2 # Always keep first N messages (system + task)",
1037
+ "keep_last_n = 10 # Always keep last N messages (recent context)",
981
1038
  "",
1039
+ "# ============================================================================",
1040
+ "# Executor Settings (codex agent configuration)",
1041
+ "# ============================================================================",
982
1042
  "[executor]",
983
- f'adapter = "{adapter}"',
984
- "# model = \"\" # Optional model override",
985
- "# web_search = false # Enable web search for delegated sessions",
1043
+ f'adapter = "{adapter}" # codex_mcp | codex_exec | claude_code',
1044
+ '# model = "gpt-5.1-codex-mini" # Model for delegated sessions (uses codex.toml default if not set)',
1045
+ 'sandbox = "workspace-write" # read-only | workspace-write | danger-full-access',
1046
+ "timeout = 3600 # Session timeout in seconds",
1047
+ 'reasoning_effort = "high" # low | medium | high',
986
1048
  "",
1049
+ "# ============================================================================",
1050
+ "# Watchers (automated monitoring and nudges)",
1051
+ "# ============================================================================",
987
1052
  "[watchers]",
988
- f"enabled = {watchers}",
1053
+ f"enabled = {str(bool(watchers)).lower()}",
1054
+ 'message_role = "user" # Role for nudge messages: user | assistant | system',
989
1055
  "",
990
- "# Watcher-specific configuration",
991
- "# [watchers.budget]",
992
- "# max_steps = 50",
1056
+ "# Default watchers: progress, budget, delegation_reminder",
1057
+ "# Uncomment below to customize:",
1058
+ "",
1059
+ "# [[watchers.watchers]]",
1060
+ '# name = "progress"',
1061
+ "# enabled = true",
1062
+ "",
1063
+ "# [[watchers.watchers]]",
1064
+ '# name = "budget"',
1065
+ "# enabled = true",
1066
+ "# [watchers.watchers.config]",
1067
+ "# max_sessions = 10",
993
1068
  "# warn_at_percent = 80",
994
1069
  "",
995
- "# [watchers.pattern]",
996
- "# patterns = [\"DROP TABLE\", \"rm -rf\"]",
1070
+ "# [[watchers.watchers]]",
1071
+ '# name = "delegation_reminder"',
1072
+ "# enabled = true",
1073
+ "",
1074
+ "# ============================================================================",
1075
+ "# State Directory",
1076
+ "# ============================================================================",
1077
+ '# state_dir = ".zwarm" # Where to store session data',
997
1078
  "",
998
1079
  ])
999
1080
 
@@ -1042,6 +1123,38 @@ def _generate_codex_toml(
1042
1123
  return "\n".join(lines)
1043
1124
 
1044
1125
 
1126
def _generate_claude_toml(
    model: str = "sonnet",
) -> str:
    """
    Generate claude.toml content for an isolated Claude Code configuration.

    The generated header states that zwarm parses this file and passes the
    settings to claude via CLI flags, and that each .zwarm directory has its
    own copy.

    Args:
        model: Model name written to the ``model`` key. The generated
            comment lists sonnet, opus and haiku as the expected values.

    Returns:
        The file content as one newline-joined string (ends with a newline).
    """
    # Escape backslashes and double quotes so an unusual model name cannot
    # terminate the TOML basic string early and produce an unparsable file.
    # Ordinary names such as "sonnet" pass through unchanged.
    safe_model = model.replace("\\", "\\\\").replace('"', '\\"')

    lines = [
        "# Claude Code configuration for zwarm",
        "# zwarm parses this file and passes settings to claude via CLI flags",
        "# Each .zwarm dir has its own config",
        "# Generated by 'zwarm init'",
        "",
        "# Model settings",
        f'model = "{safe_model}" # sonnet | opus | haiku',
        "",
        "# DANGER MODE - bypasses all permission checks",
        "# Set to true to use --dangerously-skip-permissions",
        # NOTE(review): the generated default is the permissive setting.
        "full_danger = true",
        "",
        "# Note: Claude Code uses different CLI flags than Codex",
        "# Common options:",
        "# --model <model> Model to use (sonnet, opus, haiku)",
        "# --add-dir <path> Additional directories to allow",
        "# --allowed-tools <tools> Restrict available tools",
        "",
    ]
    return "\n".join(lines)
1156
+
1157
+
1045
1158
  def _generate_zwarm_yaml(
1046
1159
  description: str = "",
1047
1160
  context: str = "",
zwarm/cli/pilot.py CHANGED
@@ -81,14 +81,23 @@ class ChoogingSpinner:
81
81
 
82
82
 
83
83
  # Context window sizes for different models (in tokens)
84
+ # These are for the ORCHESTRATOR LLM, not the executors
84
85
# Maps a model name to its context window size in tokens.
MODEL_CONTEXT_WINDOWS: dict[str, int] = {
    # OpenAI models
    "gpt-5.1-codex": 200_000,
    "gpt-5.1-codex-mini": 200_000,
    "gpt-5.1-codex-max": 400_000,
    "gpt-5": 200_000,
    "gpt-5-mini": 200_000,
    "o3": 200_000,
    "o3-mini": 200_000,
    # Claude models (if used as orchestrator)
    "claude-sonnet": 200_000,
    "claude-opus": 200_000,
    "claude-haiku": 200_000,
    "sonnet": 200_000,
    "opus": 200_000,
    "haiku": 200_000,
    # Versioned Claude names carried over from the previous release so that
    # callers passing them keep resolving to the same size.
    # NOTE(review): the lookup code is not visible here; if it matches keys
    # exactly, these names would otherwise fall through to "default".
    "claude-sonnet-4": 200_000,
    "claude-opus-4": 200_000,
    # Fallback
    "default": 128_000,
}
@@ -204,11 +213,12 @@ def build_pilot_orchestrator(
204
213
  # Build pilot system prompt
205
214
  system_prompt = get_pilot_prompt(working_dir=str(working_dir))
206
215
 
207
- # Create lean orchestrator environment
216
+ # Create lean orchestrator environment (pilot mode = simpler observation)
208
217
  env = OrchestratorEnv(
209
218
  task="", # No task - pilot is conversational
210
219
  working_dir=working_dir,
211
220
  )
221
+ env.set_pilot_mode(True) # Human is in control, use lean observation
212
222
 
213
223
  # Create orchestrator with ONLY delegation tools (no bash)
214
224
  orchestrator = Orchestrator(
@@ -612,6 +622,15 @@ def execute_step_with_events(
612
622
  # Execute invoke (calls LLM)
613
623
  response = orchestrator.invoke()
614
624
 
625
+ # Track cumulative token usage from the API response
626
+ # (This mirrors what step() does in orchestrator.py)
627
+ if hasattr(orchestrator, "_last_response") and orchestrator._last_response:
628
+ last_response = orchestrator._last_response
629
+ if hasattr(last_response, "usage") and last_response.usage:
630
+ usage = last_response.usage
631
+ tokens_this_call = getattr(usage, "total_tokens", 0)
632
+ orchestrator._total_tokens = getattr(orchestrator, "_total_tokens", 0) + tokens_this_call
633
+
615
634
  # Extract and render events from response
616
635
  if response:
617
636
  events = extract_events_from_response(response)
@@ -739,7 +758,12 @@ def print_help(renderer: EventRenderer) -> None:
739
758
  " :goto <turn|root> Jump to a prior turn (e.g., :goto T1)",
740
759
  " :sessions Show executor sessions",
741
760
  " :reasoning [on|off] Toggle reasoning display",
742
- " :quit / :exit Exit the pilot",
761
+ " :save Save state (for later resume)",
762
+ " :quit / :exit Exit the pilot (auto-saves)",
763
+ "",
764
+ "Resume:",
765
+ " State is auto-saved after each turn. To resume a session:",
766
+ " $ zwarm pilot --resume --instance <instance_id>",
743
767
  "",
744
768
  "Multiline input:",
745
769
  ' Start with """ and end with """ to enter multiple lines.',
@@ -943,6 +967,10 @@ def _run_pilot_repl(
943
967
 
944
968
  # :quit / :exit
945
969
  if cmd in ("quit", "exit", "q"):
970
+ # Save state before exiting
971
+ if hasattr(orchestrator, "save_state"):
972
+ orchestrator.save_state()
973
+ renderer.status("[dim]State saved.[/]")
946
974
  renderer.status("Goodbye!")
947
975
  break
948
976
 
@@ -1135,6 +1163,20 @@ def _run_pilot_repl(
1135
1163
  renderer.status(f"Reasoning display: {current}")
1136
1164
  continue
1137
1165
 
1166
+ # :save
1167
+ if cmd == "save":
1168
+ if hasattr(orchestrator, "save_state"):
1169
+ orchestrator.save_state()
1170
+ instance_id = getattr(orchestrator, "instance_id", None)
1171
+ if instance_id:
1172
+ renderer.status(f"[green]✓[/] State saved (instance: {instance_id[:8]})")
1173
+ renderer.status(f" [dim]Resume with: zwarm pilot --resume --instance {instance_id[:8]}[/]")
1174
+ else:
1175
+ renderer.status("[green]✓[/] State saved")
1176
+ else:
1177
+ renderer.error("State saving not available")
1178
+ continue
1179
+
1138
1180
  # Unknown command
1139
1181
  renderer.error(f"Unknown command: {cmd}")
1140
1182
  renderer.status("Type :help for available commands.")
@@ -1175,6 +1217,10 @@ def _run_pilot_repl(
1175
1217
  },
1176
1218
  )
1177
1219
 
1220
+ # Save state for resume capability
1221
+ if hasattr(orchestrator, "save_state"):
1222
+ orchestrator.save_state()
1223
+
1178
1224
  # Show turn info
1179
1225
  cp = state.current()
1180
1226
  if cp:
@@ -1190,4 +1236,6 @@ def _run_pilot_repl(
1190
1236
  if hasattr(orchestrator, "stopCondition") and orchestrator.stopCondition:
1191
1237
  renderer.status("")
1192
1238
  renderer.status("Orchestrator signaled completion.")
1239
+ if hasattr(orchestrator, "save_state"):
1240
+ orchestrator.save_state()
1193
1241
  break
zwarm/core/costs.py CHANGED
@@ -1,109 +1,42 @@
1
1
  """
2
2
  Token cost estimation for LLM models.
3
3
 
4
- Pricing data is hardcoded and may become stale. Last updated: 2026-01.
4
+ This module re-exports from the centralized model registry.
5
+ For adding new models, edit: zwarm/core/registry.py
5
6
 
6
- Sources:
7
- - https://www.helicone.ai/llm-cost/provider/openai/model/gpt-5.1-codex
8
- - https://pricepertoken.com/pricing-page/model/openai-codex-mini
7
+ Backwards-compatible API preserved for existing code.
9
8
  """
10
9
 
11
10
  from __future__ import annotations
12
11
 
13
- from dataclasses import dataclass
14
- from typing import Any
15
-
16
-
17
- @dataclass
18
- class ModelPricing:
19
- """Pricing for a model in $ per million tokens."""
20
- input_per_million: float
21
- output_per_million: float
22
- cached_input_per_million: float | None = None # Some models have cached input discount
23
-
24
- def estimate_cost(
25
- self,
26
- input_tokens: int,
27
- output_tokens: int,
28
- cached_tokens: int = 0,
29
- ) -> float:
30
- """
31
- Estimate cost in dollars.
32
-
33
- Args:
34
- input_tokens: Number of input tokens
35
- output_tokens: Number of output tokens
36
- cached_tokens: Number of cached input tokens (if applicable)
37
-
38
- Returns:
39
- Estimated cost in USD
40
- """
41
- input_cost = (input_tokens / 1_000_000) * self.input_per_million
42
- output_cost = (output_tokens / 1_000_000) * self.output_per_million
43
-
44
- cached_cost = 0.0
45
- if cached_tokens and self.cached_input_per_million:
46
- cached_cost = (cached_tokens / 1_000_000) * self.cached_input_per_million
47
-
48
- return input_cost + output_cost + cached_cost
49
-
50
-
51
- # Model pricing table ($ per million tokens)
52
- # Last updated: 2026-01
53
- MODEL_PRICING: dict[str, ModelPricing] = {
54
- # OpenAI Codex models
55
- "gpt-5.1-codex": ModelPricing(
56
- input_per_million=1.25,
57
- output_per_million=10.00,
58
- cached_input_per_million=0.125, # 90% discount for cached
59
- ),
60
- "gpt-5.1-codex-mini": ModelPricing(
61
- input_per_million=0.25,
62
- output_per_million=2.00,
63
- cached_input_per_million=0.025,
64
- ),
65
- "gpt-5.1-codex-max": ModelPricing(
66
- input_per_million=1.25,
67
- output_per_million=10.00,
68
- cached_input_per_million=0.125,
69
- ),
70
- # GPT-5 base models (for reference)
71
- "gpt-5": ModelPricing(
72
- input_per_million=1.25,
73
- output_per_million=10.00,
74
- ),
75
- "gpt-5-mini": ModelPricing(
76
- input_per_million=0.25,
77
- output_per_million=2.00,
78
- ),
79
- # Claude models (Anthropic)
80
- "claude-sonnet-4-20250514": ModelPricing(
81
- input_per_million=3.00,
82
- output_per_million=15.00,
83
- ),
84
- "claude-opus-4-20250514": ModelPricing(
85
- input_per_million=15.00,
86
- output_per_million=75.00,
87
- ),
88
- "claude-3-5-sonnet-20241022": ModelPricing(
89
- input_per_million=3.00,
90
- output_per_million=15.00,
91
- ),
92
- }
93
-
94
- # Aliases for common model names
95
- MODEL_ALIASES: dict[str, str] = {
96
- "codex": "gpt-5.1-codex",
97
- "codex-mini": "gpt-5.1-codex-mini",
98
- "codex-max": "gpt-5.1-codex-max",
99
- "gpt5": "gpt-5",
100
- "gpt5-mini": "gpt-5-mini",
101
- "sonnet": "claude-sonnet-4-20250514",
102
- "opus": "claude-opus-4-20250514",
103
- }
104
-
105
-
106
- def get_pricing(model: str) -> ModelPricing | None:
12
+ # Re-export everything from registry for backwards compatibility
13
+ from zwarm.core.registry import (
14
+ ModelInfo,
15
+ MODELS,
16
+ resolve_model,
17
+ get_adapter_for_model,
18
+ get_default_model,
19
+ list_models,
20
+ list_adapters,
21
+ get_models_help_text,
22
+ get_models_table_data,
23
+ estimate_cost,
24
+ format_cost,
25
+ estimate_session_cost,
26
+ )
27
+
28
# Backwards compatibility alias: the old pricing class name now refers to the
# registry's ModelInfo.
# NOTE(review): ModelInfo is defined in zwarm/core/registry.py, which is not
# visible here - confirm it still offers what ModelPricing callers relied on.
ModelPricing = ModelInfo

# Legacy lookup tables for backwards compatibility, derived from the registry.
# MODEL_PRICING maps each canonical model name to its registry entry.
MODEL_PRICING = {info.canonical: info for info in MODELS}

# MODEL_ALIASES maps every alias to its canonical name. A comprehension is
# used so no loop variables are left behind as stray module attributes; when
# two entries share an alias, the later entry in MODELS wins.
MODEL_ALIASES = {
    alias: info.canonical
    for info in MODELS
    for alias in info.aliases
}
37
+
38
+
39
+ def get_pricing(model: str) -> ModelInfo | None:
107
40
  """
108
41
  Get pricing for a model.
109
42
 
@@ -111,89 +44,28 @@ def get_pricing(model: str) -> ModelPricing | None:
111
44
  model: Model name or alias
112
45
 
113
46
  Returns:
114
- ModelPricing or None if unknown
115
- """
116
- # Check aliases first
117
- resolved = MODEL_ALIASES.get(model.lower(), model)
118
-
119
- # Exact match
120
- if resolved in MODEL_PRICING:
121
- return MODEL_PRICING[resolved]
122
-
123
- # Try lowercase
124
- if resolved.lower() in MODEL_PRICING:
125
- return MODEL_PRICING[resolved.lower()]
126
-
127
- # Try prefix matching (e.g., "gpt-5.1-codex-mini-2026-01" -> "gpt-5.1-codex-mini")
128
- for known_model in MODEL_PRICING:
129
- if resolved.lower().startswith(known_model.lower()):
130
- return MODEL_PRICING[known_model]
131
-
132
- return None
133
-
134
-
135
- def estimate_cost(
136
- model: str,
137
- input_tokens: int,
138
- output_tokens: int,
139
- cached_tokens: int = 0,
140
- ) -> float | None:
47
+ ModelInfo or None if unknown
141
48
  """
142
- Estimate cost for a model run.
143
-
144
- Args:
145
- model: Model name
146
- input_tokens: Number of input tokens
147
- output_tokens: Number of output tokens
148
- cached_tokens: Number of cached input tokens
149
-
150
- Returns:
151
- Cost in USD, or None if model pricing unknown
152
- """
153
- pricing = get_pricing(model)
154
- if pricing is None:
155
- return None
156
-
157
- return pricing.estimate_cost(input_tokens, output_tokens, cached_tokens)
158
-
159
-
160
- def format_cost(cost: float | None) -> str:
161
- """Format cost as a human-readable string."""
162
- if cost is None:
163
- return "?"
164
- if cost < 0.01:
165
- return f"${cost:.4f}"
166
- elif cost < 1.00:
167
- return f"${cost:.3f}"
168
- else:
169
- return f"${cost:.2f}"
170
-
171
-
172
- def estimate_session_cost(
173
- model: str,
174
- token_usage: dict[str, Any],
175
- ) -> dict[str, Any]:
176
- """
177
- Estimate cost for a session given its token usage.
178
-
179
- Args:
180
- model: Model used
181
- token_usage: Dict with input_tokens, output_tokens, etc.
182
-
183
- Returns:
184
- Dict with cost info: {cost, cost_formatted, pricing_known}
185
- """
186
- input_tokens = token_usage.get("input_tokens", 0)
187
- output_tokens = token_usage.get("output_tokens", 0)
188
- cached_tokens = token_usage.get("cached_tokens", 0)
189
-
190
- cost = estimate_cost(model, input_tokens, output_tokens, cached_tokens)
191
-
192
- return {
193
- "cost": cost,
194
- "cost_formatted": format_cost(cost),
195
- "pricing_known": cost is not None,
196
- "model": model,
197
- "input_tokens": input_tokens,
198
- "output_tokens": output_tokens,
199
- }
49
+ return resolve_model(model)
50
+
51
+
52
+ __all__ = [
53
+ # New API
54
+ "ModelInfo",
55
+ "MODELS",
56
+ "resolve_model",
57
+ "get_adapter_for_model",
58
+ "get_default_model",
59
+ "list_models",
60
+ "list_adapters",
61
+ "get_models_help_text",
62
+ "get_models_table_data",
63
+ "estimate_cost",
64
+ "format_cost",
65
+ "estimate_session_cost",
66
+ # Legacy API
67
+ "MODEL_PRICING",
68
+ "MODEL_ALIASES",
69
+ "ModelPricing",
70
+ "get_pricing",
71
+ ]
zwarm/core/environment.py CHANGED
@@ -52,6 +52,9 @@ class OrchestratorEnv(Environment):
52
52
  # Budget config (set from config)
53
53
  _budget_max_sessions: int | None = PrivateAttr(default=None)
54
54
 
55
+ # Pilot mode: simpler observation since human is in control
56
+ _pilot_mode: bool = PrivateAttr(default=False)
57
+
55
58
  def set_session_manager(self, manager: "CodexSessionManager") -> None:
56
59
  """Set the session manager for live session visibility in observe()."""
57
60
  self._session_manager = manager
@@ -77,18 +80,69 @@ class OrchestratorEnv(Environment):
77
80
  """Set budget limits from config."""
78
81
  self._budget_max_sessions = max_sessions
79
82
 
83
def set_pilot_mode(self, enabled: bool = True) -> None:
    """
    Turn the lean pilot-style observation on or off.

    With a human driving the session, detailed progress is available to
    them through the :status/:sessions commands, so the LLM only needs a
    brief context.

    Args:
        enabled: Value stored in the private pilot-mode flag.
    """
    self._pilot_mode = enabled
91
+
80
92
def observe(self) -> str:
    """
    Return the observable state for the orchestrator.

    Which view is produced depends on the pilot-mode flag:

    Pilot mode (human in control):
    - Brief session status (just what's active)
    - Working directory

    Full mode (autonomous orchestrator):
    - Progress (steps, tokens)
    - Session summary (pulled LIVE from CodexSessionManager)
    - Active sessions with their status
    - Working directory

    Note: the task is NOT part of the observation; it is already in the
    user message.
    """
    # Select the renderer first, then call it, so only one view is built.
    render = self._observe_pilot if self._pilot_mode else self._observe_full
    return render()
111
+
112
def _observe_pilot(self) -> str:
    """
    Build the lean observation used in pilot mode (human is in control).

    Returns:
        Sections joined by blank lines: a bullet list of running sessions
        (task text cut to 40 characters plus "..."), a "Previous:" line
        with completed/failed counts when either is non-zero, and finally
        the absolute working directory. Without a session manager only the
        working directory line is returned.
    """
    sections = []

    manager = self._session_manager
    if manager is not None:
        known = manager.list_sessions()

        # Running sessions get one bullet each - just enough for context.
        active = [sess for sess in known if sess.status.value == "running"]
        if active:
            bullets = []
            for sess in active:
                preview = sess.task
                if len(preview) > 40:
                    preview = preview[:40] + "..."
                bullets.append(f" • {sess.short_id}: {preview}")
            sections.append("## Active Sessions\n" + "\n".join(bullets))

        # Finished sessions are only counted, and only non-zero counts show.
        tallies = []
        for label in ("completed", "failed"):
            count = sum(1 for sess in known if sess.status.value == label)
            if count:
                tallies.append(f"{count} {label}")
        if tallies:
            sections.append(f"Previous: {', '.join(tallies)}")

    # The working directory is always reported, so the result is never empty.
    sections.append(f"Working dir: {self.working_dir.absolute()}")

    return "\n\n".join(sections)
143
+
144
+ def _observe_full(self) -> str:
145
+ """Full observation for autonomous orchestrator runs."""
92
146
  parts = []
93
147
 
94
148
  # Progress bar and stats