zwarm 3.2.1__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/cli/pilot.py CHANGED
@@ -81,14 +81,23 @@ class ChoogingSpinner:
 
 
 # Context window sizes for different models (in tokens)
+# These are for the ORCHESTRATOR LLM, not the executors
 MODEL_CONTEXT_WINDOWS = {
+    # OpenAI models
     "gpt-5.1-codex": 200_000,
     "gpt-5.1-codex-mini": 200_000,
     "gpt-5.1-codex-max": 400_000,
     "gpt-5": 200_000,
     "gpt-5-mini": 200_000,
-    "claude-sonnet-4": 200_000,
-    "claude-opus-4": 200_000,
+    "o3": 200_000,
+    "o3-mini": 200_000,
+    # Claude models (if used as orchestrator)
+    "claude-sonnet": 200_000,
+    "claude-opus": 200_000,
+    "claude-haiku": 200_000,
+    "sonnet": 200_000,
+    "opus": 200_000,
+    "haiku": 200_000,
     # Fallback
     "default": 128_000,
 }
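The expanded table keys Claude entries by bare family names ("sonnet", "opus", "haiku"), which only pays off if lookups fall back sensibly for dated variants. A minimal sketch of such a lookup, assuming nothing about zwarm's internals (the `get_context_window` helper and the longest-prefix policy are illustrative, not part of the package):

```python
# Illustrative lookup against the table above; `get_context_window` is a
# hypothetical helper, not a zwarm API.
MODEL_CONTEXT_WINDOWS = {
    "gpt-5.1-codex": 200_000,
    "gpt-5.1-codex-max": 400_000,
    "claude-sonnet": 200_000,
    "default": 128_000,
}

def get_context_window(model: str) -> int:
    """Resolve a model name to a context window, falling back to 'default'."""
    name = model.lower()
    if name in MODEL_CONTEXT_WINDOWS:
        return MODEL_CONTEXT_WINDOWS[name]
    # Longest-prefix match so "gpt-5.1-codex-max-..." beats "gpt-5.1-codex"
    matches = [k for k in MODEL_CONTEXT_WINDOWS if name.startswith(k)]
    if matches:
        return MODEL_CONTEXT_WINDOWS[max(matches, key=len)]
    return MODEL_CONTEXT_WINDOWS["default"]

assert get_context_window("claude-sonnet-4-20250514") == 200_000
assert get_context_window("unknown-model") == 128_000
```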
@@ -186,10 +195,12 @@ def build_pilot_orchestrator(
     lm_class = lm_map.get(lm_choice, GPT5LargeVerbose)
     lm = lm_class()
 
-    # Load configuration
+    # Load configuration from working_dir (not cwd!)
+    # This ensures config.toml and .env are loaded from the project being worked on
     config = load_config(
         config_path=config_path,
         overrides=overrides,
+        working_dir=working_dir,
     )
 
     # Resolve working directory
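The new `working_dir` pass-through matters when the pilot is launched from outside the project checkout. A usage sketch built only from the signature shown in this diff (the project path and override value are illustrative):

```python
from pathlib import Path

from zwarm.core.config import load_config

# Illustrative: run the pilot against a project that is not the shell's cwd;
# config.toml and .env are then discovered under that project.
project = Path.home() / "projects" / "webapp"  # hypothetical path

config = load_config(
    overrides=["orchestrator.max_steps_per_turn=80"],  # --set style override
    working_dir=project,
)
```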
@@ -592,12 +603,33 @@ def execute_step_with_events(
     """
     had_message = False
 
+    # Update environment with current progress before perceive
+    # This ensures the observation has fresh step/token counts
+    if hasattr(orchestrator, "env") and hasattr(orchestrator.env, "update_progress"):
+        total_tokens = getattr(orchestrator, "_total_tokens", 0)
+        executor_usage = orchestrator.get_executor_usage() if hasattr(orchestrator, "get_executor_usage") else {}
+        orchestrator.env.update_progress(
+            step_count=getattr(orchestrator, "_step_count", 0),
+            max_steps=getattr(orchestrator, "maxSteps", 50),
+            total_tokens=total_tokens,
+            executor_tokens=executor_usage.get("total_tokens", 0),
+        )
+
     # Execute perceive (updates environment observation)
     orchestrator.perceive()
 
     # Execute invoke (calls LLM)
     response = orchestrator.invoke()
 
+    # Track cumulative token usage from the API response
+    # (This mirrors what step() does in orchestrator.py)
+    if hasattr(orchestrator, "_last_response") and orchestrator._last_response:
+        last_response = orchestrator._last_response
+        if hasattr(last_response, "usage") and last_response.usage:
+            usage = last_response.usage
+            tokens_this_call = getattr(usage, "total_tokens", 0)
+            orchestrator._total_tokens = getattr(orchestrator, "_total_tokens", 0) + tokens_this_call
+
     # Extract and render events from response
     if response:
         events = extract_events_from_response(response)
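Both new blocks are deliberately defensive: every attribute access is guarded with hasattr/getattr so older orchestrators without progress tracking keep working. The environment class itself is not shown in this diff; the stand-in below sketches the contract `update_progress` is assumed to satisfy (keyword names mirror the call site, but the class is not zwarm code):

```python
from dataclasses import dataclass, field
from typing import Any

@dataclass
class ProgressEnv:
    """Stand-in environment satisfying the hasattr(..., "update_progress") guard."""
    progress: dict[str, Any] = field(default_factory=dict)

    def update_progress(
        self,
        *,
        step_count: int,
        max_steps: int,
        total_tokens: int,
        executor_tokens: int,
    ) -> None:
        # Stash the counters; a real environment would fold these into the
        # observation that perceive() hands to the LLM on the next step.
        self.progress = {
            "step_count": step_count,
            "max_steps": max_steps,
            "total_tokens": total_tokens,
            "executor_tokens": executor_tokens,
        }

env = ProgressEnv()
env.update_progress(step_count=3, max_steps=50, total_tokens=12_400, executor_tokens=9_100)
assert env.progress["total_tokens"] == 12_400
```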
@@ -647,7 +679,7 @@ def execute_step_with_events(
 def run_until_response(
     orchestrator: Any,
     renderer: EventRenderer,
-    max_steps: int = 20,
+    max_steps: int = 60,
 ) -> List[tuple]:
     """
     Run the orchestrator until it produces a message response.
@@ -655,7 +687,7 @@ def run_until_response(
     Keeps stepping while the agent only produces tool calls.
     Stops when:
     - Agent produces a text message (returns to user)
-    - Max steps reached
+    - Max steps reached (configurable via orchestrator.max_steps_per_turn)
     - Stop condition triggered
 
     This is wrapped as a weave.op to group all child calls per turn.
@@ -663,7 +695,7 @@ def run_until_response(
     Args:
         orchestrator: The orchestrator instance
         renderer: Event renderer for output
-        max_steps: Safety limit on steps per turn
+        max_steps: Safety limit on steps per turn (default: 60)
 
     Returns:
         All tool results from the turn
@@ -701,6 +733,9 @@ def run_until_response(
             if not results:
                 break
 
+        # Show session status at end of turn (if there are any sessions)
+        render_session_status(orchestrator, renderer)
+
         return all_results
 
     return _run_turn()
@@ -722,7 +757,12 @@ def print_help(renderer: EventRenderer) -> None:
         "  :goto <turn|root>    Jump to a prior turn (e.g., :goto T1)",
         "  :sessions            Show executor sessions",
         "  :reasoning [on|off]  Toggle reasoning display",
-        "  :quit / :exit        Exit the pilot",
+        "  :save                Save state (for later resume)",
+        "  :quit / :exit        Exit the pilot (auto-saves)",
+        "",
+        "Resume:",
+        "  State is auto-saved after each turn. To resume a session:",
+        "    $ zwarm pilot --resume --instance <instance_id>",
         "",
         "Multiline input:",
         '  Start with """ and end with """ to enter multiple lines.',
@@ -756,6 +796,38 @@ def get_sessions_snapshot(orchestrator: Any) -> Dict[str, Any]:
     return {"sessions": []}
 
 
+def render_session_status(orchestrator: Any, renderer: EventRenderer) -> None:
+    """
+    Render a compact session status line if there are active sessions.
+
+    Shows: "Sessions: 2 running, 1 done, 0 failed"
+    Only displays if there are any sessions.
+    """
+    if not hasattr(orchestrator, "_session_manager"):
+        return
+
+    sessions = orchestrator._session_manager.list_sessions()
+    if not sessions:
+        return
+
+    running = sum(1 for s in sessions if s.status.value == "running")
+    completed = sum(1 for s in sessions if s.status.value == "completed")
+    failed = sum(1 for s in sessions if s.status.value == "failed")
+
+    # Build status line with colors
+    parts = []
+    if running > 0:
+        parts.append(f"[cyan]{running} running[/]")
+    if completed > 0:
+        parts.append(f"[green]{completed} done[/]")
+    if failed > 0:
+        parts.append(f"[red]{failed} failed[/]")
+
+    if parts:
+        status_line = ", ".join(parts)
+        console.print(f"[dim]Sessions:[/] {status_line}")
+
+
 def run_pilot(
     orchestrator: Any,
     *,
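The new `render_session_status` relies on only two things: a `_session_manager` exposing `list_sessions()`, and sessions whose `status.value` is a string. A self-contained check with test doubles (the enum and manager below are fakes for illustration, not zwarm classes):

```python
from enum import Enum
from types import SimpleNamespace

class FakeStatus(Enum):  # mirrors the status values the counters look for
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"

class FakeSessionManager:
    """Test double exposing only list_sessions(), as used above."""
    def __init__(self, statuses):
        self._sessions = [SimpleNamespace(status=s) for s in statuses]

    def list_sessions(self):
        return self._sessions

orch = SimpleNamespace(
    _session_manager=FakeSessionManager(
        [FakeStatus.RUNNING, FakeStatus.RUNNING, FakeStatus.COMPLETED]
    )
)
# render_session_status(orch, renderer) would print, via rich markup:
#   Sessions: 2 running, 1 done
```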
@@ -812,7 +884,8 @@ def _run_pilot_repl(
         })
 
         renderer.reset_turn()
-        results = run_until_response(orchestrator, renderer)
+        max_steps = getattr(orchestrator.config.orchestrator, "max_steps_per_turn", 60)
+        results = run_until_response(orchestrator, renderer, max_steps=max_steps)
 
         # Record checkpoint
         state.record(
@@ -893,6 +966,10 @@ def _run_pilot_repl(
 
         # :quit / :exit
         if cmd in ("quit", "exit", "q"):
+            # Save state before exiting
+            if hasattr(orchestrator, "save_state"):
+                orchestrator.save_state()
+                renderer.status("[dim]State saved.[/]")
            renderer.status("Goodbye!")
            break
 
@@ -1085,6 +1162,20 @@ def _run_pilot_repl(
             renderer.status(f"Reasoning display: {current}")
             continue
 
+        # :save
+        if cmd == "save":
+            if hasattr(orchestrator, "save_state"):
+                orchestrator.save_state()
+                instance_id = getattr(orchestrator, "instance_id", None)
+                if instance_id:
+                    renderer.status(f"[green]✓[/] State saved (instance: {instance_id[:8]})")
+                    renderer.status(f"  [dim]Resume with: zwarm pilot --resume --instance {instance_id[:8]}[/]")
+                else:
+                    renderer.status("[green]✓[/] State saved")
+            else:
+                renderer.error("State saving not available")
+            continue
+
         # Unknown command
         renderer.error(f"Unknown command: {cmd}")
         renderer.status("Type :help for available commands.")
@@ -1101,8 +1192,9 @@ def _run_pilot_repl(
 
         # Execute steps until agent responds with a message
         renderer.reset_turn()
+        max_steps = getattr(orchestrator.config.orchestrator, "max_steps_per_turn", 60)
         try:
-            results = run_until_response(orchestrator, renderer)
+            results = run_until_response(orchestrator, renderer, max_steps=max_steps)
         except Exception as e:
             renderer.error(f"Step failed: {e}")
             # Remove the user message on failure
@@ -1124,6 +1216,10 @@ def _run_pilot_repl(
             },
         )
 
+        # Save state for resume capability
+        if hasattr(orchestrator, "save_state"):
+            orchestrator.save_state()
+
         # Show turn info
         cp = state.current()
         if cp:
@@ -1139,4 +1235,6 @@ def _run_pilot_repl(
         if hasattr(orchestrator, "stopCondition") and orchestrator.stopCondition:
             renderer.status("")
             renderer.status("Orchestrator signaled completion.")
+            if hasattr(orchestrator, "save_state"):
+                orchestrator.save_state()
             break
zwarm/core/config.py CHANGED
@@ -37,6 +37,7 @@ class ExecutorConfig:
     sandbox: str = "workspace-write"  # read-only | workspace-write | danger-full-access
     timeout: int = 3600
     reasoning_effort: str | None = "high"  # low | medium | high (default to high for compatibility)
+    # Note: web_search is always enabled via .codex/config.toml (set up by `zwarm init`)
 
 
 @dataclass
@@ -59,8 +60,8 @@ class OrchestratorConfig:
     prompt: str | None = None  # path to prompt yaml
     tools: list[str] = field(default_factory=lambda: ["delegate", "converse", "check_session", "end_session", "bash"])
     max_steps: int = 50
+    max_steps_per_turn: int = 60  # Max tool-call steps before returning to user (pilot mode)
     parallel_delegations: int = 4
-    sync_first: bool = True  # prefer sync mode by default
     compaction: CompactionConfig = field(default_factory=CompactionConfig)
 
     # Directory restrictions for agent delegations
@@ -172,8 +173,8 @@ class ZwarmConfig:
             "prompt": self.orchestrator.prompt,
             "tools": self.orchestrator.tools,
             "max_steps": self.orchestrator.max_steps,
+            "max_steps_per_turn": self.orchestrator.max_steps_per_turn,
             "parallel_delegations": self.orchestrator.parallel_delegations,
-            "sync_first": self.orchestrator.sync_first,
             "compaction": {
                 "enabled": self.orchestrator.compaction.enabled,
                 "max_tokens": self.orchestrator.compaction.max_tokens,
@@ -195,15 +196,16 @@ class ZwarmConfig:
     }
 
 
-def load_env(path: Path | None = None) -> None:
+def load_env(path: Path | None = None, base_dir: Path | None = None) -> None:
     """Load .env file if it exists."""
     if path is None:
-        path = Path.cwd() / ".env"
+        base = base_dir or Path.cwd()
+        path = base / ".env"
     if path.exists():
         load_dotenv(path)
 
 
-def load_toml_config(path: Path | None = None) -> dict[str, Any]:
+def load_toml_config(path: Path | None = None, base_dir: Path | None = None) -> dict[str, Any]:
     """
     Load config.toml file.
 
@@ -211,11 +213,16 @@ def load_toml_config(path: Path | None = None) -> dict[str, Any]:
     1. Explicit path (if provided)
     2. .zwarm/config.toml (new standard location)
     3. config.toml (legacy location for backwards compat)
+
+    Args:
+        path: Explicit path to config.toml
+        base_dir: Base directory to search in (defaults to cwd)
     """
     if path is None:
+        base = base_dir or Path.cwd()
         # Try new location first
-        new_path = Path.cwd() / ".zwarm" / "config.toml"
-        legacy_path = Path.cwd() / "config.toml"
+        new_path = base / ".zwarm" / "config.toml"
+        legacy_path = base / "config.toml"
         if new_path.exists():
             path = new_path
         elif legacy_path.exists():
@@ -306,6 +313,7 @@ def load_config(
     toml_path: Path | None = None,
     env_path: Path | None = None,
     overrides: list[str] | None = None,
+    working_dir: Path | None = None,
 ) -> ZwarmConfig:
     """
     Load configuration with full precedence chain:
@@ -314,15 +322,24 @@ def load_config(
     3. YAML config file (if provided)
     4. CLI overrides (--set key=value)
     5. Environment variables (for secrets)
+
+    Args:
+        config_path: Path to YAML config file
+        toml_path: Explicit path to config.toml
+        env_path: Explicit path to .env file
+        overrides: CLI overrides (--set key=value)
+        working_dir: Working directory to search for config files (defaults to cwd).
+            This is important when using --working-dir flag to ensure
+            config is loaded from the project directory, not invoke directory.
     """
     # Load .env first (for secrets)
-    load_env(env_path)
+    load_env(env_path, base_dir=working_dir)
 
     # Start with defaults
     config_dict: dict[str, Any] = {}
 
     # Layer in config.toml
-    toml_config = load_toml_config(toml_path)
+    toml_config = load_toml_config(toml_path, base_dir=working_dir)
     if toml_config:
         config_dict = deep_merge(config_dict, toml_config)
 
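Threading `base_dir` through `load_env` and `load_toml_config` keeps the documented search order, just rooted at `working_dir`. A sketch of the discovery precedence, assuming the TOML sections mirror ZwarmConfig's layout (the `[orchestrator]` table name is inferred from `to_dict` above):

```python
import tempfile
from pathlib import Path

from zwarm.core.config import load_toml_config

# .zwarm/config.toml should win over the legacy top-level config.toml
# when both exist under the same base directory.
with tempfile.TemporaryDirectory() as tmp:
    base = Path(tmp)
    (base / ".zwarm").mkdir()
    (base / ".zwarm" / "config.toml").write_text("[orchestrator]\nmax_steps_per_turn = 80\n")
    (base / "config.toml").write_text("[orchestrator]\nmax_steps_per_turn = 10\n")

    cfg = load_toml_config(base_dir=base)
    assert cfg["orchestrator"]["max_steps_per_turn"] == 80  # new location wins
```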
zwarm/core/costs.py CHANGED
@@ -1,109 +1,42 @@
 """
 Token cost estimation for LLM models.
 
-Pricing data is hardcoded and may become stale. Last updated: 2026-01.
+This module re-exports from the centralized model registry.
+For adding new models, edit: zwarm/core/registry.py
 
-Sources:
-- https://www.helicone.ai/llm-cost/provider/openai/model/gpt-5.1-codex
-- https://pricepertoken.com/pricing-page/model/openai-codex-mini
+Backwards-compatible API preserved for existing code.
 """
 
 from __future__ import annotations
 
-from dataclasses import dataclass
-from typing import Any
-
-
-@dataclass
-class ModelPricing:
-    """Pricing for a model in $ per million tokens."""
-    input_per_million: float
-    output_per_million: float
-    cached_input_per_million: float | None = None  # Some models have cached input discount
-
-    def estimate_cost(
-        self,
-        input_tokens: int,
-        output_tokens: int,
-        cached_tokens: int = 0,
-    ) -> float:
-        """
-        Estimate cost in dollars.
-
-        Args:
-            input_tokens: Number of input tokens
-            output_tokens: Number of output tokens
-            cached_tokens: Number of cached input tokens (if applicable)
-
-        Returns:
-            Estimated cost in USD
-        """
-        input_cost = (input_tokens / 1_000_000) * self.input_per_million
-        output_cost = (output_tokens / 1_000_000) * self.output_per_million
-
-        cached_cost = 0.0
-        if cached_tokens and self.cached_input_per_million:
-            cached_cost = (cached_tokens / 1_000_000) * self.cached_input_per_million
-
-        return input_cost + output_cost + cached_cost
-
-
-# Model pricing table ($ per million tokens)
-# Last updated: 2026-01
-MODEL_PRICING: dict[str, ModelPricing] = {
-    # OpenAI Codex models
-    "gpt-5.1-codex": ModelPricing(
-        input_per_million=1.25,
-        output_per_million=10.00,
-        cached_input_per_million=0.125,  # 90% discount for cached
-    ),
-    "gpt-5.1-codex-mini": ModelPricing(
-        input_per_million=0.25,
-        output_per_million=2.00,
-        cached_input_per_million=0.025,
-    ),
-    "gpt-5.1-codex-max": ModelPricing(
-        input_per_million=1.25,
-        output_per_million=10.00,
-        cached_input_per_million=0.125,
-    ),
-    # GPT-5 base models (for reference)
-    "gpt-5": ModelPricing(
-        input_per_million=1.25,
-        output_per_million=10.00,
-    ),
-    "gpt-5-mini": ModelPricing(
-        input_per_million=0.25,
-        output_per_million=2.00,
-    ),
-    # Claude models (Anthropic)
-    "claude-sonnet-4-20250514": ModelPricing(
-        input_per_million=3.00,
-        output_per_million=15.00,
-    ),
-    "claude-opus-4-20250514": ModelPricing(
-        input_per_million=15.00,
-        output_per_million=75.00,
-    ),
-    "claude-3-5-sonnet-20241022": ModelPricing(
-        input_per_million=3.00,
-        output_per_million=15.00,
-    ),
-}
-
-# Aliases for common model names
-MODEL_ALIASES: dict[str, str] = {
-    "codex": "gpt-5.1-codex",
-    "codex-mini": "gpt-5.1-codex-mini",
-    "codex-max": "gpt-5.1-codex-max",
-    "gpt5": "gpt-5",
-    "gpt5-mini": "gpt-5-mini",
-    "sonnet": "claude-sonnet-4-20250514",
-    "opus": "claude-opus-4-20250514",
-}
-
-
-def get_pricing(model: str) -> ModelPricing | None:
+# Re-export everything from registry for backwards compatibility
+from zwarm.core.registry import (
+    ModelInfo,
+    MODELS,
+    resolve_model,
+    get_adapter_for_model,
+    get_default_model,
+    list_models,
+    list_adapters,
+    get_models_help_text,
+    get_models_table_data,
+    estimate_cost,
+    format_cost,
+    estimate_session_cost,
+)
+
+# Backwards compatibility alias
+ModelPricing = ModelInfo
+
+# Legacy aliases for backwards compatibility
+MODEL_PRICING = {m.canonical: m for m in MODELS}
+MODEL_ALIASES = {}
+for m in MODELS:
+    for alias in m.aliases:
+        MODEL_ALIASES[alias] = m.canonical
+
+
+def get_pricing(model: str) -> ModelInfo | None:
     """
     Get pricing for a model.
 
@@ -111,89 +44,28 @@ def get_pricing(model: str) -> ModelPricing | None:
         model: Model name or alias
 
     Returns:
-        ModelPricing or None if unknown
-    """
-    # Check aliases first
-    resolved = MODEL_ALIASES.get(model.lower(), model)
-
-    # Exact match
-    if resolved in MODEL_PRICING:
-        return MODEL_PRICING[resolved]
-
-    # Try lowercase
-    if resolved.lower() in MODEL_PRICING:
-        return MODEL_PRICING[resolved.lower()]
-
-    # Try prefix matching (e.g., "gpt-5.1-codex-mini-2026-01" -> "gpt-5.1-codex-mini")
-    for known_model in MODEL_PRICING:
-        if resolved.lower().startswith(known_model.lower()):
-            return MODEL_PRICING[known_model]
-
-    return None
-
-
-def estimate_cost(
-    model: str,
-    input_tokens: int,
-    output_tokens: int,
-    cached_tokens: int = 0,
-) -> float | None:
+        ModelInfo or None if unknown
     """
-    Estimate cost for a model run.
-
-    Args:
-        model: Model name
-        input_tokens: Number of input tokens
-        output_tokens: Number of output tokens
-        cached_tokens: Number of cached input tokens
-
-    Returns:
-        Cost in USD, or None if model pricing unknown
-    """
-    pricing = get_pricing(model)
-    if pricing is None:
-        return None
-
-    return pricing.estimate_cost(input_tokens, output_tokens, cached_tokens)
-
-
-def format_cost(cost: float | None) -> str:
-    """Format cost as a human-readable string."""
-    if cost is None:
-        return "?"
-    if cost < 0.01:
-        return f"${cost:.4f}"
-    elif cost < 1.00:
-        return f"${cost:.3f}"
-    else:
-        return f"${cost:.2f}"
-
-
-def estimate_session_cost(
-    model: str,
-    token_usage: dict[str, Any],
-) -> dict[str, Any]:
-    """
-    Estimate cost for a session given its token usage.
-
-    Args:
-        model: Model used
-        token_usage: Dict with input_tokens, output_tokens, etc.
-
-    Returns:
-        Dict with cost info: {cost, cost_formatted, pricing_known}
-    """
-    input_tokens = token_usage.get("input_tokens", 0)
-    output_tokens = token_usage.get("output_tokens", 0)
-    cached_tokens = token_usage.get("cached_tokens", 0)
-
-    cost = estimate_cost(model, input_tokens, output_tokens, cached_tokens)
-
-    return {
-        "cost": cost,
-        "cost_formatted": format_cost(cost),
-        "pricing_known": cost is not None,
-        "model": model,
-        "input_tokens": input_tokens,
-        "output_tokens": output_tokens,
-    }
+    return resolve_model(model)
+
+
+__all__ = [
+    # New API
+    "ModelInfo",
+    "MODELS",
+    "resolve_model",
+    "get_adapter_for_model",
+    "get_default_model",
+    "list_models",
+    "list_adapters",
+    "get_models_help_text",
+    "get_models_table_data",
+    "estimate_cost",
+    "format_cost",
+    "estimate_session_cost",
+    # Legacy API
+    "MODEL_PRICING",
+    "MODEL_ALIASES",
+    "ModelPricing",
+    "get_pricing",
+]
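Since the shim rebuilds `MODEL_PRICING` and `MODEL_ALIASES` from the registry and keeps `get_pricing` as a thin wrapper over `resolve_model`, existing imports should behave as before. A sketch of the preserved call sites (exact values depend on what zwarm/core/registry.py ships; the keyword names assume the legacy `estimate_cost` signature survived the move):

```python
from zwarm.core.costs import MODEL_ALIASES, estimate_cost, format_cost, get_pricing

info = get_pricing("sonnet")       # alias resolution now delegates to resolve_model()
print(MODEL_ALIASES.get("codex"))  # canonical name per the registry, if that alias exists

cost = estimate_cost(
    "gpt-5.1-codex",
    input_tokens=50_000,
    output_tokens=4_000,
)
print(format_cost(cost))  # "$0.xxx"-style string for sub-dollar estimates, "?" if unknown
```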