zwarm 3.2.1__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zwarm/cli/interactive.py +346 -30
- zwarm/cli/main.py +221 -90
- zwarm/cli/pilot.py +107 -9
- zwarm/core/config.py +26 -9
- zwarm/core/costs.py +55 -183
- zwarm/core/registry.py +329 -0
- zwarm/core/test_config.py +2 -3
- zwarm/orchestrator.py +17 -43
- zwarm/sessions/__init__.py +48 -9
- zwarm/sessions/base.py +501 -0
- zwarm/sessions/claude.py +481 -0
- zwarm/sessions/manager.py +233 -486
- zwarm/tools/delegation.py +93 -31
- {zwarm-3.2.1.dist-info → zwarm-3.6.0.dist-info}/METADATA +73 -21
- {zwarm-3.2.1.dist-info → zwarm-3.6.0.dist-info}/RECORD +17 -21
- zwarm/adapters/__init__.py +0 -21
- zwarm/adapters/base.py +0 -109
- zwarm/adapters/claude_code.py +0 -357
- zwarm/adapters/codex_mcp.py +0 -1262
- zwarm/adapters/registry.py +0 -69
- zwarm/adapters/test_codex_mcp.py +0 -274
- zwarm/adapters/test_registry.py +0 -68
- {zwarm-3.2.1.dist-info → zwarm-3.6.0.dist-info}/WHEEL +0 -0
- {zwarm-3.2.1.dist-info → zwarm-3.6.0.dist-info}/entry_points.txt +0 -0
zwarm/cli/pilot.py
CHANGED
@@ -81,14 +81,23 @@ class ChoogingSpinner:
 
 
 # Context window sizes for different models (in tokens)
+# These are for the ORCHESTRATOR LLM, not the executors
 MODEL_CONTEXT_WINDOWS = {
+    # OpenAI models
    "gpt-5.1-codex": 200_000,
    "gpt-5.1-codex-mini": 200_000,
    "gpt-5.1-codex-max": 400_000,
    "gpt-5": 200_000,
    "gpt-5-mini": 200_000,
-    "…
-    "…
+    "o3": 200_000,
+    "o3-mini": 200_000,
+    # Claude models (if used as orchestrator)
+    "claude-sonnet": 200_000,
+    "claude-opus": 200_000,
+    "claude-haiku": 200_000,
+    "sonnet": 200_000,
+    "opus": 200_000,
+    "haiku": 200_000,
    # Fallback
    "default": 128_000,
 }
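Nothing in this hunk shows how the table is consumed downstream. A minimal lookup sketch, assuming exact-match-then-longest-prefix resolution with the `default` fallback (the `get_context_window` helper is hypothetical, not part of zwarm):

```python
# Hypothetical consumer of MODEL_CONTEXT_WINDOWS (illustration only).
MODEL_CONTEXT_WINDOWS = {
    "gpt-5.1-codex": 200_000,
    "gpt-5.1-codex-max": 400_000,
    "sonnet": 200_000,
    "default": 128_000,  # fallback
}

def get_context_window(model: str) -> int:
    # Exact match first.
    if model in MODEL_CONTEXT_WINDOWS:
        return MODEL_CONTEXT_WINDOWS[model]
    # Then longest-prefix match, so a dated variant like
    # "gpt-5.1-codex-max-2026-01" resolves to the -max entry, not the base one.
    matches = [k for k in MODEL_CONTEXT_WINDOWS if k != "default" and model.startswith(k)]
    if matches:
        return MODEL_CONTEXT_WINDOWS[max(matches, key=len)]
    return MODEL_CONTEXT_WINDOWS["default"]

assert get_context_window("gpt-5.1-codex-max-2026-01") == 400_000
assert get_context_window("unknown-model") == 128_000
```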
@@ -186,10 +195,12 @@ def build_pilot_orchestrator(
    lm_class = lm_map.get(lm_choice, GPT5LargeVerbose)
    lm = lm_class()
 
-    # Load configuration
+    # Load configuration from working_dir (not cwd!)
+    # This ensures config.toml and .env are loaded from the project being worked on
    config = load_config(
        config_path=config_path,
        overrides=overrides,
+        working_dir=working_dir,
    )
 
    # Resolve working directory
@@ -592,12 +603,33 @@ def execute_step_with_events(
    """
    had_message = False
 
+    # Update environment with current progress before perceive
+    # This ensures the observation has fresh step/token counts
+    if hasattr(orchestrator, "env") and hasattr(orchestrator.env, "update_progress"):
+        total_tokens = getattr(orchestrator, "_total_tokens", 0)
+        executor_usage = orchestrator.get_executor_usage() if hasattr(orchestrator, "get_executor_usage") else {}
+        orchestrator.env.update_progress(
+            step_count=getattr(orchestrator, "_step_count", 0),
+            max_steps=getattr(orchestrator, "maxSteps", 50),
+            total_tokens=total_tokens,
+            executor_tokens=executor_usage.get("total_tokens", 0),
+        )
+
    # Execute perceive (updates environment observation)
    orchestrator.perceive()
 
    # Execute invoke (calls LLM)
    response = orchestrator.invoke()
 
+    # Track cumulative token usage from the API response
+    # (This mirrors what step() does in orchestrator.py)
+    if hasattr(orchestrator, "_last_response") and orchestrator._last_response:
+        last_response = orchestrator._last_response
+        if hasattr(last_response, "usage") and last_response.usage:
+            usage = last_response.usage
+            tokens_this_call = getattr(usage, "total_tokens", 0)
+            orchestrator._total_tokens = getattr(orchestrator, "_total_tokens", 0) + tokens_this_call
+
    # Extract and render events from response
    if response:
        events = extract_events_from_response(response)
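Both added blocks are deliberately defensive: every attribute access goes through `hasattr`/`getattr`, so the function degrades to a no-op on orchestrators that lack the counters. A toy run of the accumulation logic, with `SimpleNamespace` standing in for the orchestrator and API response (shapes assumed for illustration):

```python
from types import SimpleNamespace

orch = SimpleNamespace()  # fresh orchestrator: no _total_tokens attribute yet

for total in (120, 340):  # total_tokens reported by two successive API calls
    orch._last_response = SimpleNamespace(usage=SimpleNamespace(total_tokens=total))
    if hasattr(orch, "_last_response") and orch._last_response:
        usage = orch._last_response.usage
        tokens_this_call = getattr(usage, "total_tokens", 0)
        # the getattr default means the first iteration starts the counter at 0
        orch._total_tokens = getattr(orch, "_total_tokens", 0) + tokens_this_call

assert orch._total_tokens == 460  # 120 + 340 accumulated across steps
```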
@@ -647,7 +679,7 @@ def execute_step_with_events(
 def run_until_response(
    orchestrator: Any,
    renderer: EventRenderer,
-    max_steps: int = …
+    max_steps: int = 60,
 ) -> List[tuple]:
    """
    Run the orchestrator until it produces a message response.
@@ -655,7 +687,7 @@ def run_until_response(
    Keeps stepping while the agent only produces tool calls.
    Stops when:
    - Agent produces a text message (returns to user)
-    - Max steps reached
+    - Max steps reached (configurable via orchestrator.max_steps_per_turn)
    - Stop condition triggered
 
    This is wrapped as a weave.op to group all child calls per turn.
@@ -663,7 +695,7 @@ def run_until_response(
    Args:
        orchestrator: The orchestrator instance
        renderer: Event renderer for output
-        max_steps: Safety limit on steps per turn
+        max_steps: Safety limit on steps per turn (default: 60)
 
    Returns:
        All tool results from the turn
@@ -701,6 +733,9 @@ def run_until_response(
            if not results:
                break
 
+        # Show session status at end of turn (if there are any sessions)
+        render_session_status(orchestrator, renderer)
+
        return all_results
 
    return _run_turn()
@@ -722,7 +757,12 @@ def print_help(renderer: EventRenderer) -> None:
        " :goto <turn|root>   Jump to a prior turn (e.g., :goto T1)",
        " :sessions           Show executor sessions",
        " :reasoning [on|off] Toggle reasoning display",
-        " :…
+        " :save               Save state (for later resume)",
+        " :quit / :exit       Exit the pilot (auto-saves)",
+        "",
+        "Resume:",
+        "  State is auto-saved after each turn. To resume a session:",
+        "  $ zwarm pilot --resume --instance <instance_id>",
        "",
        "Multiline input:",
        '  Start with """ and end with """ to enter multiple lines.',
@@ -756,6 +796,38 @@ def get_sessions_snapshot(orchestrator: Any) -> Dict[str, Any]:
    return {"sessions": []}
 
 
+def render_session_status(orchestrator: Any, renderer: EventRenderer) -> None:
+    """
+    Render a compact session status line if there are active sessions.
+
+    Shows: "Sessions: 2 running, 1 done, 0 failed"
+    Only displays if there are any sessions.
+    """
+    if not hasattr(orchestrator, "_session_manager"):
+        return
+
+    sessions = orchestrator._session_manager.list_sessions()
+    if not sessions:
+        return
+
+    running = sum(1 for s in sessions if s.status.value == "running")
+    completed = sum(1 for s in sessions if s.status.value == "completed")
+    failed = sum(1 for s in sessions if s.status.value == "failed")
+
+    # Build status line with colors
+    parts = []
+    if running > 0:
+        parts.append(f"[cyan]{running} running[/]")
+    if completed > 0:
+        parts.append(f"[green]{completed} done[/]")
+    if failed > 0:
+        parts.append(f"[red]{failed} failed[/]")
+
+    if parts:
+        status_line = ", ".join(parts)
+        console.print(f"[dim]Sessions:[/] {status_line}")
+
+
 def run_pilot(
    orchestrator: Any,
    *,
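`render_session_status` compares `s.status.value` to bare strings, which assumes session status is a string-valued enum. A minimal sketch of that assumed shape (the `SessionStatus`/`Session` names are illustrative; the real definitions live in the new zwarm/sessions/base.py, which this diff does not show in full):

```python
from dataclasses import dataclass
from enum import Enum

class SessionStatus(Enum):
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"

@dataclass
class Session:
    status: SessionStatus

sessions = [Session(SessionStatus.RUNNING), Session(SessionStatus.COMPLETED)]
# Same counting trick as render_session_status: Enum.value yields the string.
running = sum(1 for s in sessions if s.status.value == "running")
assert running == 1
```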
@@ -812,7 +884,8 @@ def _run_pilot_repl(
        })
 
        renderer.reset_turn()
-        …
+        max_steps = getattr(orchestrator.config.orchestrator, "max_steps_per_turn", 60)
+        results = run_until_response(orchestrator, renderer, max_steps=max_steps)
 
        # Record checkpoint
        state.record(
@@ -893,6 +966,10 @@ def _run_pilot_repl(
 
        # :quit / :exit
        if cmd in ("quit", "exit", "q"):
+            # Save state before exiting
+            if hasattr(orchestrator, "save_state"):
+                orchestrator.save_state()
+                renderer.status("[dim]State saved.[/]")
            renderer.status("Goodbye!")
            break
 
@@ -1085,6 +1162,20 @@ def _run_pilot_repl(
            renderer.status(f"Reasoning display: {current}")
            continue
 
+        # :save
+        if cmd == "save":
+            if hasattr(orchestrator, "save_state"):
+                orchestrator.save_state()
+                instance_id = getattr(orchestrator, "instance_id", None)
+                if instance_id:
+                    renderer.status(f"[green]✓[/] State saved (instance: {instance_id[:8]})")
+                    renderer.status(f"  [dim]Resume with: zwarm pilot --resume --instance {instance_id[:8]}[/]")
+                else:
+                    renderer.status("[green]✓[/] State saved")
+            else:
+                renderer.error("State saving not available")
+            continue
+
        # Unknown command
        renderer.error(f"Unknown command: {cmd}")
        renderer.status("Type :help for available commands.")
@@ -1101,8 +1192,9 @@ def _run_pilot_repl(
 
        # Execute steps until agent responds with a message
        renderer.reset_turn()
+        max_steps = getattr(orchestrator.config.orchestrator, "max_steps_per_turn", 60)
        try:
-            results = run_until_response(orchestrator, renderer)
+            results = run_until_response(orchestrator, renderer, max_steps=max_steps)
        except Exception as e:
            renderer.error(f"Step failed: {e}")
            # Remove the user message on failure
@@ -1124,6 +1216,10 @@ def _run_pilot_repl(
            },
        )
 
+        # Save state for resume capability
+        if hasattr(orchestrator, "save_state"):
+            orchestrator.save_state()
+
        # Show turn info
        cp = state.current()
        if cp:
@@ -1139,4 +1235,6 @@ def _run_pilot_repl(
        if hasattr(orchestrator, "stopCondition") and orchestrator.stopCondition:
            renderer.status("")
            renderer.status("Orchestrator signaled completion.")
+            if hasattr(orchestrator, "save_state"):
+                orchestrator.save_state()
            break
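The save-on-exit, :save, per-turn, and on-completion paths all repeat the same `hasattr` guard around `save_state()`. A small helper expressing that pattern (the `try_save_state` name is hypothetical; the diff inlines the guard at each site):

```python
from typing import Any

def try_save_state(orchestrator: Any) -> bool:
    """Persist state if this orchestrator supports it; report whether it did."""
    if hasattr(orchestrator, "save_state"):
        orchestrator.save_state()
        return True
    return False
```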
zwarm/core/config.py
CHANGED
@@ -37,6 +37,7 @@ class ExecutorConfig:
    sandbox: str = "workspace-write"  # read-only | workspace-write | danger-full-access
    timeout: int = 3600
    reasoning_effort: str | None = "high"  # low | medium | high (default to high for compatibility)
+    # Note: web_search is always enabled via .codex/config.toml (set up by `zwarm init`)
 
 
 @dataclass
@@ -59,8 +60,8 @@ class OrchestratorConfig:
    prompt: str | None = None  # path to prompt yaml
    tools: list[str] = field(default_factory=lambda: ["delegate", "converse", "check_session", "end_session", "bash"])
    max_steps: int = 50
+    max_steps_per_turn: int = 60  # Max tool-call steps before returning to user (pilot mode)
    parallel_delegations: int = 4
-    sync_first: bool = True  # prefer sync mode by default
    compaction: CompactionConfig = field(default_factory=CompactionConfig)
 
    # Directory restrictions for agent delegations
@@ -172,8 +173,8 @@ class ZwarmConfig:
            "prompt": self.orchestrator.prompt,
            "tools": self.orchestrator.tools,
            "max_steps": self.orchestrator.max_steps,
+            "max_steps_per_turn": self.orchestrator.max_steps_per_turn,
            "parallel_delegations": self.orchestrator.parallel_delegations,
-            "sync_first": self.orchestrator.sync_first,
            "compaction": {
                "enabled": self.orchestrator.compaction.enabled,
                "max_tokens": self.orchestrator.compaction.max_tokens,
@@ -195,15 +196,16 @@ class ZwarmConfig:
        }
 
 
-def load_env(path: Path | None = None) -> None:
+def load_env(path: Path | None = None, base_dir: Path | None = None) -> None:
    """Load .env file if it exists."""
    if path is None:
-        …
+        base = base_dir or Path.cwd()
+        path = base / ".env"
    if path.exists():
        load_dotenv(path)
 
 
-def load_toml_config(path: Path | None = None) -> dict[str, Any]:
+def load_toml_config(path: Path | None = None, base_dir: Path | None = None) -> dict[str, Any]:
    """
    Load config.toml file.
 
@@ -211,11 +213,16 @@ def load_toml_config(path: Path | None = None) -> dict[str, Any]:
    1. Explicit path (if provided)
    2. .zwarm/config.toml (new standard location)
    3. config.toml (legacy location for backwards compat)
+
+    Args:
+        path: Explicit path to config.toml
+        base_dir: Base directory to search in (defaults to cwd)
    """
    if path is None:
+        base = base_dir or Path.cwd()
        # Try new location first
-        new_path = …
-        legacy_path = …
+        new_path = base / ".zwarm" / "config.toml"
+        legacy_path = base / "config.toml"
        if new_path.exists():
            path = new_path
        elif legacy_path.exists():
@@ -306,6 +313,7 @@ def load_config(
    toml_path: Path | None = None,
    env_path: Path | None = None,
    overrides: list[str] | None = None,
+    working_dir: Path | None = None,
 ) -> ZwarmConfig:
    """
    Load configuration with full precedence chain:
@@ -314,15 +322,24 @@ def load_config(
    3. YAML config file (if provided)
    4. CLI overrides (--set key=value)
    5. Environment variables (for secrets)
+
+    Args:
+        config_path: Path to YAML config file
+        toml_path: Explicit path to config.toml
+        env_path: Explicit path to .env file
+        overrides: CLI overrides (--set key=value)
+        working_dir: Working directory to search for config files (defaults to cwd).
+            This is important when using --working-dir flag to ensure
+            config is loaded from the project directory, not invoke directory.
    """
    # Load .env first (for secrets)
-    load_env(env_path)
+    load_env(env_path, base_dir=working_dir)
 
    # Start with defaults
    config_dict: dict[str, Any] = {}
 
    # Layer in config.toml
-    toml_config = load_toml_config(toml_path)
+    toml_config = load_toml_config(toml_path, base_dir=working_dir)
    if toml_config:
        config_dict = deep_merge(config_dict, toml_config)
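For reference, the resolution order the two functions above now share, extracted into a standalone sketch (`resolve_config_path` is a name invented for this illustration):

```python
from pathlib import Path

def resolve_config_path(base_dir: Path | None = None) -> Path | None:
    # Mirrors load_toml_config above: an explicit base_dir beats cwd, and the
    # .zwarm/ location beats the legacy top-level config.toml.
    base = base_dir or Path.cwd()
    new_path = base / ".zwarm" / "config.toml"
    legacy_path = base / "config.toml"
    if new_path.exists():
        return new_path
    if legacy_path.exists():
        return legacy_path
    return None

# Invoked from anywhere while piloting /repos/myproject:
#   resolve_config_path(Path("/repos/myproject"))
# returns /repos/myproject/.zwarm/config.toml when it exists.
```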
zwarm/core/costs.py
CHANGED
@@ -1,109 +1,42 @@
 """
 Token cost estimation for LLM models.
 
-…
+This module re-exports from the centralized model registry.
+For adding new models, edit: zwarm/core/registry.py
 
-…
-- https://www.helicone.ai/llm-cost/provider/openai/model/gpt-5.1-codex
-- https://pricepertoken.com/pricing-page/model/openai-codex-mini
+Backwards-compatible API preserved for existing code.
 """
 
 from __future__ import annotations
 
-from …
-from …
-… (old lines 15-40 elided in the diff view)
-        input_cost = (input_tokens / 1_000_000) * self.input_per_million
-        output_cost = (output_tokens / 1_000_000) * self.output_per_million
-
-        cached_cost = 0.0
-        if cached_tokens and self.cached_input_per_million:
-            cached_cost = (cached_tokens / 1_000_000) * self.cached_input_per_million
-
-        return input_cost + output_cost + cached_cost
-
-
-# Model pricing table ($ per million tokens)
-# Last updated: 2026-01
-MODEL_PRICING: dict[str, ModelPricing] = {
-    # OpenAI Codex models
-    "gpt-5.1-codex": ModelPricing(
-        input_per_million=1.25,
-        output_per_million=10.00,
-        cached_input_per_million=0.125,  # 90% discount for cached
-    ),
-    "gpt-5.1-codex-mini": ModelPricing(
-        input_per_million=0.25,
-        output_per_million=2.00,
-        cached_input_per_million=0.025,
-    ),
-    "gpt-5.1-codex-max": ModelPricing(
-        input_per_million=1.25,
-        output_per_million=10.00,
-        cached_input_per_million=0.125,
-    ),
-    # GPT-5 base models (for reference)
-    "gpt-5": ModelPricing(
-        input_per_million=1.25,
-        output_per_million=10.00,
-    ),
-    "gpt-5-mini": ModelPricing(
-        input_per_million=0.25,
-        output_per_million=2.00,
-    ),
-    # Claude models (Anthropic)
-    "claude-sonnet-4-20250514": ModelPricing(
-        input_per_million=3.00,
-        output_per_million=15.00,
-    ),
-    "claude-opus-4-20250514": ModelPricing(
-        input_per_million=15.00,
-        output_per_million=75.00,
-    ),
-    "claude-3-5-sonnet-20241022": ModelPricing(
-        input_per_million=3.00,
-        output_per_million=15.00,
-    ),
-}
-
-# Aliases for common model names
-MODEL_ALIASES: dict[str, str] = {
-    "codex": "gpt-5.1-codex",
-    "codex-mini": "gpt-5.1-codex-mini",
-    "codex-max": "gpt-5.1-codex-max",
-    "gpt5": "gpt-5",
-    "gpt5-mini": "gpt-5-mini",
-    "sonnet": "claude-sonnet-4-20250514",
-    "opus": "claude-opus-4-20250514",
-}
-
-
-def get_pricing(model: str) -> ModelPricing | None:
+# Re-export everything from registry for backwards compatibility
+from zwarm.core.registry import (
+    ModelInfo,
+    MODELS,
+    resolve_model,
+    get_adapter_for_model,
+    get_default_model,
+    list_models,
+    list_adapters,
+    get_models_help_text,
+    get_models_table_data,
+    estimate_cost,
+    format_cost,
+    estimate_session_cost,
+)
+
+# Backwards compatibility alias
+ModelPricing = ModelInfo
+
+# Legacy aliases for backwards compatibility
+MODEL_PRICING = {m.canonical: m for m in MODELS}
+MODEL_ALIASES = {}
+for m in MODELS:
+    for alias in m.aliases:
+        MODEL_ALIASES[alias] = m.canonical
+
+
+def get_pricing(model: str) -> ModelInfo | None:
    """
    Get pricing for a model.
 
@@ -111,89 +44,28 @@ def get_pricing(model: str) -> ModelPricing | None:
        model: Model name or alias
 
    Returns:
-        …
-    """
-    # Check aliases first
-    resolved = MODEL_ALIASES.get(model.lower(), model)
-
-    # Exact match
-    if resolved in MODEL_PRICING:
-        return MODEL_PRICING[resolved]
-
-    # Try lowercase
-    if resolved.lower() in MODEL_PRICING:
-        return MODEL_PRICING[resolved.lower()]
-
-    # Try prefix matching (e.g., "gpt-5.1-codex-mini-2026-01" -> "gpt-5.1-codex-mini")
-    for known_model in MODEL_PRICING:
-        if resolved.lower().startswith(known_model.lower()):
-            return MODEL_PRICING[known_model]
-
-    return None
-
-
-def estimate_cost(
-    model: str,
-    input_tokens: int,
-    output_tokens: int,
-    cached_tokens: int = 0,
-) -> float | None:
+        ModelInfo or None if unknown
    """
-    … (old lines 142-164 mostly elided in the diff view)
-        return f"${cost:.4f}"
-    elif cost < 1.00:
-        return f"${cost:.3f}"
-    else:
-        return f"${cost:.2f}"
-
-
-def estimate_session_cost(
-    model: str,
-    token_usage: dict[str, Any],
-) -> dict[str, Any]:
-    """
-    Estimate cost for a session given its token usage.
-
-    Args:
-        model: Model used
-        token_usage: Dict with input_tokens, output_tokens, etc.
-
-    Returns:
-        Dict with cost info: {cost, cost_formatted, pricing_known}
-    """
-    input_tokens = token_usage.get("input_tokens", 0)
-    output_tokens = token_usage.get("output_tokens", 0)
-    cached_tokens = token_usage.get("cached_tokens", 0)
-
-    cost = estimate_cost(model, input_tokens, output_tokens, cached_tokens)
-
-    return {
-        "cost": cost,
-        "cost_formatted": format_cost(cost),
-        "pricing_known": cost is not None,
-        "model": model,
-        "input_tokens": input_tokens,
-        "output_tokens": output_tokens,
-    }
+    return resolve_model(model)
+
+
+__all__ = [
+    # New API
+    "ModelInfo",
+    "MODELS",
+    "resolve_model",
+    "get_adapter_for_model",
+    "get_default_model",
+    "list_models",
+    "list_adapters",
+    "get_models_help_text",
+    "get_models_table_data",
+    "estimate_cost",
+    "format_cost",
+    "estimate_session_cost",
+    # Legacy API
+    "MODEL_PRICING",
+    "MODEL_ALIASES",
+    "ModelPricing",
+    "get_pricing",
+]
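Call sites written against the 3.2.1 module keep working through the shim. A hedged usage sketch, assuming the registry preserves the old `estimate_cost` signature (which the "backwards-compatible API" docstring implies) and using the removed 3.2.1 pricing table only for the illustrative numbers:

```python
from zwarm.core.costs import get_pricing, estimate_cost, format_cost

# Alias resolution now goes through the registry's resolve_model().
info = get_pricing("sonnet")
if info is not None:
    print(info.canonical)  # ModelInfo exposes .canonical/.aliases (used by the shim above)

# Same signature as the removed implementation; returns None for unknown models.
cost = estimate_cost("gpt-5", input_tokens=10_000, output_tokens=2_000)
if cost is not None:
    # Under the old table ($1.25/M input, $10/M output) this comes to $0.0325,
    # which the removed format_cost would render as "$0.033".
    print(format_cost(cost))
```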