zwarm 3.4.0__py3-none-any.whl → 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zwarm/cli/interactive.py +420 -52
- zwarm/cli/main.py +127 -14
- zwarm/cli/pilot.py +52 -4
- zwarm/core/costs.py +55 -183
- zwarm/core/environment.py +55 -1
- zwarm/core/registry.py +329 -0
- zwarm/orchestrator.py +64 -12
- zwarm/sessions/__init__.py +48 -9
- zwarm/sessions/base.py +501 -0
- zwarm/sessions/claude.py +481 -0
- zwarm/sessions/manager.py +85 -458
- zwarm/tools/delegation.py +126 -61
- {zwarm-3.4.0.dist-info → zwarm-3.7.0.dist-info}/METADATA +70 -21
- {zwarm-3.4.0.dist-info → zwarm-3.7.0.dist-info}/RECORD +16 -13
- {zwarm-3.4.0.dist-info → zwarm-3.7.0.dist-info}/WHEEL +0 -0
- {zwarm-3.4.0.dist-info → zwarm-3.7.0.dist-info}/entry_points.txt +0 -0
zwarm/cli/main.py
CHANGED
|
@@ -299,7 +299,8 @@ def pilot(
|
|
|
299
299
|
config: Annotated[Optional[Path], typer.Option("--config", "-c", help="Path to config YAML")] = None,
|
|
300
300
|
overrides: Annotated[Optional[list[str]], typer.Option("--set", help="Override config (key=value)")] = None,
|
|
301
301
|
working_dir: Annotated[Path, typer.Option("--working-dir", "-w", help="Working directory")] = Path("."),
|
|
302
|
-
|
|
302
|
+
resume: Annotated[bool, typer.Option("--resume", help="Resume from previous state")] = False,
|
|
303
|
+
instance: Annotated[Optional[str], typer.Option("--instance", "-i", help="Instance ID (for isolation/resume)")] = None,
|
|
303
304
|
instance_name: Annotated[Optional[str], typer.Option("--name", "-n", help="Human-readable instance name")] = None,
|
|
304
305
|
model: Annotated[PilotLM, typer.Option("--model", "-m", help="LM to use")] = PilotLM.gpt5_verbose,
|
|
305
306
|
):
|
|
@@ -341,19 +342,30 @@ def pilot(
|
|
|
341
342
|
|
|
342
343
|
[dim]# Named instance[/]
|
|
343
344
|
$ zwarm pilot --name my-feature
|
|
345
|
+
|
|
346
|
+
[dim]# Resume a previous session[/]
|
|
347
|
+
$ zwarm pilot --resume --instance abc123
|
|
344
348
|
"""
|
|
345
349
|
from zwarm.cli.pilot import run_pilot, build_pilot_orchestrator
|
|
346
350
|
|
|
347
351
|
# Resolve task (optional for pilot)
|
|
348
352
|
resolved_task = _resolve_task(task, task_file)
|
|
349
353
|
|
|
350
|
-
|
|
354
|
+
# Validate resume requirements
|
|
355
|
+
if resume and not instance:
|
|
356
|
+
console.print("[red]Error:[/] --resume requires --instance to specify which session to resume")
|
|
357
|
+
console.print(" [dim]Use 'zwarm instances' to list available instances[/]")
|
|
358
|
+
raise typer.Exit(1)
|
|
359
|
+
|
|
360
|
+
console.print(f"[bold]{'Resuming' if resume else 'Starting'} pilot session...[/]")
|
|
351
361
|
console.print(f" Working dir: {working_dir.absolute()}")
|
|
352
362
|
console.print(f" Model: {model.value}")
|
|
353
363
|
if resolved_task:
|
|
354
364
|
console.print(f" Initial task: {resolved_task[:60]}...")
|
|
355
365
|
if instance:
|
|
356
366
|
console.print(f" Instance: {instance}" + (f" ({instance_name})" if instance_name else ""))
|
|
367
|
+
if resume:
|
|
368
|
+
console.print(f" [yellow]Resuming from saved state...[/]")
|
|
357
369
|
console.print()
|
|
358
370
|
|
|
359
371
|
orchestrator = None
|
|
@@ -371,6 +383,12 @@ def pilot(
|
|
|
371
383
|
if orchestrator.instance_id and not instance:
|
|
372
384
|
console.print(f" [dim]Instance: {orchestrator.instance_id[:8]}[/]")
|
|
373
385
|
|
|
386
|
+
# Resume from saved state if requested
|
|
387
|
+
if resume:
|
|
388
|
+
orchestrator.load_state()
|
|
389
|
+
msg_count = len(orchestrator.messages)
|
|
390
|
+
console.print(f" [green]✓[/] Resumed with {msg_count} messages")
|
|
391
|
+
|
|
374
392
|
# Run the pilot REPL
|
|
375
393
|
run_pilot(orchestrator, initial_task=resolved_task)
|
|
376
394
|
|
|
@@ -917,6 +935,23 @@ def init(
|
|
|
917
935
|
codex_toml_path.write_text(codex_content)
|
|
918
936
|
console.print(f" [green]✓[/] Created .zwarm/codex.toml")
|
|
919
937
|
|
|
938
|
+
# Create claude.toml for isolated Claude Code configuration
|
|
939
|
+
claude_toml_path = state_dir / "claude.toml"
|
|
940
|
+
write_claude_toml = True
|
|
941
|
+
if claude_toml_path.exists():
|
|
942
|
+
if not non_interactive:
|
|
943
|
+
overwrite_claude = typer.confirm(" .zwarm/claude.toml exists. Overwrite?", default=False)
|
|
944
|
+
if not overwrite_claude:
|
|
945
|
+
write_claude_toml = False
|
|
946
|
+
console.print(" [dim]Skipping claude.toml[/]")
|
|
947
|
+
else:
|
|
948
|
+
write_claude_toml = False # Don't overwrite in non-interactive mode
|
|
949
|
+
|
|
950
|
+
if write_claude_toml:
|
|
951
|
+
claude_content = _generate_claude_toml(model="sonnet")
|
|
952
|
+
claude_toml_path.write_text(claude_content)
|
|
953
|
+
console.print(f" [green]✓[/] Created .zwarm/claude.toml")
|
|
954
|
+
|
|
920
955
|
# Create zwarm.yaml
|
|
921
956
|
if create_project_config:
|
|
922
957
|
if zwarm_yaml_path.exists() and not non_interactive:
|
|
@@ -959,41 +994,87 @@ def _generate_config_toml(
|
|
|
959
994
|
adapter: str = "codex_mcp",
|
|
960
995
|
watchers: list[str] | None = None,
|
|
961
996
|
) -> str:
|
|
962
|
-
"""Generate config.toml content."""
|
|
997
|
+
"""Generate config.toml content with all options at their defaults."""
|
|
963
998
|
watchers = watchers or []
|
|
964
999
|
|
|
965
1000
|
lines = [
|
|
966
1001
|
"# zwarm configuration",
|
|
967
1002
|
"# Generated by 'zwarm init'",
|
|
1003
|
+
"# All values shown are defaults - uncomment and modify as needed",
|
|
968
1004
|
"",
|
|
1005
|
+
"# ============================================================================",
|
|
1006
|
+
"# Weave Integration (optional tracing/observability)",
|
|
1007
|
+
"# ============================================================================",
|
|
969
1008
|
"[weave]",
|
|
970
1009
|
]
|
|
971
1010
|
|
|
972
1011
|
if weave_project:
|
|
973
1012
|
lines.append(f'project = "{weave_project}"')
|
|
974
1013
|
else:
|
|
975
|
-
lines.append(
|
|
1014
|
+
lines.append('# project = "your-entity/your-project" # Uncomment to enable Weave tracing')
|
|
976
1015
|
|
|
977
1016
|
lines.extend([
|
|
1017
|
+
"enabled = true",
|
|
978
1018
|
"",
|
|
1019
|
+
"# ============================================================================",
|
|
1020
|
+
"# Orchestrator Settings",
|
|
1021
|
+
"# ============================================================================",
|
|
979
1022
|
"[orchestrator]",
|
|
980
|
-
|
|
1023
|
+
'# lm = "gpt-5-mini" # LLM for orchestrator (gpt-5-mini, gpt-5, claude-sonnet-4)',
|
|
1024
|
+
"max_steps = 50 # Max steps for orchestrate command",
|
|
1025
|
+
"max_steps_per_turn = 60 # Max steps per turn in pilot mode",
|
|
1026
|
+
"parallel_delegations = 4 # Max concurrent delegations",
|
|
1027
|
+
'# prompt = "path/to/prompt.yaml" # Custom prompt file (optional)',
|
|
1028
|
+
'# allowed_dirs = ["*"] # Directories agent can delegate to (default: working_dir only)',
|
|
1029
|
+
"",
|
|
1030
|
+
"# Context window compaction (prevents overflow on long tasks)",
|
|
1031
|
+
"[orchestrator.compaction]",
|
|
1032
|
+
"enabled = true",
|
|
1033
|
+
"max_tokens = 100000 # Trigger compaction above this",
|
|
1034
|
+
"threshold_pct = 0.85 # Compact when at this % of max_tokens",
|
|
1035
|
+
"target_pct = 0.7 # Target this % after compaction",
|
|
1036
|
+
"keep_first_n = 2 # Always keep first N messages (system + task)",
|
|
1037
|
+
"keep_last_n = 10 # Always keep last N messages (recent context)",
|
|
981
1038
|
"",
|
|
1039
|
+
"# ============================================================================",
|
|
1040
|
+
"# Executor Settings (codex agent configuration)",
|
|
1041
|
+
"# ============================================================================",
|
|
982
1042
|
"[executor]",
|
|
983
|
-
f'adapter = "{adapter}"',
|
|
984
|
-
|
|
985
|
-
|
|
1043
|
+
f'adapter = "{adapter}" # codex_mcp | codex_exec | claude_code',
|
|
1044
|
+
'# model = "gpt-5.1-codex-mini" # Model for delegated sessions (uses codex.toml default if not set)',
|
|
1045
|
+
'sandbox = "workspace-write" # read-only | workspace-write | danger-full-access',
|
|
1046
|
+
"timeout = 3600 # Session timeout in seconds",
|
|
1047
|
+
'reasoning_effort = "high" # low | medium | high',
|
|
986
1048
|
"",
|
|
1049
|
+
"# ============================================================================",
|
|
1050
|
+
"# Watchers (automated monitoring and nudges)",
|
|
1051
|
+
"# ============================================================================",
|
|
987
1052
|
"[watchers]",
|
|
988
|
-
f"enabled = {watchers}",
|
|
1053
|
+
f"enabled = {str(bool(watchers)).lower()}",
|
|
1054
|
+
'message_role = "user" # Role for nudge messages: user | assistant | system',
|
|
989
1055
|
"",
|
|
990
|
-
"#
|
|
991
|
-
"#
|
|
992
|
-
"
|
|
1056
|
+
"# Default watchers: progress, budget, delegation_reminder",
|
|
1057
|
+
"# Uncomment below to customize:",
|
|
1058
|
+
"",
|
|
1059
|
+
"# [[watchers.watchers]]",
|
|
1060
|
+
'# name = "progress"',
|
|
1061
|
+
"# enabled = true",
|
|
1062
|
+
"",
|
|
1063
|
+
"# [[watchers.watchers]]",
|
|
1064
|
+
'# name = "budget"',
|
|
1065
|
+
"# enabled = true",
|
|
1066
|
+
"# [watchers.watchers.config]",
|
|
1067
|
+
"# max_sessions = 10",
|
|
993
1068
|
"# warn_at_percent = 80",
|
|
994
1069
|
"",
|
|
995
|
-
"# [watchers.
|
|
996
|
-
|
|
1070
|
+
"# [[watchers.watchers]]",
|
|
1071
|
+
'# name = "delegation_reminder"',
|
|
1072
|
+
"# enabled = true",
|
|
1073
|
+
"",
|
|
1074
|
+
"# ============================================================================",
|
|
1075
|
+
"# State Directory",
|
|
1076
|
+
"# ============================================================================",
|
|
1077
|
+
'# state_dir = ".zwarm" # Where to store session data',
|
|
997
1078
|
"",
|
|
998
1079
|
])
|
|
999
1080
|
|
|
@@ -1042,6 +1123,38 @@ def _generate_codex_toml(
|
|
|
1042
1123
|
return "\n".join(lines)
|
|
1043
1124
|
|
|
1044
1125
|
|
|
1126
|
+
def _generate_claude_toml(
|
|
1127
|
+
model: str = "sonnet",
|
|
1128
|
+
) -> str:
|
|
1129
|
+
"""
|
|
1130
|
+
Generate claude.toml for isolated Claude Code configuration.
|
|
1131
|
+
|
|
1132
|
+
This file is parsed by zwarm and settings are passed to claude via CLI flags.
|
|
1133
|
+
Each .zwarm directory has its own claude config.
|
|
1134
|
+
"""
|
|
1135
|
+
lines = [
|
|
1136
|
+
"# Claude Code configuration for zwarm",
|
|
1137
|
+
"# zwarm parses this file and passes settings to claude via CLI flags",
|
|
1138
|
+
"# Each .zwarm dir has its own config",
|
|
1139
|
+
"# Generated by 'zwarm init'",
|
|
1140
|
+
"",
|
|
1141
|
+
"# Model settings",
|
|
1142
|
+
f'model = "{model}" # sonnet | opus | haiku',
|
|
1143
|
+
"",
|
|
1144
|
+
"# DANGER MODE - bypasses all permission checks",
|
|
1145
|
+
"# Set to true to use --dangerously-skip-permissions",
|
|
1146
|
+
"full_danger = true",
|
|
1147
|
+
"",
|
|
1148
|
+
"# Note: Claude Code uses different CLI flags than Codex",
|
|
1149
|
+
"# Common options:",
|
|
1150
|
+
"# --model <model> Model to use (sonnet, opus, haiku)",
|
|
1151
|
+
"# --add-dir <path> Additional directories to allow",
|
|
1152
|
+
"# --allowed-tools <tools> Restrict available tools",
|
|
1153
|
+
"",
|
|
1154
|
+
]
|
|
1155
|
+
return "\n".join(lines)
|
|
1156
|
+
|
|
1157
|
+
|
|
1045
1158
|
def _generate_zwarm_yaml(
|
|
1046
1159
|
description: str = "",
|
|
1047
1160
|
context: str = "",
|
zwarm/cli/pilot.py
CHANGED
|
@@ -81,14 +81,23 @@ class ChoogingSpinner:
|
|
|
81
81
|
|
|
82
82
|
|
|
83
83
|
# Context window sizes for different models (in tokens)
|
|
84
|
+
# These are for the ORCHESTRATOR LLM, not the executors
|
|
84
85
|
MODEL_CONTEXT_WINDOWS = {
|
|
86
|
+
# OpenAI models
|
|
85
87
|
"gpt-5.1-codex": 200_000,
|
|
86
88
|
"gpt-5.1-codex-mini": 200_000,
|
|
87
89
|
"gpt-5.1-codex-max": 400_000,
|
|
88
90
|
"gpt-5": 200_000,
|
|
89
91
|
"gpt-5-mini": 200_000,
|
|
90
|
-
"
|
|
91
|
-
"
|
|
92
|
+
"o3": 200_000,
|
|
93
|
+
"o3-mini": 200_000,
|
|
94
|
+
# Claude models (if used as orchestrator)
|
|
95
|
+
"claude-sonnet": 200_000,
|
|
96
|
+
"claude-opus": 200_000,
|
|
97
|
+
"claude-haiku": 200_000,
|
|
98
|
+
"sonnet": 200_000,
|
|
99
|
+
"opus": 200_000,
|
|
100
|
+
"haiku": 200_000,
|
|
92
101
|
# Fallback
|
|
93
102
|
"default": 128_000,
|
|
94
103
|
}
|
|
@@ -204,11 +213,12 @@ def build_pilot_orchestrator(
|
|
|
204
213
|
# Build pilot system prompt
|
|
205
214
|
system_prompt = get_pilot_prompt(working_dir=str(working_dir))
|
|
206
215
|
|
|
207
|
-
# Create lean orchestrator environment
|
|
216
|
+
# Create lean orchestrator environment (pilot mode = simpler observation)
|
|
208
217
|
env = OrchestratorEnv(
|
|
209
218
|
task="", # No task - pilot is conversational
|
|
210
219
|
working_dir=working_dir,
|
|
211
220
|
)
|
|
221
|
+
env.set_pilot_mode(True) # Human is in control, use lean observation
|
|
212
222
|
|
|
213
223
|
# Create orchestrator with ONLY delegation tools (no bash)
|
|
214
224
|
orchestrator = Orchestrator(
|
|
@@ -612,6 +622,15 @@ def execute_step_with_events(
|
|
|
612
622
|
# Execute invoke (calls LLM)
|
|
613
623
|
response = orchestrator.invoke()
|
|
614
624
|
|
|
625
|
+
# Track cumulative token usage from the API response
|
|
626
|
+
# (This mirrors what step() does in orchestrator.py)
|
|
627
|
+
if hasattr(orchestrator, "_last_response") and orchestrator._last_response:
|
|
628
|
+
last_response = orchestrator._last_response
|
|
629
|
+
if hasattr(last_response, "usage") and last_response.usage:
|
|
630
|
+
usage = last_response.usage
|
|
631
|
+
tokens_this_call = getattr(usage, "total_tokens", 0)
|
|
632
|
+
orchestrator._total_tokens = getattr(orchestrator, "_total_tokens", 0) + tokens_this_call
|
|
633
|
+
|
|
615
634
|
# Extract and render events from response
|
|
616
635
|
if response:
|
|
617
636
|
events = extract_events_from_response(response)
|
|
@@ -739,7 +758,12 @@ def print_help(renderer: EventRenderer) -> None:
|
|
|
739
758
|
" :goto <turn|root> Jump to a prior turn (e.g., :goto T1)",
|
|
740
759
|
" :sessions Show executor sessions",
|
|
741
760
|
" :reasoning [on|off] Toggle reasoning display",
|
|
742
|
-
" :
|
|
761
|
+
" :save Save state (for later resume)",
|
|
762
|
+
" :quit / :exit Exit the pilot (auto-saves)",
|
|
763
|
+
"",
|
|
764
|
+
"Resume:",
|
|
765
|
+
" State is auto-saved after each turn. To resume a session:",
|
|
766
|
+
" $ zwarm pilot --resume --instance <instance_id>",
|
|
743
767
|
"",
|
|
744
768
|
"Multiline input:",
|
|
745
769
|
' Start with """ and end with """ to enter multiple lines.',
|
|
@@ -943,6 +967,10 @@ def _run_pilot_repl(
|
|
|
943
967
|
|
|
944
968
|
# :quit / :exit
|
|
945
969
|
if cmd in ("quit", "exit", "q"):
|
|
970
|
+
# Save state before exiting
|
|
971
|
+
if hasattr(orchestrator, "save_state"):
|
|
972
|
+
orchestrator.save_state()
|
|
973
|
+
renderer.status("[dim]State saved.[/]")
|
|
946
974
|
renderer.status("Goodbye!")
|
|
947
975
|
break
|
|
948
976
|
|
|
@@ -1135,6 +1163,20 @@ def _run_pilot_repl(
|
|
|
1135
1163
|
renderer.status(f"Reasoning display: {current}")
|
|
1136
1164
|
continue
|
|
1137
1165
|
|
|
1166
|
+
# :save
|
|
1167
|
+
if cmd == "save":
|
|
1168
|
+
if hasattr(orchestrator, "save_state"):
|
|
1169
|
+
orchestrator.save_state()
|
|
1170
|
+
instance_id = getattr(orchestrator, "instance_id", None)
|
|
1171
|
+
if instance_id:
|
|
1172
|
+
renderer.status(f"[green]✓[/] State saved (instance: {instance_id[:8]})")
|
|
1173
|
+
renderer.status(f" [dim]Resume with: zwarm pilot --resume --instance {instance_id[:8]}[/]")
|
|
1174
|
+
else:
|
|
1175
|
+
renderer.status("[green]✓[/] State saved")
|
|
1176
|
+
else:
|
|
1177
|
+
renderer.error("State saving not available")
|
|
1178
|
+
continue
|
|
1179
|
+
|
|
1138
1180
|
# Unknown command
|
|
1139
1181
|
renderer.error(f"Unknown command: {cmd}")
|
|
1140
1182
|
renderer.status("Type :help for available commands.")
|
|
@@ -1175,6 +1217,10 @@ def _run_pilot_repl(
|
|
|
1175
1217
|
},
|
|
1176
1218
|
)
|
|
1177
1219
|
|
|
1220
|
+
# Save state for resume capability
|
|
1221
|
+
if hasattr(orchestrator, "save_state"):
|
|
1222
|
+
orchestrator.save_state()
|
|
1223
|
+
|
|
1178
1224
|
# Show turn info
|
|
1179
1225
|
cp = state.current()
|
|
1180
1226
|
if cp:
|
|
@@ -1190,4 +1236,6 @@ def _run_pilot_repl(
|
|
|
1190
1236
|
if hasattr(orchestrator, "stopCondition") and orchestrator.stopCondition:
|
|
1191
1237
|
renderer.status("")
|
|
1192
1238
|
renderer.status("Orchestrator signaled completion.")
|
|
1239
|
+
if hasattr(orchestrator, "save_state"):
|
|
1240
|
+
orchestrator.save_state()
|
|
1193
1241
|
break
|
zwarm/core/costs.py
CHANGED
|
@@ -1,109 +1,42 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Token cost estimation for LLM models.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
This module re-exports from the centralized model registry.
|
|
5
|
+
For adding new models, edit: zwarm/core/registry.py
|
|
5
6
|
|
|
6
|
-
|
|
7
|
-
- https://www.helicone.ai/llm-cost/provider/openai/model/gpt-5.1-codex
|
|
8
|
-
- https://pricepertoken.com/pricing-page/model/openai-codex-mini
|
|
7
|
+
Backwards-compatible API preserved for existing code.
|
|
9
8
|
"""
|
|
10
9
|
|
|
11
10
|
from __future__ import annotations
|
|
12
11
|
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
input_cost = (input_tokens / 1_000_000) * self.input_per_million
|
|
42
|
-
output_cost = (output_tokens / 1_000_000) * self.output_per_million
|
|
43
|
-
|
|
44
|
-
cached_cost = 0.0
|
|
45
|
-
if cached_tokens and self.cached_input_per_million:
|
|
46
|
-
cached_cost = (cached_tokens / 1_000_000) * self.cached_input_per_million
|
|
47
|
-
|
|
48
|
-
return input_cost + output_cost + cached_cost
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# Model pricing table ($ per million tokens)
|
|
52
|
-
# Last updated: 2026-01
|
|
53
|
-
MODEL_PRICING: dict[str, ModelPricing] = {
|
|
54
|
-
# OpenAI Codex models
|
|
55
|
-
"gpt-5.1-codex": ModelPricing(
|
|
56
|
-
input_per_million=1.25,
|
|
57
|
-
output_per_million=10.00,
|
|
58
|
-
cached_input_per_million=0.125, # 90% discount for cached
|
|
59
|
-
),
|
|
60
|
-
"gpt-5.1-codex-mini": ModelPricing(
|
|
61
|
-
input_per_million=0.25,
|
|
62
|
-
output_per_million=2.00,
|
|
63
|
-
cached_input_per_million=0.025,
|
|
64
|
-
),
|
|
65
|
-
"gpt-5.1-codex-max": ModelPricing(
|
|
66
|
-
input_per_million=1.25,
|
|
67
|
-
output_per_million=10.00,
|
|
68
|
-
cached_input_per_million=0.125,
|
|
69
|
-
),
|
|
70
|
-
# GPT-5 base models (for reference)
|
|
71
|
-
"gpt-5": ModelPricing(
|
|
72
|
-
input_per_million=1.25,
|
|
73
|
-
output_per_million=10.00,
|
|
74
|
-
),
|
|
75
|
-
"gpt-5-mini": ModelPricing(
|
|
76
|
-
input_per_million=0.25,
|
|
77
|
-
output_per_million=2.00,
|
|
78
|
-
),
|
|
79
|
-
# Claude models (Anthropic)
|
|
80
|
-
"claude-sonnet-4-20250514": ModelPricing(
|
|
81
|
-
input_per_million=3.00,
|
|
82
|
-
output_per_million=15.00,
|
|
83
|
-
),
|
|
84
|
-
"claude-opus-4-20250514": ModelPricing(
|
|
85
|
-
input_per_million=15.00,
|
|
86
|
-
output_per_million=75.00,
|
|
87
|
-
),
|
|
88
|
-
"claude-3-5-sonnet-20241022": ModelPricing(
|
|
89
|
-
input_per_million=3.00,
|
|
90
|
-
output_per_million=15.00,
|
|
91
|
-
),
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
# Aliases for common model names
|
|
95
|
-
MODEL_ALIASES: dict[str, str] = {
|
|
96
|
-
"codex": "gpt-5.1-codex",
|
|
97
|
-
"codex-mini": "gpt-5.1-codex-mini",
|
|
98
|
-
"codex-max": "gpt-5.1-codex-max",
|
|
99
|
-
"gpt5": "gpt-5",
|
|
100
|
-
"gpt5-mini": "gpt-5-mini",
|
|
101
|
-
"sonnet": "claude-sonnet-4-20250514",
|
|
102
|
-
"opus": "claude-opus-4-20250514",
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def get_pricing(model: str) -> ModelPricing | None:
|
|
12
|
+
# Re-export everything from registry for backwards compatibility
|
|
13
|
+
from zwarm.core.registry import (
|
|
14
|
+
ModelInfo,
|
|
15
|
+
MODELS,
|
|
16
|
+
resolve_model,
|
|
17
|
+
get_adapter_for_model,
|
|
18
|
+
get_default_model,
|
|
19
|
+
list_models,
|
|
20
|
+
list_adapters,
|
|
21
|
+
get_models_help_text,
|
|
22
|
+
get_models_table_data,
|
|
23
|
+
estimate_cost,
|
|
24
|
+
format_cost,
|
|
25
|
+
estimate_session_cost,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Backwards compatibility alias
|
|
29
|
+
ModelPricing = ModelInfo
|
|
30
|
+
|
|
31
|
+
# Legacy aliases for backwards compatibility
|
|
32
|
+
MODEL_PRICING = {m.canonical: m for m in MODELS}
|
|
33
|
+
MODEL_ALIASES = {}
|
|
34
|
+
for m in MODELS:
|
|
35
|
+
for alias in m.aliases:
|
|
36
|
+
MODEL_ALIASES[alias] = m.canonical
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_pricing(model: str) -> ModelInfo | None:
|
|
107
40
|
"""
|
|
108
41
|
Get pricing for a model.
|
|
109
42
|
|
|
@@ -111,89 +44,28 @@ def get_pricing(model: str) -> ModelPricing | None:
|
|
|
111
44
|
model: Model name or alias
|
|
112
45
|
|
|
113
46
|
Returns:
|
|
114
|
-
|
|
115
|
-
"""
|
|
116
|
-
# Check aliases first
|
|
117
|
-
resolved = MODEL_ALIASES.get(model.lower(), model)
|
|
118
|
-
|
|
119
|
-
# Exact match
|
|
120
|
-
if resolved in MODEL_PRICING:
|
|
121
|
-
return MODEL_PRICING[resolved]
|
|
122
|
-
|
|
123
|
-
# Try lowercase
|
|
124
|
-
if resolved.lower() in MODEL_PRICING:
|
|
125
|
-
return MODEL_PRICING[resolved.lower()]
|
|
126
|
-
|
|
127
|
-
# Try prefix matching (e.g., "gpt-5.1-codex-mini-2026-01" -> "gpt-5.1-codex-mini")
|
|
128
|
-
for known_model in MODEL_PRICING:
|
|
129
|
-
if resolved.lower().startswith(known_model.lower()):
|
|
130
|
-
return MODEL_PRICING[known_model]
|
|
131
|
-
|
|
132
|
-
return None
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
def estimate_cost(
|
|
136
|
-
model: str,
|
|
137
|
-
input_tokens: int,
|
|
138
|
-
output_tokens: int,
|
|
139
|
-
cached_tokens: int = 0,
|
|
140
|
-
) -> float | None:
|
|
47
|
+
ModelInfo or None if unknown
|
|
141
48
|
"""
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
""
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
""
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
return f"${cost:.4f}"
|
|
166
|
-
elif cost < 1.00:
|
|
167
|
-
return f"${cost:.3f}"
|
|
168
|
-
else:
|
|
169
|
-
return f"${cost:.2f}"
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
def estimate_session_cost(
|
|
173
|
-
model: str,
|
|
174
|
-
token_usage: dict[str, Any],
|
|
175
|
-
) -> dict[str, Any]:
|
|
176
|
-
"""
|
|
177
|
-
Estimate cost for a session given its token usage.
|
|
178
|
-
|
|
179
|
-
Args:
|
|
180
|
-
model: Model used
|
|
181
|
-
token_usage: Dict with input_tokens, output_tokens, etc.
|
|
182
|
-
|
|
183
|
-
Returns:
|
|
184
|
-
Dict with cost info: {cost, cost_formatted, pricing_known}
|
|
185
|
-
"""
|
|
186
|
-
input_tokens = token_usage.get("input_tokens", 0)
|
|
187
|
-
output_tokens = token_usage.get("output_tokens", 0)
|
|
188
|
-
cached_tokens = token_usage.get("cached_tokens", 0)
|
|
189
|
-
|
|
190
|
-
cost = estimate_cost(model, input_tokens, output_tokens, cached_tokens)
|
|
191
|
-
|
|
192
|
-
return {
|
|
193
|
-
"cost": cost,
|
|
194
|
-
"cost_formatted": format_cost(cost),
|
|
195
|
-
"pricing_known": cost is not None,
|
|
196
|
-
"model": model,
|
|
197
|
-
"input_tokens": input_tokens,
|
|
198
|
-
"output_tokens": output_tokens,
|
|
199
|
-
}
|
|
49
|
+
return resolve_model(model)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
__all__ = [
|
|
53
|
+
# New API
|
|
54
|
+
"ModelInfo",
|
|
55
|
+
"MODELS",
|
|
56
|
+
"resolve_model",
|
|
57
|
+
"get_adapter_for_model",
|
|
58
|
+
"get_default_model",
|
|
59
|
+
"list_models",
|
|
60
|
+
"list_adapters",
|
|
61
|
+
"get_models_help_text",
|
|
62
|
+
"get_models_table_data",
|
|
63
|
+
"estimate_cost",
|
|
64
|
+
"format_cost",
|
|
65
|
+
"estimate_session_cost",
|
|
66
|
+
# Legacy API
|
|
67
|
+
"MODEL_PRICING",
|
|
68
|
+
"MODEL_ALIASES",
|
|
69
|
+
"ModelPricing",
|
|
70
|
+
"get_pricing",
|
|
71
|
+
]
|
zwarm/core/environment.py
CHANGED
|
@@ -52,6 +52,9 @@ class OrchestratorEnv(Environment):
|
|
|
52
52
|
# Budget config (set from config)
|
|
53
53
|
_budget_max_sessions: int | None = PrivateAttr(default=None)
|
|
54
54
|
|
|
55
|
+
# Pilot mode: simpler observation since human is in control
|
|
56
|
+
_pilot_mode: bool = PrivateAttr(default=False)
|
|
57
|
+
|
|
55
58
|
def set_session_manager(self, manager: "CodexSessionManager") -> None:
|
|
56
59
|
"""Set the session manager for live session visibility in observe()."""
|
|
57
60
|
self._session_manager = manager
|
|
@@ -77,18 +80,69 @@ class OrchestratorEnv(Environment):
|
|
|
77
80
|
"""Set budget limits from config."""
|
|
78
81
|
self._budget_max_sessions = max_sessions
|
|
79
82
|
|
|
83
|
+
def set_pilot_mode(self, enabled: bool = True) -> None:
|
|
84
|
+
"""
|
|
85
|
+
Enable pilot mode for simpler env observation.
|
|
86
|
+
|
|
87
|
+
In pilot mode, the human is in control and can use :status/:sessions
|
|
88
|
+
commands to see detailed progress. The LLM only needs a brief context.
|
|
89
|
+
"""
|
|
90
|
+
self._pilot_mode = enabled
|
|
91
|
+
|
|
80
92
|
def observe(self) -> str:
|
|
81
93
|
"""
|
|
82
94
|
Return observable state for the orchestrator.
|
|
83
95
|
|
|
84
|
-
|
|
96
|
+
In full mode (autonomous orchestrator):
|
|
85
97
|
- Progress (steps, tokens)
|
|
86
98
|
- Session summary (pulled LIVE from CodexSessionManager)
|
|
87
99
|
- Active sessions with their status
|
|
88
100
|
- Working directory
|
|
89
101
|
|
|
102
|
+
In pilot mode (human in control):
|
|
103
|
+
- Brief session status (just what's active)
|
|
104
|
+
- Working directory
|
|
105
|
+
|
|
90
106
|
Note: Task is NOT included here as it's already in the user message.
|
|
91
107
|
"""
|
|
108
|
+
if self._pilot_mode:
|
|
109
|
+
return self._observe_pilot()
|
|
110
|
+
return self._observe_full()
|
|
111
|
+
|
|
112
|
+
def _observe_pilot(self) -> str:
|
|
113
|
+
"""Lean observation for pilot mode (human is in control)."""
|
|
114
|
+
parts = []
|
|
115
|
+
|
|
116
|
+
# Brief session status - just enough for context
|
|
117
|
+
if self._session_manager is not None:
|
|
118
|
+
sessions = self._session_manager.list_sessions()
|
|
119
|
+
|
|
120
|
+
running = [s for s in sessions if s.status.value == "running"]
|
|
121
|
+
if running:
|
|
122
|
+
session_lines = []
|
|
123
|
+
for s in running:
|
|
124
|
+
task_preview = s.task[:40] + "..." if len(s.task) > 40 else s.task
|
|
125
|
+
session_lines.append(f" • {s.short_id}: {task_preview}")
|
|
126
|
+
parts.append("## Active Sessions\n" + "\n".join(session_lines))
|
|
127
|
+
|
|
128
|
+
# Just show counts for completed/failed
|
|
129
|
+
completed = sum(1 for s in sessions if s.status.value == "completed")
|
|
130
|
+
failed = sum(1 for s in sessions if s.status.value == "failed")
|
|
131
|
+
if completed or failed:
|
|
132
|
+
status = []
|
|
133
|
+
if completed:
|
|
134
|
+
status.append(f"{completed} completed")
|
|
135
|
+
if failed:
|
|
136
|
+
status.append(f"{failed} failed")
|
|
137
|
+
parts.append(f"Previous: {', '.join(status)}")
|
|
138
|
+
|
|
139
|
+
# Working directory
|
|
140
|
+
parts.append(f"Working dir: {self.working_dir.absolute()}")
|
|
141
|
+
|
|
142
|
+
return "\n\n".join(parts) if parts else ""
|
|
143
|
+
|
|
144
|
+
def _observe_full(self) -> str:
|
|
145
|
+
"""Full observation for autonomous orchestrator runs."""
|
|
92
146
|
parts = []
|
|
93
147
|
|
|
94
148
|
# Progress bar and stats
|