zwarm 3.9.0__tar.gz → 3.10.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {zwarm-3.9.0 → zwarm-3.10.2}/PKG-INFO +2 -1
  2. {zwarm-3.9.0 → zwarm-3.10.2}/pyproject.toml +2 -1
  3. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/cli/interactive.py +4 -3
  4. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/cli/main.py +12 -12
  5. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/cli/pilot.py +2 -1
  6. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/registry.py +21 -20
  7. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/prompts/pilot.py +16 -8
  8. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/tools/delegation.py +32 -16
  9. {zwarm-3.9.0 → zwarm-3.10.2}/.gitignore +0 -0
  10. {zwarm-3.9.0 → zwarm-3.10.2}/README.md +0 -0
  11. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/__init__.py +0 -0
  12. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/cli/__init__.py +0 -0
  13. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/__init__.py +0 -0
  14. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/checkpoints.py +0 -0
  15. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/compact.py +0 -0
  16. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/config.py +0 -0
  17. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/costs.py +0 -0
  18. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/environment.py +0 -0
  19. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/models.py +0 -0
  20. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/state.py +0 -0
  21. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/test_compact.py +0 -0
  22. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/test_config.py +0 -0
  23. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/core/test_models.py +0 -0
  24. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/orchestrator.py +0 -0
  25. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/prompts/__init__.py +0 -0
  26. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/prompts/orchestrator.py +0 -0
  27. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/sessions/__init__.py +0 -0
  28. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/sessions/base.py +0 -0
  29. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/sessions/claude.py +0 -0
  30. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/sessions/manager.py +0 -0
  31. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/test_orchestrator_watchers.py +0 -0
  32. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/tools/__init__.py +0 -0
  33. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/watchers/__init__.py +0 -0
  34. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/watchers/base.py +0 -0
  35. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/watchers/builtin.py +0 -0
  36. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/watchers/llm_watcher.py +0 -0
  37. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/watchers/manager.py +0 -0
  38. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/watchers/registry.py +0 -0
  39. {zwarm-3.9.0 → zwarm-3.10.2}/src/zwarm/watchers/test_watchers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zwarm
3
- Version: 3.9.0
3
+ Version: 3.10.2
4
4
  Summary: Multi-Agent CLI Orchestration Research Platform
5
5
  Requires-Python: <3.14,>=3.13
6
6
  Requires-Dist: prompt-toolkit>=3.0.52
@@ -8,6 +8,7 @@ Requires-Dist: python-dotenv>=1.0.0
8
8
  Requires-Dist: pyyaml>=6.0
9
9
  Requires-Dist: rich>=13.0.0
10
10
  Requires-Dist: typer>=0.9.0
11
+ Requires-Dist: wandb>=0.24.0
11
12
  Requires-Dist: wbal>=0.5.8
12
13
  Description-Content-Type: text/markdown
13
14
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "zwarm"
3
- version = "3.9.0"
3
+ version = "3.10.2"
4
4
  description = "Multi-Agent CLI Orchestration Research Platform"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13,<3.14"
@@ -11,6 +11,7 @@ dependencies = [
11
11
  "pyyaml>=6.0",
12
12
  "wbal>=0.5.8",
13
13
  "prompt-toolkit>=3.0.52",
14
+ "wandb>=0.24.0",
14
15
  ]
15
16
 
16
17
  [project.scripts]
@@ -157,7 +157,7 @@ def cmd_help():
157
157
  table.add_column("Description")
158
158
 
159
159
  table.add_row("[bold]Session Lifecycle[/]", "")
160
- table.add_row('spawn "task" [--model M] [--adapter A]', "Start new session")
160
+ table.add_row('spawn "task" [--model M]', "Start new session")
161
161
  table.add_row('c ID "message"', "Continue conversation")
162
162
  table.add_row("kill ID | all", "Stop session(s)")
163
163
  table.add_row("rm ID | all", "Delete session(s)")
@@ -832,7 +832,7 @@ def cmd_rm(manager, target: str):
832
832
 
833
833
  def run_interactive(
834
834
  working_dir: Path,
835
- model: str = "gpt-5.1-codex-mini",
835
+ model: str = "gpt-5.2-codex",
836
836
  ):
837
837
  """
838
838
  Run the interactive REPL.
@@ -1020,7 +1020,8 @@ def run_interactive(
1020
1020
 
1021
1021
  elif cmd == "spawn":
1022
1022
  if not args:
1023
- console.print(" [red]Usage:[/] spawn \"task\" [--model M] [--adapter A]")
1023
+ console.print(" [red]Usage:[/] spawn \"task\" [--model M]")
1024
+ console.print(" [dim]Models: 5.2, 5.2-think, opus, sonnet (adapter auto-detected)[/]")
1024
1025
  else:
1025
1026
  # Parse spawn args
1026
1027
  task_parts = []
@@ -441,7 +441,7 @@ def exec(
441
441
  console.print(f" Model: {model}")
442
442
 
443
443
  manager = CodexSessionManager(working_dir / ".zwarm")
444
- effective_model = model or "gpt-5.1-codex-mini"
444
+ effective_model = model or "gpt-5.2-codex"
445
445
 
446
446
  session = manager.start_session(
447
447
  task=task,
@@ -813,7 +813,7 @@ def init(
813
813
  project_description = ""
814
814
  project_context = ""
815
815
  # Codex settings
816
- codex_model = "gpt-5.1-codex-mini"
816
+ codex_model = "gpt-5.2-codex"
817
817
  codex_reasoning = "high"
818
818
 
819
819
  if not non_interactive:
@@ -838,9 +838,9 @@ def init(
838
838
  console.print(" [dim]These control the underlying Codex CLI that runs executor sessions[/]\n")
839
839
 
840
840
  console.print(" Available models:")
841
- console.print(" [cyan]1[/] gpt-5.1-codex-mini [dim]- Fast, cheap, good for most tasks (Recommended)[/]")
842
- console.print(" [cyan]2[/] gpt-5.1-codex [dim]- Balanced speed and capability[/]")
843
- console.print(" [cyan]3[/] gpt-5.1-codex-max [dim]- Most capable, 400k context, expensive[/]")
841
+ console.print(" [cyan]1[/] gpt-5.2-codex [dim]- GPT-5.2 Codex, balanced (Recommended)[/]")
842
+ console.print(" [cyan]2[/] gpt-5.2 [dim]- GPT-5.2 with extended reasoning[/]")
843
+ console.print(" [cyan]3[/] gpt-5.1-codex [dim]- GPT-5.1 Codex (legacy)[/]")
844
844
 
845
845
  model_choice = typer.prompt(
846
846
  " Select model (1-3)",
@@ -848,9 +848,9 @@ def init(
848
848
  type=str,
849
849
  )
850
850
  model_map = {
851
- "1": "gpt-5.1-codex-mini",
852
- "2": "gpt-5.1-codex",
853
- "3": "gpt-5.1-codex-max",
851
+ "1": "gpt-5.2-codex",
852
+ "2": "gpt-5.2",
853
+ "3": "gpt-5.1-codex",
854
854
  }
855
855
  codex_model = model_map.get(model_choice, model_choice)
856
856
  if model_choice not in model_map:
@@ -1041,7 +1041,7 @@ def _generate_config_toml(
1041
1041
  "# ============================================================================",
1042
1042
  "[executor]",
1043
1043
  f'adapter = "{adapter}" # codex_mcp | codex_exec | claude_code',
1044
- '# model = "gpt-5.1-codex-mini" # Model for delegated sessions (uses codex.toml default if not set)',
1044
+ '# model = "gpt-5.2-codex" # Model for delegated sessions (uses codex.toml default if not set)',
1045
1045
  'sandbox = "workspace-write" # read-only | workspace-write | danger-full-access',
1046
1046
  "timeout = 3600 # Session timeout in seconds",
1047
1047
  'reasoning_effort = "high" # low | medium | high',
@@ -1082,7 +1082,7 @@ def _generate_config_toml(
1082
1082
 
1083
1083
 
1084
1084
  def _generate_codex_toml(
1085
- model: str = "gpt-5.1-codex-mini",
1085
+ model: str = "gpt-5.2-codex",
1086
1086
  reasoning_effort: str = "high",
1087
1087
  ) -> str:
1088
1088
  """
@@ -1461,7 +1461,7 @@ def interactive(
1461
1461
  """
1462
1462
  from zwarm.cli.interactive import run_interactive
1463
1463
 
1464
- default_model = model or "gpt-5.1-codex-mini"
1464
+ default_model = model or "gpt-5.2-codex"
1465
1465
  run_interactive(working_dir=default_dir.absolute(), model=default_model)
1466
1466
 
1467
1467
 
@@ -1655,7 +1655,7 @@ app.add_typer(session_app, name="session")
1655
1655
  def session_start(
1656
1656
  task: Annotated[str, typer.Argument(help="Task description")],
1657
1657
  working_dir: Annotated[Path, typer.Option("--dir", "-d", help="Working directory")] = Path("."),
1658
- model: Annotated[str, typer.Option("--model", "-m", help="Model to use")] = "gpt-5.1-codex-mini",
1658
+ model: Annotated[str, typer.Option("--model", "-m", help="Model to use")] = "gpt-5.2-codex",
1659
1659
  ):
1660
1660
  """
1661
1661
  Start a new Codex session in the background.
@@ -84,9 +84,10 @@ class ChoogingSpinner:
84
84
  # These are for the ORCHESTRATOR LLM, not the executors
85
85
  MODEL_CONTEXT_WINDOWS = {
86
86
  # OpenAI models
87
+ "gpt-5.2-codex": 200_000,
88
+ "gpt-5.2": 200_000,
87
89
  "gpt-5.1-codex": 200_000,
88
90
  "gpt-5.1-codex-mini": 200_000,
89
- "gpt-5.1-codex-max": 400_000,
90
91
  "gpt-5": 200_000,
91
92
  "gpt-5-mini": 200_000,
92
93
  "o3": 200_000,
@@ -61,34 +61,43 @@ MODELS: list[ModelInfo] = [
61
61
  # -------------------------------------------------------------------------
62
62
  # OpenAI Codex Models (via `codex` CLI)
63
63
  # -------------------------------------------------------------------------
64
+ ModelInfo(
65
+ canonical="gpt-5.2-codex",
66
+ adapter="codex",
67
+ aliases=["5.2", "5.2-codex"],
68
+ input_per_million=1.50,
69
+ output_per_million=12.00,
70
+ cached_input_per_million=0.15,
71
+ description="GPT-5.2 Codex - balanced speed/capability",
72
+ is_default=True,
73
+ ),
74
+ ModelInfo(
75
+ canonical="gpt-5.2",
76
+ adapter="codex",
77
+ aliases=["5.2-think", "gpt5.2"],
78
+ input_per_million=2.00,
79
+ output_per_million=16.00,
80
+ cached_input_per_million=0.20,
81
+ description="GPT-5.2 with extended reasoning (xhigh)",
82
+ ),
64
83
  ModelInfo(
65
84
  canonical="gpt-5.1-codex-mini",
66
85
  adapter="codex",
67
- aliases=["codex-mini", "mini"],
86
+ aliases=["codex-mini", "mini", "5.1-mini"],
68
87
  input_per_million=0.25,
69
88
  output_per_million=2.00,
70
89
  cached_input_per_million=0.025,
71
90
  description="Fast, cost-effective coding model",
72
- is_default=True,
73
91
  ),
74
92
  ModelInfo(
75
93
  canonical="gpt-5.1-codex",
76
94
  adapter="codex",
77
- aliases=["codex", "codex-full"],
95
+ aliases=["codex", "codex-full", "5.1"],
78
96
  input_per_million=1.25,
79
97
  output_per_million=10.00,
80
98
  cached_input_per_million=0.125,
81
99
  description="Full Codex model with extended reasoning",
82
100
  ),
83
- ModelInfo(
84
- canonical="gpt-5.1-codex-max",
85
- adapter="codex",
86
- aliases=["codex-max", "max"],
87
- input_per_million=1.25,
88
- output_per_million=10.00,
89
- cached_input_per_million=0.125,
90
- description="Maximum context Codex model",
91
- ),
92
101
  # -------------------------------------------------------------------------
93
102
  # Anthropic Claude Models (via `claude` CLI)
94
103
  # -------------------------------------------------------------------------
@@ -109,14 +118,6 @@ MODELS: list[ModelInfo] = [
109
118
  output_per_million=75.00,
110
119
  description="Most capable Claude model",
111
120
  ),
112
- ModelInfo(
113
- canonical="haiku",
114
- adapter="claude",
115
- aliases=["claude-haiku", "claude-4-haiku"],
116
- input_per_million=0.25,
117
- output_per_million=1.25,
118
- description="Fast, lightweight Claude model",
119
- ),
120
121
  ]
121
122
 
122
123
 
@@ -44,10 +44,13 @@ You command executor agents - capable coding agents that do specific tasks. Thin
44
44
 
45
45
  # Your Tools
46
46
 
47
- **delegate(task, adapter="codex", model=None, working_dir=None)** - Dispatch a crew member. Returns immediately with session_id.
48
- - `adapter`: "codex" (fast, great for code) or "claude" (powerful reasoning)
49
- - `model`: Override model (default: gpt-5.1-codex-mini for codex, sonnet for claude)
50
- - Use codex for most tasks - it's fast. Use claude for complex reasoning.
47
+ **delegate(task, model=None, working_dir=None)** - Dispatch a crew member. Returns immediately with session_id.
48
+ - `model`: Just use the model name - adapter is auto-detected:
49
+ - `"5.2"` or `"gpt-5.2-codex"` - GPT-5.2 Codex (default, fast, great for code)
50
+ - `"5.2-think"` - GPT-5.2 with extended reasoning
51
+ - `"opus"` - Claude Opus (most capable, complex reasoning)
52
+ - `"sonnet"` - Claude Sonnet (balanced)
53
+ - Use codex models for most tasks - they're fast. Use opus for complex reasoning.
51
54
 
52
55
  **converse(session_id, message)** - Send follow-up to a crew member. Returns immediately.
53
56
 
@@ -55,11 +58,16 @@ You command executor agents - capable coding agents that do specific tasks. Thin
55
58
 
56
59
  **check_session(session_id)** - Get FULL result. Complete response, tokens, runtime.
57
60
 
58
- **get_trajectory(session_id, full=False)** - See what steps the agent took (for debugging).
61
+ **get_trajectory(session_id, full=False)** - See what steps the agent took.
62
+ - `full=True`: Show complete untruncated content for all steps (debugging)
63
+ - `full=False`: Concise summaries (default)
59
64
 
60
- **list_sessions()** - See all crew. `needs_attention=True` means ready for review.
65
+ **list_sessions(status=None)** - See all crew. `needs_attention=True` means ready for review.
66
+ - `status`: Filter by "running", "completed", "failed", or None for all
61
67
 
62
- **end_session(session_id)** - Dismiss a crew member.
68
+ **end_session(session_id, reason=None, delete=False)** - Dismiss a crew member.
69
+ - `reason`: Optional note about why
70
+ - `delete=True`: Permanently remove from list (otherwise just kills if running)
63
71
 
64
72
  **sleep(seconds)** - Wait before checking. Give crew time to work (15-60s typical).
65
73
 
@@ -68,7 +76,7 @@ You command executor agents - capable coding agents that do specific tasks. Thin
68
76
  # Workflow
69
77
 
70
78
  ```
71
- 1. delegate(task) → session_id
79
+ 1. delegate(task, model="5.2") → session_id # or model="opus" for complex tasks
72
80
  2. sleep(30)
73
81
  3. peek_session(id) → done?
74
82
  4. If running, goto 2
@@ -163,17 +163,19 @@ def delegate(
163
163
  task: str,
164
164
  model: str | None = None,
165
165
  working_dir: str | None = None,
166
- adapter: str = "codex",
166
+ adapter: str | None = None,
167
167
  ) -> dict[str, Any]:
168
168
  """
169
169
  Delegate work to an executor agent. Returns immediately - sessions run async.
170
170
 
171
- Supports multiple adapters:
172
- - codex: OpenAI's Codex CLI (default, fast, good for code tasks)
173
- - claude: Claude Code CLI (powerful, good for complex reasoning)
171
+ Model shortcuts (adapter auto-detected):
172
+ - "5.2" or "gpt-5.2-codex": GPT-5.2 Codex (default, balanced)
173
+ - "5.2-think" or "gpt-5.2": GPT-5.2 with extended reasoning
174
+ - "sonnet": Claude Sonnet (balanced)
175
+ - "opus": Claude Opus (most capable)
174
176
 
175
177
  WORKFLOW:
176
- 1. delegate(task="...") -> session_id
178
+ 1. delegate(task="...", model="5.2") -> session_id
177
179
  2. sleep(30)
178
180
  3. peek_session(session_id) -> {is_running: true/false}
179
181
  4. If is_running, goto 2
@@ -181,22 +183,35 @@ def delegate(
181
183
 
182
184
  Args:
183
185
  task: Clear description of what to do. Be specific about requirements.
184
- model: Model override (codex: gpt-5.1-codex-mini, claude: sonnet).
186
+ model: Model name or alias. Adapter is auto-detected from model.
185
187
  working_dir: Directory for executor to work in (default: orchestrator's dir).
186
- adapter: Which executor to use - "codex" (default) or "claude".
188
+ adapter: Override adapter (usually auto-detected from model).
187
189
 
188
190
  Returns:
189
- {session_id, status: "running", adapter}
191
+ {session_id, status: "running", adapter, model}
190
192
 
191
193
  Example:
192
- delegate(task="Add a logout button to the navbar")
193
- delegate(task="Refactor auth to OAuth2", adapter="claude")
194
+ delegate(task="Add a logout button", model="5.2")
195
+ delegate(task="Complex refactor", model="opus")
194
196
  """
197
+ from zwarm.core.registry import get_adapter_for_model, resolve_model
198
+
199
+ # Auto-detect adapter from model if not specified
200
+ effective_adapter = adapter
201
+ if model and not adapter:
202
+ detected = get_adapter_for_model(model)
203
+ if detected:
204
+ effective_adapter = detected
205
+
206
+ # Fall back to codex if still no adapter
207
+ if not effective_adapter:
208
+ effective_adapter = "codex"
209
+
195
210
  # Validate adapter
196
- if adapter not in ADAPTERS:
211
+ if effective_adapter not in ADAPTERS:
197
212
  return {
198
213
  "success": False,
199
- "error": f"Unknown adapter: {adapter}. Available: {ADAPTERS}",
214
+ "error": f"Unknown adapter: {effective_adapter}. Available: {ADAPTERS}",
200
215
  "hint": f"Use one of: {ADAPTERS}",
201
216
  }
202
217
 
@@ -215,11 +230,12 @@ def delegate(
215
230
  }
216
231
 
217
232
  # Get the session manager for this adapter
218
- manager = _get_adapter_manager(self, adapter)
233
+ manager = _get_adapter_manager(self, effective_adapter)
219
234
 
220
- # Determine model (defaults vary by adapter)
235
+ # Determine model - resolve aliases to canonical names
221
236
  if model:
222
- effective_model = model
237
+ model_info = resolve_model(model)
238
+ effective_model = model_info.canonical if model_info else model
223
239
  elif self.config.executor.model:
224
240
  effective_model = self.config.executor.model
225
241
  else:
@@ -245,7 +261,7 @@ def delegate(
245
261
  "session_id": session.id,
246
262
  "status": "running",
247
263
  "task": _truncate(task, 100),
248
- "adapter": adapter,
264
+ "adapter": effective_adapter,
249
265
  "model": effective_model,
250
266
  "hint": "Use sleep() then check_session(session_id) to monitor progress",
251
267
  }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes