zwarm 3.4.0__py3-none-any.whl → 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/core/registry.py ADDED
@@ -0,0 +1,329 @@
1
+ """
2
+ Model Registry - Centralized LLM model definitions for zwarm.
3
+
4
+ This registry defines all supported models with:
5
+ - Canonical names and aliases
6
+ - Adapter mapping (which CLI handles the model)
7
+ - Pricing information
8
+
9
+ Add new models here and they'll automatically appear in:
10
+ - `zwarm interactive` help and `models` command
11
+ - Cost estimation
12
+ - Adapter auto-detection from model name
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass, field
18
+ from typing import Any
19
+
20
+
21
@dataclass
class ModelInfo:
    """Complete information about an LLM model."""

    # --- Identity ---
    canonical: str  # Full model name (e.g., "gpt-5.1-codex-mini")
    adapter: str  # Which CLI serves this model: "codex" or "claude"
    aliases: list[str] = field(default_factory=list)  # Short synonyms accepted in lookups

    # --- Pricing (US dollars per million tokens) ---
    input_per_million: float = 0.0
    output_per_million: float = 0.0
    cached_input_per_million: float | None = None  # None => no separate cached rate

    # --- Metadata ---
    description: str = ""
    is_default: bool = False  # True if this is the adapter's default model

    def estimate_cost(
        self,
        input_tokens: int,
        output_tokens: int,
        cached_tokens: int = 0,
    ) -> float:
        """Estimate cost in dollars.

        Args:
            input_tokens: Input (prompt) tokens.
            output_tokens: Generated (completion) tokens.
            cached_tokens: Cached input tokens; billed only when a cached
                rate is configured on this model.

        Returns:
            Estimated cost in USD.

        NOTE(review): the cached-token cost is added ON TOP of the full
        input cost. This assumes callers pass cached_tokens that are NOT
        already counted inside input_tokens — confirm against the token
        usage producers.
        """
        cost = (input_tokens / 1_000_000) * self.input_per_million
        cost += (output_tokens / 1_000_000) * self.output_per_million
        # Skip cached billing when no cached tokens or no cached rate.
        if cached_tokens and self.cached_input_per_million:
            cost += (cached_tokens / 1_000_000) * self.cached_input_per_million
        return cost
54
+
55
+
56
# =============================================================================
# Model Registry - ADD NEW MODELS HERE
# =============================================================================

MODELS: list[ModelInfo] = [
    # --- OpenAI Codex models (served through the `codex` CLI) ---
    ModelInfo(
        canonical="gpt-5.1-codex-mini",
        adapter="codex",
        aliases=["codex-mini", "mini"],
        input_per_million=0.25,
        output_per_million=2.00,
        cached_input_per_million=0.025,
        description="Fast, cost-effective coding model",
        is_default=True,
    ),
    ModelInfo(
        canonical="gpt-5.1-codex",
        adapter="codex",
        aliases=["codex", "codex-full"],
        input_per_million=1.25,
        output_per_million=10.00,
        cached_input_per_million=0.125,
        description="Full Codex model with extended reasoning",
    ),
    ModelInfo(
        canonical="gpt-5.1-codex-max",
        adapter="codex",
        aliases=["codex-max", "max"],
        input_per_million=1.25,
        output_per_million=10.00,
        cached_input_per_million=0.125,
        description="Maximum context Codex model",
    ),
    # --- Anthropic Claude models (served through the `claude` CLI) ---
    ModelInfo(
        canonical="sonnet",
        adapter="claude",
        aliases=["claude-sonnet", "claude-4-sonnet"],
        input_per_million=3.00,
        output_per_million=15.00,
        description="Balanced Claude model for most tasks",
        is_default=True,
    ),
    ModelInfo(
        canonical="opus",
        adapter="claude",
        aliases=["claude-opus", "claude-4-opus"],
        input_per_million=15.00,
        output_per_million=75.00,
        description="Most capable Claude model",
    ),
    ModelInfo(
        canonical="haiku",
        adapter="claude",
        aliases=["claude-haiku", "claude-4-haiku"],
        input_per_million=0.25,
        output_per_million=1.25,
        description="Fast, lightweight Claude model",
    ),
]
121
+
122
+
123
+ # =============================================================================
124
+ # Registry Lookups
125
+ # =============================================================================
126
+
127
+
128
def _build_lookup_tables() -> tuple[dict[str, ModelInfo], dict[str, ModelInfo]]:
    """Build lookup tables for fast model resolution.

    Returns:
        ``(by_canonical, by_alias)`` — both map lowercased names to
        ModelInfo. ``by_alias`` also contains every canonical name, so it
        alone resolves any exact name or alias.
    """
    by_canonical: dict[str, ModelInfo] = {}
    by_alias: dict[str, ModelInfo] = {}

    for model in MODELS:
        key = model.canonical.lower()
        by_canonical[key] = model
        # Canonical names double as their own alias for exact lookups.
        by_alias[key] = model
        for alias in model.aliases:
            by_alias[alias.lower()] = model

    return by_canonical, by_alias


# Built once at import time; MODELS is static for the process lifetime.
_BY_CANONICAL, _BY_ALIAS = _build_lookup_tables()
143
+
144
+
145
def resolve_model(name: str) -> ModelInfo | None:
    """
    Resolve a model name or alias to its ModelInfo.

    Resolution order:
    1. Exact (case-insensitive) match against aliases and canonical names.
    2. Longest canonical-name prefix match, so dated variants such as
       "gpt-5.1-codex-mini-2026-01" resolve to "gpt-5.1-codex-mini".

    Args:
        name: Model name, alias, or prefixed/dated variant

    Returns:
        ModelInfo or None if not found
    """
    name_lower = name.lower()

    # Exact match on alias or canonical
    if name_lower in _BY_ALIAS:
        return _BY_ALIAS[name_lower]

    # Prefix match — try LONGER canonical names first. "gpt-5.1-codex" is
    # itself a prefix of "gpt-5.1-codex-max", so naive insertion-order
    # iteration would resolve "gpt-5.1-codex-max-2026-01" to the shorter
    # (wrong) model.
    for canonical in sorted(_BY_CANONICAL, key=len, reverse=True):
        if name_lower.startswith(canonical):
            return _BY_CANONICAL[canonical]

    return None
167
+
168
+
169
def get_adapter_for_model(name: str) -> str | None:
    """
    Get the adapter name for a model.

    Args:
        name: Model name or alias

    Returns:
        Adapter name ("codex" or "claude") or None if unknown
    """
    info = resolve_model(name)
    if info is None:
        return None
    return info.adapter
181
+
182
+
183
def get_default_model(adapter: str) -> str | None:
    """
    Get the default model for an adapter.

    Args:
        adapter: Adapter name ("codex" or "claude")

    Returns:
        Default model canonical name or None
    """
    # First registered default for this adapter wins, matching scan order.
    return next(
        (m.canonical for m in MODELS if m.adapter == adapter and m.is_default),
        None,
    )
197
+
198
+
199
def list_models(adapter: str | None = None) -> list[ModelInfo]:
    """
    List available models.

    Args:
        adapter: Filter by adapter, or None for all

    Returns:
        List of ModelInfo objects (a fresh list; safe for callers to mutate)
    """
    if not adapter:
        return MODELS.copy()
    return [model for model in MODELS if model.adapter == adapter]
212
+
213
+
214
def list_adapters() -> list[str]:
    """Get the sorted list of unique adapter names."""
    return sorted({model.adapter for model in MODELS})
217
+
218
+
219
def get_models_help_text() -> str:
    """
    Generate help text showing all available models.

    Returns formatted string for display in help messages; defaults are
    marked with a trailing "*".
    """
    out = ["", "Available models:"]

    for adapter_name in list_adapters():
        out.append(f"\n  {adapter_name.upper()}:")
        for info in list_models(adapter_name):
            marker = " *" if info.is_default else ""
            alias_part = f" ({', '.join(info.aliases)})" if info.aliases else ""
            out.append(f"    {info.canonical}{alias_part}{marker}")

    out.append("\n  * = default for adapter")
    return "\n".join(out)
238
+
239
+
240
def get_models_table_data() -> list[dict[str, Any]]:
    """
    Get model data formatted for table display.

    Returns a list of dicts with keys: adapter, model, aliases, default,
    input_price, output_price, description.
    """
    return [
        {
            "adapter": m.adapter,
            "model": m.canonical,
            "aliases": ", ".join(m.aliases),
            "default": m.is_default,
            "input_price": m.input_per_million,
            "output_price": m.output_per_million,
            "description": m.description,
        }
        for m in MODELS
    ]
258
+
259
+
260
+ # =============================================================================
261
+ # Cost Estimation
262
+ # =============================================================================
263
+
264
+
265
def estimate_cost(
    model: str,
    input_tokens: int,
    output_tokens: int,
    cached_tokens: int = 0,
) -> float | None:
    """
    Estimate cost for a model run.

    Args:
        model: Model name or alias
        input_tokens: Number of input tokens
        output_tokens: Number of output tokens
        cached_tokens: Number of cached input tokens

    Returns:
        Cost in USD, or None if model unknown
    """
    info = resolve_model(model)
    if info is None:
        # Unknown model => no pricing data; callers treat None as "unknown".
        return None
    return info.estimate_cost(input_tokens, output_tokens, cached_tokens)
288
+
289
+
290
def format_cost(cost: float | None) -> str:
    """Render a dollar cost with precision scaled to its magnitude.

    Tiny amounts keep extra decimals so they don't round to $0.00;
    unknown costs (None) render as "?".
    """
    if cost is None:
        return "?"
    if cost < 0.01:
        precision = 4
    elif cost < 1.00:
        precision = 3
    else:
        precision = 2
    return f"${cost:.{precision}f}"
300
+
301
+
302
def estimate_session_cost(
    model: str,
    token_usage: dict[str, Any],
) -> dict[str, Any]:
    """
    Estimate cost for a session given its token usage.

    Args:
        model: Model used
        token_usage: Dict with input_tokens, output_tokens, cached_tokens
            (missing counts default to 0)

    Returns:
        Dict with cost info: {cost, cost_formatted, pricing_known, model,
        input_tokens, output_tokens}
    """
    # Missing usage fields count as zero tokens.
    in_toks = token_usage.get("input_tokens", 0)
    out_toks = token_usage.get("output_tokens", 0)
    cached = token_usage.get("cached_tokens", 0)

    cost = estimate_cost(model, in_toks, out_toks, cached)

    return {
        "cost": cost,
        "cost_formatted": format_cost(cost),
        "pricing_known": cost is not None,  # False when model pricing unknown
        "model": model,
        "input_tokens": in_toks,
        "output_tokens": out_toks,
    }
zwarm/orchestrator.py CHANGED
@@ -293,13 +293,60 @@ Review what was accomplished in the previous session and delegate new tasks as n
293
293
 
294
294
  def perceive(self) -> None:
295
295
  """
296
- Override perceive to refresh environment observation each step.
296
+ Override perceive to properly inject system prompt and environment observation.
297
297
 
298
- The base YamlAgent only adds env.observe() on step 0. We need to
299
- update it each step to show current progress, sessions, etc.
298
+ Fixes over base YamlAgent:
299
+ 1. Always injects system prompt on step 0, even if messages isn't empty
300
+ (pilot mode adds user messages before perceive runs)
301
+ 2. Only adds "Task: " message if there's actually a task (skips for pilot mode)
302
+ 3. Refreshes environment observation each step
303
+
304
+ Note: self.messages can contain both dict messages AND OpenAI response objects
305
+ (ResponseReasoningItem, ResponseMessageItem, etc.), so we must check isinstance().
300
306
  """
301
- # Let base class do initial setup
302
- super().perceive()
307
+ from datetime import datetime
308
+
309
+ def _is_dict_msg(msg, role: str | None = None, content_check: str | None = None) -> bool:
310
+ """Check if msg is a dict with optional role/content matching."""
311
+ if not isinstance(msg, dict):
312
+ return False
313
+ if role and msg.get("role") != role:
314
+ return False
315
+ if content_check and content_check not in msg.get("content", ""):
316
+ return False
317
+ return True
318
+
319
+ # On step 0, ensure system prompt is present
320
+ if self._step_count == 0:
321
+ # Check if system prompt already exists (avoid duplicates on resume)
322
+ has_system_prompt = False
323
+ if self.system_prompt:
324
+ prompt_snippet = self.system_prompt[:100]
325
+ has_system_prompt = any(
326
+ _is_dict_msg(msg, role="system", content_check=prompt_snippet)
327
+ for msg in self.messages
328
+ )
329
+
330
+ if not has_system_prompt and self.system_prompt:
331
+ today = datetime.now().strftime("%Y-%m-%d")
332
+ # Insert at beginning to ensure it's first
333
+ self.messages.insert(0, {
334
+ "role": "system",
335
+ "content": f"{self.system_prompt}\n\nToday's date: {today}",
336
+ })
337
+
338
+ # Add task message ONLY if we have a task (skip for pilot mode where task is empty)
339
+ task = getattr(self.env, "task", "")
340
+ if task:
341
+ # Check if Task message already exists (avoid duplicates)
342
+ has_task_msg = any(
343
+ isinstance(msg, dict)
344
+ and msg.get("role") == "user"
345
+ and msg.get("content", "").startswith("Task: ")
346
+ for msg in self.messages
347
+ )
348
+ if not has_task_msg:
349
+ self.messages.append({"role": "user", "content": f"Task: {task}"})
303
350
 
304
351
  # Update environment observation
305
352
  env_obs = (self.env.observe() or "").strip()
@@ -308,15 +355,20 @@ Review what was accomplished in the previous session and delegate new tasks as n
308
355
 
309
356
  # Find and update existing env observation, or append new one
310
357
  # Look for a system message containing our markers
311
- env_marker = "## Progress" # Our env observation has this
358
+ # Note: pilot mode uses "## Active Sessions", full mode uses "## Progress"
359
+ env_markers = ["## Progress", "## Active Sessions", "Working dir:"]
312
360
 
313
361
  for i, msg in enumerate(self.messages):
314
- if msg.get("role") == "system" and env_marker in msg.get("content", ""):
315
- # Update in place
316
- self.messages[i]["content"] = env_obs
317
- return
318
-
319
- # Not found - append as new system message (shouldn't happen after step 0)
362
+ if not isinstance(msg, dict):
363
+ continue
364
+ if msg.get("role") == "system":
365
+ content = msg.get("content", "")
366
+ if any(marker in content for marker in env_markers):
367
+ # Update in place
368
+ self.messages[i]["content"] = env_obs
369
+ return
370
+
371
+ # Not found - append as new system message
320
372
  self.messages.append({"role": "system", "content": env_obs})
321
373
 
322
374
  @weave.op()
@@ -1,26 +1,65 @@
1
1
  """
2
- Codex Session Manager.
2
+ Session Manager - Background process management for executor agents.
3
3
 
4
- A standalone session manager for running Codex agents in the background.
5
- Similar to Sculptor/Claude parallel tools but for Codex.
4
+ Supports multiple executor adapters:
5
+ - Codex (CodexSessionManager) - OpenAI's Codex CLI
6
+ - Claude (ClaudeSessionManager) - Anthropic's Claude Code CLI
6
7
 
7
8
  Features:
8
- - Start codex exec tasks in background processes
9
+ - Start executor tasks in background processes
9
10
  - Monitor status and view message history
10
11
  - Inject follow-up messages (continue conversations)
11
12
  - Kill running sessions
13
+ - Unified interface via BaseSessionManager
12
14
  """
13
15
 
14
- from zwarm.sessions.manager import (
15
- CodexSession,
16
- CodexSessionManager,
16
+ from zwarm.sessions.base import (
17
+ BaseSessionManager,
18
+ CodexSession, # Alias for Session (backwards compat)
19
+ Session,
17
20
  SessionMessage,
18
21
  SessionStatus,
19
22
  )
23
+ from zwarm.sessions.manager import CodexSessionManager
24
+
25
# Adapters that get_session_manager() knows how to construct.
AVAILABLE_ADAPTERS = ["codex", "claude"]

__all__ = [
    # Base classes
    "BaseSessionManager",
    "Session",
    "SessionMessage",
    "SessionStatus",
    # Backwards compatibility
    "CodexSession",
    # Adapters
    "CodexSessionManager",
    # Registry
    "AVAILABLE_ADAPTERS",
    # Factory
    "get_session_manager",
]
43
+
44
+
45
def get_session_manager(adapter: str, state_dir: str = ".zwarm") -> BaseSessionManager:
    """
    Factory function to get a session manager for the given adapter.

    Args:
        adapter: Adapter name ("codex" or "claude")
        state_dir: State directory path

    Returns:
        Session manager instance

    Raises:
        ValueError: If adapter is not recognized
    """
    if adapter == "codex":
        return CodexSessionManager(state_dir)
    if adapter == "claude":
        # Imported lazily so the claude adapter stays optional.
        from zwarm.sessions.claude import ClaudeSessionManager

        return ClaudeSessionManager(state_dir)
    raise ValueError(f"Unknown adapter: {adapter}. Available: {AVAILABLE_ADAPTERS}")