zwarm 3.0.1__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,216 @@
+ """
+ Checkpoint primitives for state management.
+
+ Provides time-travel capability by recording snapshots of state at key points.
+ Used by pilot for turn-by-turn checkpointing, and potentially by other
+ interfaces that need state restoration.
+
+ Topology reminder:
+     orchestrator → pilot → interactive → CodexSessionManager
+
+ These primitives sit at the core layer, usable by any interface above.
+ """
+
+ from __future__ import annotations
+
+ import copy
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from typing import Any
+
+
+ @dataclass
+ class Checkpoint:
+     """
+     A snapshot of state at a specific point in time.
+
+     Attributes:
+         checkpoint_id: Unique identifier (e.g., turn number)
+         label: Human-readable label (e.g., "T1", "T2")
+         description: What action led to this state
+         state: The actual state snapshot (deep-copied)
+         timestamp: When the checkpoint was created
+         metadata: Optional extra data
+     """
+     checkpoint_id: int
+     label: str
+     description: str
+     state: dict[str, Any]
+     timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+
+ @dataclass
+ class CheckpointManager:
+     """
+     Manages checkpoints and time travel.
+
+     Maintains a list of checkpoints and a current position. Supports:
+     - Recording new checkpoints
+     - Jumping to any previous checkpoint
+     - Branching (going back and continuing creates a new timeline)
+     - History inspection
+
+     Usage:
+         mgr = CheckpointManager()
+
+         # Record state after each action
+         mgr.record(description="Added auth", state={"messages": [...], ...})
+         mgr.record(description="Fixed bug", state={"messages": [...], ...})
+
+         # Jump back
+         cp = mgr.goto(1)  # Go to first checkpoint
+         restored_state = cp.state
+
+         # Continue from there (branches off)
+         mgr.record(description="Different path", state={...})
+     """
+
+     checkpoints: list[Checkpoint] = field(default_factory=list)
+     current_index: int = -1  # -1 = root (before any checkpoints)
+     next_id: int = 1
+     label_prefix: str = "T"  # Labels will be T1, T2, etc.
+
+     def record(
+         self,
+         description: str,
+         state: dict[str, Any],
+         metadata: dict[str, Any] | None = None,
+     ) -> Checkpoint:
+         """
+         Record a new checkpoint.
+
+         If not at the end of history (i.e., we've gone back), this creates
+         a branch - future checkpoints are discarded.
+
+         Args:
+             description: What action led to this state
+             state: State to snapshot (will be deep-copied)
+             metadata: Optional extra data
+
+         Returns:
+             The created checkpoint
+         """
+         checkpoint = Checkpoint(
+             checkpoint_id=self.next_id,
+             label=f"{self.label_prefix}{self.next_id}",
+             description=description,
+             state=copy.deepcopy(state),
+             metadata=metadata or {},
+         )
+
+         # If we're not at the end, we're branching - truncate the future
+         if self.current_index < len(self.checkpoints) - 1:
+             self.checkpoints = self.checkpoints[:self.current_index + 1]
+
+         self.checkpoints.append(checkpoint)
+         self.current_index = len(self.checkpoints) - 1
+         self.next_id += 1
+
+         return checkpoint
+
+     def goto(self, checkpoint_id: int) -> Checkpoint | None:
+         """
+         Jump to a specific checkpoint.
+
+         Args:
+             checkpoint_id: The checkpoint ID to jump to (0 = root)
+
+         Returns:
+             The checkpoint, or None if not found (or root)
+         """
+         if checkpoint_id == 0:
+             # Root state - before any checkpoints
+             self.current_index = -1
+             return None
+
+         for i, cp in enumerate(self.checkpoints):
+             if cp.checkpoint_id == checkpoint_id:
+                 self.current_index = i
+                 return cp
+
+         return None  # Not found
+
+     def goto_label(self, label: str) -> Checkpoint | None:
+         """
+         Jump to a checkpoint by label (e.g., "T1", "root").
+
+         Args:
+             label: The label to find
+
+         Returns:
+             The checkpoint, or None if not found
+         """
+         if label.lower() == "root":
+             self.current_index = -1
+             return None
+
+         for i, cp in enumerate(self.checkpoints):
+             if cp.label == label:
+                 self.current_index = i
+                 return cp
+
+         return None
+
+     def current(self) -> Checkpoint | None:
+         """Get the current checkpoint, or None if at root."""
+         if self.current_index < 0 or self.current_index >= len(self.checkpoints):
+             return None
+         return self.checkpoints[self.current_index]
+
+     def current_state(self) -> dict[str, Any] | None:
+         """Get the current state, or None if at root."""
+         cp = self.current()
+         return copy.deepcopy(cp.state) if cp else None
+
+     def history(
+         self,
+         limit: int | None = None,
+         include_state: bool = False,
+     ) -> list[dict[str, Any]]:
+         """
+         Get history entries for display.
+
+         Args:
+             limit: Max entries to return (most recent)
+             include_state: Whether to include full state in entries
+
+         Returns:
+             List of history entries with checkpoint info
+         """
+         entries = []
+         for i, cp in enumerate(self.checkpoints):
+             entry = {
+                 "checkpoint_id": cp.checkpoint_id,
+                 "label": cp.label,
+                 "description": cp.description,
+                 "timestamp": cp.timestamp,
+                 "is_current": i == self.current_index,
+                 "metadata": cp.metadata,
+             }
+             if include_state:
+                 entry["state"] = cp.state
+             entries.append(entry)
+
+         if limit:
+             entries = entries[-limit:]
+
+         return entries
+
+     def label_for(self, checkpoint_id: int) -> str:
+         """Get label for a checkpoint ID."""
+         if checkpoint_id == 0:
+             return "root"
+         return f"{self.label_prefix}{checkpoint_id}"
+
+     def __len__(self) -> int:
+         """Number of checkpoints."""
+         return len(self.checkpoints)
+
+     def is_at_root(self) -> bool:
+         """Whether we're at root (before any checkpoints)."""
+         return self.current_index < 0
+
+     def is_at_end(self) -> bool:
+         """Whether we're at the most recent checkpoint."""
+         return self.current_index == len(self.checkpoints) - 1
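The branching semantics of `record()` plus `goto()`/`goto_label()` are easiest to see end to end. A minimal sketch (the import path is a guess, since this file's header is not shown in the diff):

```python
from zwarm.core.checkpoint import CheckpointManager  # hypothetical module path

mgr = CheckpointManager()
mgr.record(description="Added auth", state={"step": 1})  # becomes T1
mgr.record(description="Fixed bug", state={"step": 2})   # becomes T2

mgr.goto_label("T1")                                     # move back to T1
mgr.record(description="Alt path", state={"step": 3})    # becomes T3, truncates T2

labels = [entry["label"] for entry in mgr.history()]
assert labels == ["T1", "T3"]  # T2 was discarded when we branched
assert mgr.is_at_end()         # recording always moves to the new tip
```

Note that `next_id` keeps incrementing across branches, so discarded IDs (here, 2) are never reused; `goto(2)` after the branch simply returns `None`.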
zwarm/core/costs.py ADDED
@@ -0,0 +1,199 @@
+ """
+ Token cost estimation for LLM models.
+
+ Pricing data is hardcoded and may become stale. Last updated: 2026-01.
+
+ Sources:
+ - https://www.helicone.ai/llm-cost/provider/openai/model/gpt-5.1-codex
+ - https://pricepertoken.com/pricing-page/model/openai-codex-mini
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Any
+
+
+ @dataclass
+ class ModelPricing:
+     """Pricing for a model in $ per million tokens."""
+     input_per_million: float
+     output_per_million: float
+     cached_input_per_million: float | None = None  # Some models have a cached-input discount
+
+     def estimate_cost(
+         self,
+         input_tokens: int,
+         output_tokens: int,
+         cached_tokens: int = 0,
+     ) -> float:
+         """
+         Estimate cost in dollars.
+
+         Args:
+             input_tokens: Number of input tokens
+             output_tokens: Number of output tokens
+             cached_tokens: Number of cached input tokens (if applicable)
+
+         Returns:
+             Estimated cost in USD
+         """
+         input_cost = (input_tokens / 1_000_000) * self.input_per_million
+         output_cost = (output_tokens / 1_000_000) * self.output_per_million
+
+         cached_cost = 0.0
+         if cached_tokens and self.cached_input_per_million:
+             cached_cost = (cached_tokens / 1_000_000) * self.cached_input_per_million
+
+         return input_cost + output_cost + cached_cost
+
+
+ # Model pricing table ($ per million tokens)
+ # Last updated: 2026-01
+ MODEL_PRICING: dict[str, ModelPricing] = {
+     # OpenAI Codex models
+     "gpt-5.1-codex": ModelPricing(
+         input_per_million=1.25,
+         output_per_million=10.00,
+         cached_input_per_million=0.125,  # 90% discount for cached
+     ),
+     "gpt-5.1-codex-mini": ModelPricing(
+         input_per_million=0.25,
+         output_per_million=2.00,
+         cached_input_per_million=0.025,
+     ),
+     "gpt-5.1-codex-max": ModelPricing(
+         input_per_million=1.25,
+         output_per_million=10.00,
+         cached_input_per_million=0.125,
+     ),
+     # GPT-5 base models (for reference)
+     "gpt-5": ModelPricing(
+         input_per_million=1.25,
+         output_per_million=10.00,
+     ),
+     "gpt-5-mini": ModelPricing(
+         input_per_million=0.25,
+         output_per_million=2.00,
+     ),
+     # Claude models (Anthropic)
+     "claude-sonnet-4-20250514": ModelPricing(
+         input_per_million=3.00,
+         output_per_million=15.00,
+     ),
+     "claude-opus-4-20250514": ModelPricing(
+         input_per_million=15.00,
+         output_per_million=75.00,
+     ),
+     "claude-3-5-sonnet-20241022": ModelPricing(
+         input_per_million=3.00,
+         output_per_million=15.00,
+     ),
+ }
+
+ # Aliases for common model names
+ MODEL_ALIASES: dict[str, str] = {
+     "codex": "gpt-5.1-codex",
+     "codex-mini": "gpt-5.1-codex-mini",
+     "codex-max": "gpt-5.1-codex-max",
+     "gpt5": "gpt-5",
+     "gpt5-mini": "gpt-5-mini",
+     "sonnet": "claude-sonnet-4-20250514",
+     "opus": "claude-opus-4-20250514",
+ }
+
+
+ def get_pricing(model: str) -> ModelPricing | None:
+     """
+     Get pricing for a model.
+
+     Args:
+         model: Model name or alias
+
+     Returns:
+         ModelPricing or None if unknown
+     """
+     # Check aliases first
+     resolved = MODEL_ALIASES.get(model.lower(), model)
+
+     # Exact match
+     if resolved in MODEL_PRICING:
+         return MODEL_PRICING[resolved]
+
+     # Try lowercase
+     if resolved.lower() in MODEL_PRICING:
+         return MODEL_PRICING[resolved.lower()]
+
+     # Try prefix matching (e.g., "gpt-5.1-codex-mini-2026-01" -> "gpt-5.1-codex-mini")
+     for known_model in MODEL_PRICING:
+         if resolved.lower().startswith(known_model.lower()):
+             return MODEL_PRICING[known_model]
+
+     return None
+
+
+ def estimate_cost(
+     model: str,
+     input_tokens: int,
+     output_tokens: int,
+     cached_tokens: int = 0,
+ ) -> float | None:
+     """
+     Estimate cost for a model run.
+
+     Args:
+         model: Model name
+         input_tokens: Number of input tokens
+         output_tokens: Number of output tokens
+         cached_tokens: Number of cached input tokens
+
+     Returns:
+         Cost in USD, or None if model pricing unknown
+     """
+     pricing = get_pricing(model)
+     if pricing is None:
+         return None
+
+     return pricing.estimate_cost(input_tokens, output_tokens, cached_tokens)
+
+
+ def format_cost(cost: float | None) -> str:
+     """Format cost as a human-readable string."""
+     if cost is None:
+         return "?"
+     if cost < 0.01:
+         return f"${cost:.4f}"
+     elif cost < 1.00:
+         return f"${cost:.3f}"
+     else:
+         return f"${cost:.2f}"
+
+
+ def estimate_session_cost(
+     model: str,
+     token_usage: dict[str, Any],
+ ) -> dict[str, Any]:
+     """
+     Estimate cost for a session given its token usage.
+
+     Args:
+         model: Model used
+         token_usage: Dict with input_tokens, output_tokens, etc.
+
+     Returns:
+         Dict with cost info: {cost, cost_formatted, pricing_known}
+     """
+     input_tokens = token_usage.get("input_tokens", 0)
+     output_tokens = token_usage.get("output_tokens", 0)
+     cached_tokens = token_usage.get("cached_tokens", 0)
+
+     cost = estimate_cost(model, input_tokens, output_tokens, cached_tokens)
+
+     return {
+         "cost": cost,
+         "cost_formatted": format_cost(cost),
+         "pricing_known": cost is not None,
+         "model": model,
+         "input_tokens": input_tokens,
+         "output_tokens": output_tokens,
+     }
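For reference, here is how the helpers in `zwarm/core/costs.py` compose, with the arithmetic following the pricing table above (a sketch assuming the import path matches the file path):

```python
from zwarm.core.costs import estimate_cost, format_cost, get_pricing

# The alias "codex" resolves to the "gpt-5.1-codex" table entry.
assert get_pricing("codex") is get_pricing("gpt-5.1-codex")

# 100k input + 20k output tokens on gpt-5.1-codex:
# (0.1 * $1.25) + (0.02 * $10.00) = $0.325
cost = estimate_cost("gpt-5.1-codex", input_tokens=100_000, output_tokens=20_000)
print(format_cost(cost))  # "$0.325" (three decimals, since cost < $1.00)

# Unknown models yield None, which format_cost renders as "?".
print(format_cost(estimate_cost("some-unknown-model", 1, 1)))  # "?"
```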
zwarm/tools/delegation.py CHANGED
@@ -19,7 +19,7 @@ from __future__ import annotations
 
  import time
  from pathlib import Path
- from typing import TYPE_CHECKING, Any, Literal
+ from typing import TYPE_CHECKING, Any
 
  from wbal.helper import weaveTool
 
@@ -44,37 +44,6 @@ def _get_session_manager(orchestrator: "Orchestrator"):
      return orchestrator._session_manager
 
 
- def _wait_for_completion(manager, session_id: str, timeout: float = 300.0, poll_interval: float = 1.0) -> bool:
-     """
-     Wait for a session to complete.
-
-     Args:
-         manager: CodexSessionManager
-         session_id: Session to wait for
-         timeout: Max seconds to wait
-         poll_interval: Seconds between polls
-
-     Returns:
-         True if completed, False if timed out
-     """
-     from zwarm.sessions import SessionStatus
-
-     start = time.time()
-     while time.time() - start < timeout:
-         # get_session() auto-updates status based on output completion markers
-         session = manager.get_session(session_id)
-         if not session:
-             return False
-
-         # Check status (not is_running - PID check is unreliable due to reuse)
-         if session.status in (SessionStatus.COMPLETED, SessionStatus.FAILED, SessionStatus.KILLED):
-             return True
-
-         time.sleep(poll_interval)
-
-     return False
-
-
  def _truncate(text: str, max_len: int = 200) -> str:
      """Truncate text with ellipsis."""
      if len(text) <= max_len:
@@ -158,7 +127,6 @@ def _validate_working_dir(
  def delegate(
      self: "Orchestrator",
      task: str,
-     mode: Literal["sync", "async"] = "async",
      model: str | None = None,
      working_dir: str | None = None,
  ) -> dict[str, Any]:
@@ -166,11 +134,9 @@ def delegate(
      Delegate work to a Codex agent.
 
      This spawns a codex session - the exact same way `zwarm interactive` does.
+     All sessions run async - you get a session_id immediately and poll for results.
 
-     **NOTE: All sessions run async.** The mode parameter is ignored - sessions
-     always return immediately. Use sleep() + peek_session() to poll for completion.
-
-     Async workflow pattern:
+     Workflow pattern:
      1. delegate(task="Add logout button") -> session_id
      2. sleep(30) -> give it time
      3. peek_session(session_id) -> check if done
@@ -179,7 +145,6 @@ def delegate(
 
      Args:
          task: Clear description of what to do. Be specific about requirements.
-         mode: IGNORED - always async. (Legacy parameter, will be removed.)
          model: Model override (default: gpt-5.1-codex-mini).
         working_dir: Directory for codex to work in (default: orchestrator's dir).
 
@@ -191,9 +156,6 @@ def delegate(
          sleep(30)
          peek_session(session_id)  # Check progress
      """
-     # Force async mode - sync is deprecated
-     # TODO: Remove sync codepath entirely (see STATE.md)
-     mode = "async"
      # Validate working directory
      effective_dir, dir_error = _validate_working_dir(
          working_dir,
@@ -228,74 +190,15 @@ def delegate(
          adapter="codex",
      )
 
-     # For sync mode, wait for completion
-     if mode == "sync":
-         completed = _wait_for_completion(
-             manager,
-             session.id,
-             timeout=self.config.executor.timeout or 300.0,
-         )
-
-         # Refresh session to get updated status and messages
-         session = manager.get_session(session.id)
-
-         if not completed:
-             return {
-                 "success": False,
-                 "session_id": session.id,
-                 "status": "timeout",
-                 "error": "Session timed out waiting for codex to complete",
-                 "hint": "Use check_session() to monitor progress, or use async mode for long tasks",
-             }
-
-         # Get the response from messages
-         response_text = ""
-         messages = manager.get_messages(session.id)
-         for msg in messages:
-             if msg.role == "assistant":
-                 response_text = msg.content
-                 break  # Take first assistant message
-
-         # Build log path for debugging
-         log_path = str(manager._output_path(session.id, session.turn))
-
-         # Check if session failed
-         from zwarm.sessions import SessionStatus
-         if session.status == SessionStatus.FAILED:
-             return {
-                 "success": False,
-                 "session": _format_session_header(session),
-                 "session_id": session.id,
-                 "status": "failed",
-                 "task": _truncate(task, 100),
-                 "error": session.error or "Unknown error",
-                 "response": response_text or "(no response captured)",
-                 "tokens": _get_total_tokens(session),
-                 "log_file": log_path,
-                 "hint": "Check log_file for raw codex output. Use bash('cat <log_file>') to inspect.",
-             }
-
-         return {
-             "success": True,
-             "session": _format_session_header(session),
-             "session_id": session.id,
-             "status": session.status.value,
-             "task": _truncate(task, 100),
-             "response": response_text or "(no response captured)",
-             "tokens": _get_total_tokens(session),
-             "log_file": log_path,
-             "hint": "Use converse(session_id, message) to send follow-up messages",
-         }
-     else:
-         # Async mode - return immediately
-         return {
-             "success": True,
-             "session": _format_session_header(session),
-             "session_id": session.id,
-             "status": "running",
-             "task": _truncate(task, 100),
-             "hint": "Use check_session(session_id) to monitor progress",
-         }
+     # Return immediately - session runs in background
+     return {
+         "success": True,
+         "session": _format_session_header(session),
+         "session_id": session.id,
+         "status": "running",
+         "task": _truncate(task, 100),
+         "hint": "Use sleep() then check_session(session_id) to monitor progress",
+     }
 
 
  @weaveTool
@@ -303,21 +206,17 @@ def converse(
      self: "Orchestrator",
      session_id: str,
      message: str,
-     wait: bool = False,
  ) -> dict[str, Any]:
      """
      Continue a conversation with a codex session.
 
      This injects a follow-up message into the session, providing the
      conversation history as context. Like chatting with a developer.
-
-     **NOTE: Always runs async.** The wait parameter is ignored - messages
-     are sent and return immediately. Use sleep() + check_session() to poll.
+     Returns immediately - use sleep() + check_session() to poll for the response.
 
      Args:
          session_id: The session to continue (from delegate() result).
          message: Your next message to codex.
-         wait: IGNORED - always async. (Legacy parameter, will be removed.)
 
      Returns:
          {session_id, turn, status: "running"}
@@ -327,10 +226,6 @@ def converse(
          sleep(30)
          check_session(session_id)  # Get response
      """
-     # Force async mode - sync is deprecated
-     # TODO: Remove sync codepath entirely (see STATE.md)
-     wait = False
-
      manager = _get_session_manager(self)
 
      # Get current session
@@ -371,53 +266,15 @@ def converse(
              "session_id": session_id,
          }
 
-     if not wait:
-         # Async mode - return immediately
-         return {
-             "success": True,
-             "session": _format_session_header(updated_session),
-             "session_id": session_id,
-             "turn": updated_session.turn,
-             "status": "running",
-             "you_said": _truncate(message, 100),
-             "hint": "Use check_session(session_id) to see the response when ready",
-         }
-
-     # Sync mode - wait for completion
-     completed = _wait_for_completion(
-         manager,
-         session_id,
-         timeout=self.config.executor.timeout or 300.0,
-     )
-
-     # Refresh session
-     session = manager.get_session(session_id)
-
-     if not completed:
-         return {
-             "success": False,
-             "session_id": session_id,
-             "status": "timeout",
-             "error": "Session timed out waiting for response",
-             "hint": "Use check_session() to monitor progress",
-         }
-
-     # Get the response (last assistant message)
-     response_text = ""
-     messages = manager.get_messages(session_id)
-     for msg in reversed(messages):
-         if msg.role == "assistant":
-             response_text = msg.content
-             break
-
+     # Return immediately - session runs in background
      return {
          "success": True,
-         "session": _format_session_header(session),
+         "session": _format_session_header(updated_session),
          "session_id": session_id,
-         "turn": session.turn,
+         "turn": updated_session.turn,
+         "status": "running",
          "you_said": _truncate(message, 100),
-         "response": response_text or "(no response captured)",
-         "tokens": _get_total_tokens(session),
+         "hint": "Use sleep() then check_session(session_id) to see the response",
      }
 
 
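The net effect of these changes: `delegate()` and `converse()` are now fire-and-forget, with the sync codepath and `_wait_for_completion()` removed entirely. A sketch of the polling loop the new docstrings prescribe, written as the bare tool calls the docstrings themselves use (the signatures of the companion tools `sleep()` and `check_session()` are not shown in this diff):

```python
# Spawn a codex session; the result always comes back with status "running".
result = delegate(task="Add a logout button to the navbar")
session_id = result["session_id"]

sleep(30)                    # give the codex session time to work
check_session(session_id)    # poll: the response appears here once done

# Follow-ups behave the same way - converse() no longer accepts wait=.
converse(session_id, "Also add a confirmation dialog to the logout flow")
sleep(30)
check_session(session_id)
```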
@@ -1,8 +1,9 @@
  Metadata-Version: 2.4
  Name: zwarm
- Version: 3.0.1
+ Version: 3.2.0
  Summary: Multi-Agent CLI Orchestration Research Platform
  Requires-Python: <3.14,>=3.13
+ Requires-Dist: prompt-toolkit>=3.0.52
  Requires-Dist: python-dotenv>=1.0.0
  Requires-Dist: pyyaml>=6.0
  Requires-Dist: rich>=13.0.0