zwarm 3.0.1__py3-none-any.whl → 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zwarm/cli/interactive.py +749 -0
- zwarm/cli/main.py +207 -854
- zwarm/cli/pilot.py +293 -151
- zwarm/core/__init__.py +20 -0
- zwarm/core/checkpoints.py +216 -0
- zwarm/core/costs.py +199 -0
- zwarm/tools/delegation.py +18 -161
- {zwarm-3.0.1.dist-info → zwarm-3.2.0.dist-info}/METADATA +2 -1
- {zwarm-3.0.1.dist-info → zwarm-3.2.0.dist-info}/RECORD +11 -8
- {zwarm-3.0.1.dist-info → zwarm-3.2.0.dist-info}/WHEEL +0 -0
- {zwarm-3.0.1.dist-info → zwarm-3.2.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Checkpoint primitives for state management.
|
|
3
|
+
|
|
4
|
+
Provides time-travel capability by recording snapshots of state at key points.
|
|
5
|
+
Used by pilot for turn-by-turn checkpointing, and potentially by other
|
|
6
|
+
interfaces that need state restoration.
|
|
7
|
+
|
|
8
|
+
Topology reminder:
|
|
9
|
+
orchestrator → pilot → interactive → CodexSessionManager
|
|
10
|
+
|
|
11
|
+
These primitives sit at the core layer, usable by any interface above.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import copy
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class Checkpoint:
    """
    A snapshot of state at a specific point in time.

    Instances are created by ``CheckpointManager.record``; the manager
    deep-copies ``state`` before constructing the checkpoint, so this class
    itself performs no copying.

    Attributes:
        checkpoint_id: Unique identifier (e.g., turn number)
        label: Human-readable label (e.g., "T1", "T2")
        description: What action led to this state
        state: The actual state snapshot (deep-copied by the caller)
        timestamp: ISO-8601 creation time (local clock, naive — no timezone)
        metadata: Optional extra data
    """
    checkpoint_id: int
    label: str
    description: str
    state: dict[str, Any]
    # default_factory so the timestamp is captured per instance, not once
    # at class-definition time.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class CheckpointManager:
    """
    Ordered timeline of checkpoints with a movable cursor.

    Supports recording snapshots, jumping back to any earlier checkpoint,
    and branching: recording while the cursor is not at the newest
    checkpoint discards everything after it and continues a new timeline.

    Usage:
        mgr = CheckpointManager()

        # Record state after each action
        mgr.record(description="Added auth", state={"messages": [...], ...})
        mgr.record(description="Fixed bug", state={"messages": [...], ...})

        # Jump back
        cp = mgr.goto(1)  # Go to first checkpoint
        restored_state = cp.state

        # Continue from there (branches off)
        mgr.record(description="Different path", state={...})
    """

    # Timeline of checkpoints, oldest first.
    checkpoints: list[Checkpoint] = field(default_factory=list)
    # Cursor into `checkpoints`; -1 means "root" (before any checkpoint).
    current_index: int = -1
    # ID handed to the next recorded checkpoint; never reused after branching.
    next_id: int = 1
    # Labels render as f"{label_prefix}{checkpoint_id}" -> T1, T2, ...
    label_prefix: str = "T"

    def record(
        self,
        description: str,
        state: dict[str, Any],
        metadata: dict[str, Any] | None = None,
    ) -> Checkpoint:
        """
        Snapshot `state` as a new checkpoint and move the cursor onto it.

        Recording while positioned before the end of the timeline branches:
        the checkpoints after the cursor are dropped first.

        Args:
            description: What action led to this state
            state: State to snapshot (will be deep-copied)
            metadata: Optional extra data

        Returns:
            The newly created checkpoint
        """
        snapshot = Checkpoint(
            checkpoint_id=self.next_id,
            label=f"{self.label_prefix}{self.next_id}",
            description=description,
            state=copy.deepcopy(state),
            metadata=metadata or {},
        )

        # Branch point: discard any "future" checkpoints beyond the cursor.
        if self.current_index < len(self.checkpoints) - 1:
            self.checkpoints = self.checkpoints[: self.current_index + 1]

        self.checkpoints.append(snapshot)
        self.current_index = len(self.checkpoints) - 1
        self.next_id += 1

        return snapshot

    def goto(self, checkpoint_id: int) -> Checkpoint | None:
        """
        Move the cursor to the checkpoint with the given ID.

        Args:
            checkpoint_id: The checkpoint ID to jump to (0 = root)

        Returns:
            The checkpoint, or None for root / unknown IDs (the cursor is
            left untouched when the ID is unknown)
        """
        if checkpoint_id == 0:
            # Root position: before any checkpoint was recorded.
            self.current_index = -1
            return None

        for idx, snapshot in enumerate(self.checkpoints):
            if snapshot.checkpoint_id != checkpoint_id:
                continue
            self.current_index = idx
            return snapshot

        return None  # Unknown ID

    def goto_label(self, label: str) -> Checkpoint | None:
        """
        Move the cursor by label (e.g., "T1", "root").

        Args:
            label: The label to find ("root" is matched case-insensitively,
                checkpoint labels are matched exactly)

        Returns:
            The checkpoint, or None if not found (or root)
        """
        if label.lower() == "root":
            self.current_index = -1
            return None

        for idx, snapshot in enumerate(self.checkpoints):
            if snapshot.label != label:
                continue
            self.current_index = idx
            return snapshot

        return None

    def current(self) -> Checkpoint | None:
        """Get the checkpoint under the cursor, or None when at root."""
        in_range = 0 <= self.current_index < len(self.checkpoints)
        return self.checkpoints[self.current_index] if in_range else None

    def current_state(self) -> dict[str, Any] | None:
        """Get a deep copy of the current checkpoint's state, or None at root."""
        snapshot = self.current()
        if snapshot is None:
            return None
        return copy.deepcopy(snapshot.state)

    def history(
        self,
        limit: int | None = None,
        include_state: bool = False,
    ) -> list[dict[str, Any]]:
        """
        Get history entries for display.

        Args:
            limit: Max entries to return (most recent)
            include_state: Whether to include full state in entries

        Returns:
            List of history entries with checkpoint info
        """
        entries: list[dict[str, Any]] = []
        for idx, snapshot in enumerate(self.checkpoints):
            row: dict[str, Any] = {
                "checkpoint_id": snapshot.checkpoint_id,
                "label": snapshot.label,
                "description": snapshot.description,
                "timestamp": snapshot.timestamp,
                "is_current": idx == self.current_index,
                "metadata": snapshot.metadata,
            }
            if include_state:
                # NOTE: shares the stored state dict, not a copy.
                row["state"] = snapshot.state
            entries.append(row)

        return entries[-limit:] if limit else entries

    def label_for(self, checkpoint_id: int) -> str:
        """Render a checkpoint ID as its label ("root" for ID 0)."""
        if checkpoint_id == 0:
            return "root"
        return f"{self.label_prefix}{checkpoint_id}"

    def __len__(self) -> int:
        """Number of checkpoints."""
        return len(self.checkpoints)

    def is_at_root(self) -> bool:
        """Whether the cursor is at root (before any checkpoints)."""
        return self.current_index < 0

    def is_at_end(self) -> bool:
        """Whether the cursor is on the most recent checkpoint."""
        return self.current_index == len(self.checkpoints) - 1
|
zwarm/core/costs.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Token cost estimation for LLM models.
|
|
3
|
+
|
|
4
|
+
Pricing data is hardcoded and may become stale. Last updated: 2026-01.
|
|
5
|
+
|
|
6
|
+
Sources:
|
|
7
|
+
- https://www.helicone.ai/llm-cost/provider/openai/model/gpt-5.1-codex
|
|
8
|
+
- https://pricepertoken.com/pricing-page/model/openai-codex-mini
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class ModelPricing:
    """Pricing for a model in $ per million tokens."""
    input_per_million: float
    output_per_million: float
    # Discounted rate for cached input tokens, if the provider offers one.
    cached_input_per_million: float | None = None

    def estimate_cost(
        self,
        input_tokens: int,
        output_tokens: int,
        cached_tokens: int = 0,
    ) -> float:
        """
        Estimate cost in dollars.

        NOTE(review): cached tokens are billed *in addition to* input_tokens
        here. If the token counter reports cached tokens as a subset of the
        input tokens (as OpenAI's usage objects do), callers must pass the
        non-cached portion as input_tokens or cached tokens are
        double-counted — confirm the callers' convention.

        Args:
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens
            cached_tokens: Number of cached input tokens (if applicable)

        Returns:
            Estimated cost in USD
        """
        input_cost = (input_tokens / 1_000_000) * self.input_per_million
        output_cost = (output_tokens / 1_000_000) * self.output_per_million

        cached_cost = 0.0
        if cached_tokens and self.cached_input_per_million:
            cached_cost = (cached_tokens / 1_000_000) * self.cached_input_per_million

        return input_cost + output_cost + cached_cost


# Model pricing table ($ per million tokens)
# Last updated: 2026-01
MODEL_PRICING: dict[str, ModelPricing] = {
    # OpenAI Codex models
    "gpt-5.1-codex": ModelPricing(
        input_per_million=1.25,
        output_per_million=10.00,
        cached_input_per_million=0.125,  # 90% discount for cached
    ),
    "gpt-5.1-codex-mini": ModelPricing(
        input_per_million=0.25,
        output_per_million=2.00,
        cached_input_per_million=0.025,
    ),
    "gpt-5.1-codex-max": ModelPricing(
        input_per_million=1.25,
        output_per_million=10.00,
        cached_input_per_million=0.125,
    ),
    # GPT-5 base models (for reference)
    "gpt-5": ModelPricing(
        input_per_million=1.25,
        output_per_million=10.00,
    ),
    "gpt-5-mini": ModelPricing(
        input_per_million=0.25,
        output_per_million=2.00,
    ),
    # Claude models (Anthropic)
    "claude-sonnet-4-20250514": ModelPricing(
        input_per_million=3.00,
        output_per_million=15.00,
    ),
    "claude-opus-4-20250514": ModelPricing(
        input_per_million=15.00,
        output_per_million=75.00,
    ),
    "claude-3-5-sonnet-20241022": ModelPricing(
        input_per_million=3.00,
        output_per_million=15.00,
    ),
}

# Aliases for common model names
MODEL_ALIASES: dict[str, str] = {
    "codex": "gpt-5.1-codex",
    "codex-mini": "gpt-5.1-codex-mini",
    "codex-max": "gpt-5.1-codex-max",
    "gpt5": "gpt-5",
    "gpt5-mini": "gpt-5-mini",
    "sonnet": "claude-sonnet-4-20250514",
    "opus": "claude-opus-4-20250514",
}


def get_pricing(model: str) -> ModelPricing | None:
    """
    Get pricing for a model.

    Lookup order: alias resolution, exact match, case-insensitive match,
    then longest-prefix match so dated snapshots resolve to their base
    model (e.g. "gpt-5.1-codex-mini-2026-01" -> "gpt-5.1-codex-mini").

    Args:
        model: Model name or alias

    Returns:
        ModelPricing or None if unknown
    """
    # Check aliases first
    resolved = MODEL_ALIASES.get(model.lower(), model)

    # Exact match
    if resolved in MODEL_PRICING:
        return MODEL_PRICING[resolved]

    # Try lowercase
    lowered = resolved.lower()
    if lowered in MODEL_PRICING:
        return MODEL_PRICING[lowered]

    # Prefix matching: prefer the LONGEST matching known model. Taking the
    # first match in dict order is wrong — "gpt-5.1-codex-mini-2026-01"
    # would hit "gpt-5.1-codex" before "gpt-5.1-codex-mini" and return a
    # 5x-too-expensive price.
    matches = [known for known in MODEL_PRICING if lowered.startswith(known.lower())]
    if matches:
        return MODEL_PRICING[max(matches, key=len)]

    return None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def estimate_cost(
    model: str,
    input_tokens: int,
    output_tokens: int,
    cached_tokens: int = 0,
) -> float | None:
    """
    Estimate cost for a model run.

    Thin wrapper: resolves the model via get_pricing(), then defers the
    arithmetic to ModelPricing.estimate_cost.

    Args:
        model: Model name
        input_tokens: Number of input tokens
        output_tokens: Number of output tokens
        cached_tokens: Number of cached input tokens

    Returns:
        Cost in USD, or None if model pricing unknown
    """
    pricing = get_pricing(model)
    return (
        None
        if pricing is None
        else pricing.estimate_cost(input_tokens, output_tokens, cached_tokens)
    )
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def format_cost(cost: float | None) -> str:
    """Format cost as a human-readable string ("?" when unknown)."""
    if cost is None:
        return "?"
    # Smaller amounts get more decimal places so tiny costs stay visible.
    if cost < 0.01:
        precision = 4
    elif cost < 1.00:
        precision = 3
    else:
        precision = 2
    return f"${cost:.{precision}f}"
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def estimate_session_cost(
    model: str,
    token_usage: dict[str, Any],
) -> dict[str, Any]:
    """
    Estimate cost for a session given its token usage.

    Args:
        model: Model used
        token_usage: Dict with input_tokens, output_tokens, etc.
            (missing counters default to 0)

    Returns:
        Dict with cost info: {cost, cost_formatted, pricing_known, model,
        input_tokens, output_tokens}
    """
    # Pull the counters we bill on; anything absent counts as zero.
    usage = {
        key: token_usage.get(key, 0)
        for key in ("input_tokens", "output_tokens", "cached_tokens")
    }

    cost = estimate_cost(
        model,
        usage["input_tokens"],
        usage["output_tokens"],
        usage["cached_tokens"],
    )

    return {
        "cost": cost,
        "cost_formatted": format_cost(cost),
        "pricing_known": cost is not None,
        "model": model,
        "input_tokens": usage["input_tokens"],
        "output_tokens": usage["output_tokens"],
    }
|
zwarm/tools/delegation.py
CHANGED
|
@@ -19,7 +19,7 @@ from __future__ import annotations
|
|
|
19
19
|
|
|
20
20
|
import time
|
|
21
21
|
from pathlib import Path
|
|
22
|
-
from typing import TYPE_CHECKING, Any
|
|
22
|
+
from typing import TYPE_CHECKING, Any
|
|
23
23
|
|
|
24
24
|
from wbal.helper import weaveTool
|
|
25
25
|
|
|
@@ -44,37 +44,6 @@ def _get_session_manager(orchestrator: "Orchestrator"):
|
|
|
44
44
|
return orchestrator._session_manager
|
|
45
45
|
|
|
46
46
|
|
|
47
|
-
def _wait_for_completion(manager, session_id: str, timeout: float = 300.0, poll_interval: float = 1.0) -> bool:
|
|
48
|
-
"""
|
|
49
|
-
Wait for a session to complete.
|
|
50
|
-
|
|
51
|
-
Args:
|
|
52
|
-
manager: CodexSessionManager
|
|
53
|
-
session_id: Session to wait for
|
|
54
|
-
timeout: Max seconds to wait
|
|
55
|
-
poll_interval: Seconds between polls
|
|
56
|
-
|
|
57
|
-
Returns:
|
|
58
|
-
True if completed, False if timed out
|
|
59
|
-
"""
|
|
60
|
-
from zwarm.sessions import SessionStatus
|
|
61
|
-
|
|
62
|
-
start = time.time()
|
|
63
|
-
while time.time() - start < timeout:
|
|
64
|
-
# get_session() auto-updates status based on output completion markers
|
|
65
|
-
session = manager.get_session(session_id)
|
|
66
|
-
if not session:
|
|
67
|
-
return False
|
|
68
|
-
|
|
69
|
-
# Check status (not is_running - PID check is unreliable due to reuse)
|
|
70
|
-
if session.status in (SessionStatus.COMPLETED, SessionStatus.FAILED, SessionStatus.KILLED):
|
|
71
|
-
return True
|
|
72
|
-
|
|
73
|
-
time.sleep(poll_interval)
|
|
74
|
-
|
|
75
|
-
return False
|
|
76
|
-
|
|
77
|
-
|
|
78
47
|
def _truncate(text: str, max_len: int = 200) -> str:
|
|
79
48
|
"""Truncate text with ellipsis."""
|
|
80
49
|
if len(text) <= max_len:
|
|
@@ -158,7 +127,6 @@ def _validate_working_dir(
|
|
|
158
127
|
def delegate(
|
|
159
128
|
self: "Orchestrator",
|
|
160
129
|
task: str,
|
|
161
|
-
mode: Literal["sync", "async"] = "async",
|
|
162
130
|
model: str | None = None,
|
|
163
131
|
working_dir: str | None = None,
|
|
164
132
|
) -> dict[str, Any]:
|
|
@@ -166,11 +134,9 @@ def delegate(
|
|
|
166
134
|
Delegate work to a Codex agent.
|
|
167
135
|
|
|
168
136
|
This spawns a codex session - the exact same way `zwarm interactive` does.
|
|
137
|
+
All sessions run async - you get a session_id immediately and poll for results.
|
|
169
138
|
|
|
170
|
-
|
|
171
|
-
always return immediately. Use sleep() + peek_session() to poll for completion.
|
|
172
|
-
|
|
173
|
-
Async workflow pattern:
|
|
139
|
+
Workflow pattern:
|
|
174
140
|
1. delegate(task="Add logout button") -> session_id
|
|
175
141
|
2. sleep(30) -> give it time
|
|
176
142
|
3. peek_session(session_id) -> check if done
|
|
@@ -179,7 +145,6 @@ def delegate(
|
|
|
179
145
|
|
|
180
146
|
Args:
|
|
181
147
|
task: Clear description of what to do. Be specific about requirements.
|
|
182
|
-
mode: IGNORED - always async. (Legacy parameter, will be removed.)
|
|
183
148
|
model: Model override (default: gpt-5.1-codex-mini).
|
|
184
149
|
working_dir: Directory for codex to work in (default: orchestrator's dir).
|
|
185
150
|
|
|
@@ -191,9 +156,6 @@ def delegate(
|
|
|
191
156
|
sleep(30)
|
|
192
157
|
peek_session(session_id) # Check progress
|
|
193
158
|
"""
|
|
194
|
-
# Force async mode - sync is deprecated
|
|
195
|
-
# TODO: Remove sync codepath entirely (see STATE.md)
|
|
196
|
-
mode = "async"
|
|
197
159
|
# Validate working directory
|
|
198
160
|
effective_dir, dir_error = _validate_working_dir(
|
|
199
161
|
working_dir,
|
|
@@ -228,74 +190,15 @@ def delegate(
|
|
|
228
190
|
adapter="codex",
|
|
229
191
|
)
|
|
230
192
|
|
|
231
|
-
#
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
)
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
session = manager.get_session(session.id)
|
|
241
|
-
|
|
242
|
-
if not completed:
|
|
243
|
-
return {
|
|
244
|
-
"success": False,
|
|
245
|
-
"session_id": session.id,
|
|
246
|
-
"status": "timeout",
|
|
247
|
-
"error": "Session timed out waiting for codex to complete",
|
|
248
|
-
"hint": "Use check_session() to monitor progress, or use async mode for long tasks",
|
|
249
|
-
}
|
|
250
|
-
|
|
251
|
-
# Get the response from messages
|
|
252
|
-
response_text = ""
|
|
253
|
-
messages = manager.get_messages(session.id)
|
|
254
|
-
for msg in messages:
|
|
255
|
-
if msg.role == "assistant":
|
|
256
|
-
response_text = msg.content
|
|
257
|
-
break # Take first assistant message
|
|
258
|
-
|
|
259
|
-
# Build log path for debugging
|
|
260
|
-
log_path = str(manager._output_path(session.id, session.turn))
|
|
261
|
-
|
|
262
|
-
# Check if session failed
|
|
263
|
-
from zwarm.sessions import SessionStatus
|
|
264
|
-
if session.status == SessionStatus.FAILED:
|
|
265
|
-
return {
|
|
266
|
-
"success": False,
|
|
267
|
-
"session": _format_session_header(session),
|
|
268
|
-
"session_id": session.id,
|
|
269
|
-
"status": "failed",
|
|
270
|
-
"task": _truncate(task, 100),
|
|
271
|
-
"error": session.error or "Unknown error",
|
|
272
|
-
"response": response_text or "(no response captured)",
|
|
273
|
-
"tokens": _get_total_tokens(session),
|
|
274
|
-
"log_file": log_path,
|
|
275
|
-
"hint": "Check log_file for raw codex output. Use bash('cat <log_file>') to inspect.",
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
return {
|
|
279
|
-
"success": True,
|
|
280
|
-
"session": _format_session_header(session),
|
|
281
|
-
"session_id": session.id,
|
|
282
|
-
"status": session.status.value,
|
|
283
|
-
"task": _truncate(task, 100),
|
|
284
|
-
"response": response_text or "(no response captured)",
|
|
285
|
-
"tokens": _get_total_tokens(session),
|
|
286
|
-
"log_file": log_path,
|
|
287
|
-
"hint": "Use converse(session_id, message) to send follow-up messages",
|
|
288
|
-
}
|
|
289
|
-
else:
|
|
290
|
-
# Async mode - return immediately
|
|
291
|
-
return {
|
|
292
|
-
"success": True,
|
|
293
|
-
"session": _format_session_header(session),
|
|
294
|
-
"session_id": session.id,
|
|
295
|
-
"status": "running",
|
|
296
|
-
"task": _truncate(task, 100),
|
|
297
|
-
"hint": "Use check_session(session_id) to monitor progress",
|
|
298
|
-
}
|
|
193
|
+
# Return immediately - session runs in background
|
|
194
|
+
return {
|
|
195
|
+
"success": True,
|
|
196
|
+
"session": _format_session_header(session),
|
|
197
|
+
"session_id": session.id,
|
|
198
|
+
"status": "running",
|
|
199
|
+
"task": _truncate(task, 100),
|
|
200
|
+
"hint": "Use sleep() then check_session(session_id) to monitor progress",
|
|
201
|
+
}
|
|
299
202
|
|
|
300
203
|
|
|
301
204
|
@weaveTool
|
|
@@ -303,21 +206,17 @@ def converse(
|
|
|
303
206
|
self: "Orchestrator",
|
|
304
207
|
session_id: str,
|
|
305
208
|
message: str,
|
|
306
|
-
wait: bool = False,
|
|
307
209
|
) -> dict[str, Any]:
|
|
308
210
|
"""
|
|
309
211
|
Continue a conversation with a codex session.
|
|
310
212
|
|
|
311
213
|
This injects a follow-up message into the session, providing the
|
|
312
214
|
conversation history as context. Like chatting with a developer.
|
|
313
|
-
|
|
314
|
-
**NOTE: Always runs async.** The wait parameter is ignored - messages
|
|
315
|
-
are sent and return immediately. Use sleep() + check_session() to poll.
|
|
215
|
+
Returns immediately - use sleep() + check_session() to poll for the response.
|
|
316
216
|
|
|
317
217
|
Args:
|
|
318
218
|
session_id: The session to continue (from delegate() result).
|
|
319
219
|
message: Your next message to codex.
|
|
320
|
-
wait: IGNORED - always async. (Legacy parameter, will be removed.)
|
|
321
220
|
|
|
322
221
|
Returns:
|
|
323
222
|
{session_id, turn, status: "running"}
|
|
@@ -327,10 +226,6 @@ def converse(
|
|
|
327
226
|
sleep(30)
|
|
328
227
|
check_session(session_id) # Get response
|
|
329
228
|
"""
|
|
330
|
-
# Force async mode - sync is deprecated
|
|
331
|
-
# TODO: Remove sync codepath entirely (see STATE.md)
|
|
332
|
-
wait = False
|
|
333
|
-
|
|
334
229
|
manager = _get_session_manager(self)
|
|
335
230
|
|
|
336
231
|
# Get current session
|
|
@@ -371,53 +266,15 @@ def converse(
|
|
|
371
266
|
"session_id": session_id,
|
|
372
267
|
}
|
|
373
268
|
|
|
374
|
-
|
|
375
|
-
# Async mode - return immediately
|
|
376
|
-
return {
|
|
377
|
-
"success": True,
|
|
378
|
-
"session": _format_session_header(updated_session),
|
|
379
|
-
"session_id": session_id,
|
|
380
|
-
"turn": updated_session.turn,
|
|
381
|
-
"status": "running",
|
|
382
|
-
"you_said": _truncate(message, 100),
|
|
383
|
-
"hint": "Use check_session(session_id) to see the response when ready",
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
# Sync mode - wait for completion
|
|
387
|
-
completed = _wait_for_completion(
|
|
388
|
-
manager,
|
|
389
|
-
session_id,
|
|
390
|
-
timeout=self.config.executor.timeout or 300.0,
|
|
391
|
-
)
|
|
392
|
-
|
|
393
|
-
# Refresh session
|
|
394
|
-
session = manager.get_session(session_id)
|
|
395
|
-
|
|
396
|
-
if not completed:
|
|
397
|
-
return {
|
|
398
|
-
"success": False,
|
|
399
|
-
"session_id": session_id,
|
|
400
|
-
"status": "timeout",
|
|
401
|
-
"error": "Session timed out waiting for response",
|
|
402
|
-
"hint": "Use check_session() to monitor progress",
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
# Get the response (last assistant message)
|
|
406
|
-
response_text = ""
|
|
407
|
-
messages = manager.get_messages(session_id)
|
|
408
|
-
for msg in reversed(messages):
|
|
409
|
-
if msg.role == "assistant":
|
|
410
|
-
response_text = msg.content
|
|
411
|
-
break
|
|
412
|
-
|
|
269
|
+
# Return immediately - session runs in background
|
|
413
270
|
return {
|
|
414
271
|
"success": True,
|
|
415
|
-
"session": _format_session_header(
|
|
272
|
+
"session": _format_session_header(updated_session),
|
|
416
273
|
"session_id": session_id,
|
|
417
|
-
"turn":
|
|
274
|
+
"turn": updated_session.turn,
|
|
275
|
+
"status": "running",
|
|
418
276
|
"you_said": _truncate(message, 100),
|
|
419
|
-
"
|
|
420
|
-
"tokens": _get_total_tokens(session),
|
|
277
|
+
"hint": "Use sleep() then check_session(session_id) to see the response",
|
|
421
278
|
}
|
|
422
279
|
|
|
423
280
|
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: zwarm
|
|
3
|
-
Version: 3.0
|
|
3
|
+
Version: 3.2.0
|
|
4
4
|
Summary: Multi-Agent CLI Orchestration Research Platform
|
|
5
5
|
Requires-Python: <3.14,>=3.13
|
|
6
|
+
Requires-Dist: prompt-toolkit>=3.0.52
|
|
6
7
|
Requires-Dist: python-dotenv>=1.0.0
|
|
7
8
|
Requires-Dist: pyyaml>=6.0
|
|
8
9
|
Requires-Dist: rich>=13.0.0
|