zwarm 2.3.5__py3-none-any.whl → 3.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zwarm/core/__init__.py CHANGED
@@ -0,0 +1,20 @@
+ """Core primitives for zwarm."""
+
+ from .checkpoints import Checkpoint, CheckpointManager
+ from .costs import (
+     estimate_cost,
+     estimate_session_cost,
+     format_cost,
+     get_pricing,
+     ModelPricing,
+ )
+
+ __all__ = [
+     "Checkpoint",
+     "CheckpointManager",
+     "estimate_cost",
+     "estimate_session_cost",
+     "format_cost",
+     "get_pricing",
+     "ModelPricing",
+ ]
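The two new modules below are re-exported through this flat namespace, so downstream code can (illustratively) do:

```python
# Illustrative import of the re-exported core API (names match __all__ above).
from zwarm.core import CheckpointManager, ModelPricing, estimate_cost, format_cost
```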
zwarm/core/checkpoints.py ADDED
@@ -0,0 +1,216 @@
+ """
+ Checkpoint primitives for state management.
+
+ Provides time-travel capability by recording snapshots of state at key points.
+ Used by pilot for turn-by-turn checkpointing, and potentially by other
+ interfaces that need state restoration.
+
+ Topology reminder:
+     orchestrator → pilot → interactive → CodexSessionManager
+
+ These primitives sit at the core layer, usable by any interface above.
+ """
+
+ from __future__ import annotations
+
+ import copy
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from typing import Any
+
+
+ @dataclass
+ class Checkpoint:
+     """
+     A snapshot of state at a specific point in time.
+
+     Attributes:
+         checkpoint_id: Unique identifier (e.g., turn number)
+         label: Human-readable label (e.g., "T1", "T2")
+         description: What action led to this state
+         state: The actual state snapshot (deep-copied)
+         timestamp: When checkpoint was created
+         metadata: Optional extra data
+     """
+     checkpoint_id: int
+     label: str
+     description: str
+     state: dict[str, Any]
+     timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+
+ @dataclass
+ class CheckpointManager:
+     """
+     Manages checkpoints and time travel.
+
+     Maintains a list of checkpoints and a current position. Supports:
+     - Recording new checkpoints
+     - Jumping to any previous checkpoint
+     - Branching (going back and continuing creates new timeline)
+     - History inspection
+
+     Usage:
+         mgr = CheckpointManager()
+
+         # Record state after each action
+         mgr.record(description="Added auth", state={"messages": [...], ...})
+         mgr.record(description="Fixed bug", state={"messages": [...], ...})
+
+         # Jump back
+         cp = mgr.goto(1)  # Go to first checkpoint
+         restored_state = cp.state
+
+         # Continue from there (branches off)
+         mgr.record(description="Different path", state={...})
+     """
+
+     checkpoints: list[Checkpoint] = field(default_factory=list)
+     current_index: int = -1  # -1 = root (before any checkpoints)
+     next_id: int = 1
+     label_prefix: str = "T"  # Labels will be T1, T2, etc.
+
+     def record(
+         self,
+         description: str,
+         state: dict[str, Any],
+         metadata: dict[str, Any] | None = None,
+     ) -> Checkpoint:
+         """
+         Record a new checkpoint.
+
+         If not at the end of history (i.e., we've gone back), this creates
+         a branch - future checkpoints are discarded.
+
+         Args:
+             description: What action led to this state
+             state: State to snapshot (will be deep-copied)
+             metadata: Optional extra data
+
+         Returns:
+             The created checkpoint
+         """
+         checkpoint = Checkpoint(
+             checkpoint_id=self.next_id,
+             label=f"{self.label_prefix}{self.next_id}",
+             description=description,
+             state=copy.deepcopy(state),
+             metadata=metadata or {},
+         )
+
+         # If we're not at the end, we're branching - truncate future
+         if self.current_index < len(self.checkpoints) - 1:
+             self.checkpoints = self.checkpoints[:self.current_index + 1]
+
+         self.checkpoints.append(checkpoint)
+         self.current_index = len(self.checkpoints) - 1
+         self.next_id += 1
+
+         return checkpoint
+
+     def goto(self, checkpoint_id: int) -> Checkpoint | None:
+         """
+         Jump to a specific checkpoint.
+
+         Args:
+             checkpoint_id: The checkpoint ID to jump to (0 = root)
+
+         Returns:
+             The checkpoint, or None if not found (or root)
+         """
+         if checkpoint_id == 0:
+             # Root state - before any checkpoints
+             self.current_index = -1
+             return None
+
+         for i, cp in enumerate(self.checkpoints):
+             if cp.checkpoint_id == checkpoint_id:
+                 self.current_index = i
+                 return cp
+
+         return None  # Not found
+
+     def goto_label(self, label: str) -> Checkpoint | None:
+         """
+         Jump to a checkpoint by label (e.g., "T1", "root").
+
+         Args:
+             label: The label to find
+
+         Returns:
+             The checkpoint, or None if not found
+         """
+         if label.lower() == "root":
+             self.current_index = -1
+             return None
+
+         for i, cp in enumerate(self.checkpoints):
+             if cp.label == label:
+                 self.current_index = i
+                 return cp
+
+         return None
+
+     def current(self) -> Checkpoint | None:
+         """Get the current checkpoint, or None if at root."""
+         if self.current_index < 0 or self.current_index >= len(self.checkpoints):
+             return None
+         return self.checkpoints[self.current_index]
+
+     def current_state(self) -> dict[str, Any] | None:
+         """Get the current state, or None if at root."""
+         cp = self.current()
+         return copy.deepcopy(cp.state) if cp else None
+
+     def history(
+         self,
+         limit: int | None = None,
+         include_state: bool = False,
+     ) -> list[dict[str, Any]]:
+         """
+         Get history entries for display.
+
+         Args:
+             limit: Max entries to return (most recent)
+             include_state: Whether to include full state in entries
+
+         Returns:
+             List of history entries with checkpoint info
+         """
+         entries = []
+         for i, cp in enumerate(self.checkpoints):
+             entry = {
+                 "checkpoint_id": cp.checkpoint_id,
+                 "label": cp.label,
+                 "description": cp.description,
+                 "timestamp": cp.timestamp,
+                 "is_current": i == self.current_index,
+                 "metadata": cp.metadata,
+             }
+             if include_state:
+                 entry["state"] = cp.state
+             entries.append(entry)
+
+         if limit:
+             entries = entries[-limit:]
+
+         return entries
+
+     def label_for(self, checkpoint_id: int) -> str:
+         """Get label for a checkpoint ID."""
+         if checkpoint_id == 0:
+             return "root"
+         return f"{self.label_prefix}{checkpoint_id}"
+
+     def __len__(self) -> int:
+         """Number of checkpoints."""
+         return len(self.checkpoints)
+
+     def is_at_root(self) -> bool:
+         """Whether we're at root (before any checkpoints)."""
+         return self.current_index < 0
+
+     def is_at_end(self) -> bool:
+         """Whether we're at the most recent checkpoint."""
+         return self.current_index == len(self.checkpoints) - 1
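The branching semantics described in the `CheckpointManager` docstring are the subtle part: calling `record()` after a `goto()` truncates everything ahead of the current position. A minimal sketch of that behavior, following the implementation above (the state dicts are arbitrary examples):

```python
from zwarm.core import CheckpointManager

mgr = CheckpointManager()
mgr.record(description="Added auth", state={"step": 1})  # T1
mgr.record(description="Fixed bug", state={"step": 2})   # T2
mgr.record(description="Refactor", state={"step": 3})    # T3

cp = mgr.goto(1)                  # jump back to T1 (current_index -> 0)
assert cp.state == {"step": 1}    # state was deep-copied at record time

# Recording from T1 discards T2/T3 and continues on a new timeline as T4.
mgr.record(description="Different path", state={"step": 2})
assert [c.label for c in mgr.checkpoints] == ["T1", "T4"]
assert mgr.is_at_end()
```

Note that IDs keep incrementing across branches (`next_id` is never rewound), so a discarded `T2` is not reused and labels stay unambiguous within a session.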
zwarm/core/costs.py ADDED
@@ -0,0 +1,199 @@
+ """
+ Token cost estimation for LLM models.
+
+ Pricing data is hardcoded and may become stale. Last updated: 2026-01.
+
+ Sources:
+     - https://www.helicone.ai/llm-cost/provider/openai/model/gpt-5.1-codex
+     - https://pricepertoken.com/pricing-page/model/openai-codex-mini
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Any
+
+
+ @dataclass
+ class ModelPricing:
+     """Pricing for a model in $ per million tokens."""
+     input_per_million: float
+     output_per_million: float
+     cached_input_per_million: float | None = None  # Some models have cached input discount
+
+     def estimate_cost(
+         self,
+         input_tokens: int,
+         output_tokens: int,
+         cached_tokens: int = 0,
+     ) -> float:
+         """
+         Estimate cost in dollars.
+
+         Args:
+             input_tokens: Number of input tokens
+             output_tokens: Number of output tokens
+             cached_tokens: Number of cached input tokens (if applicable)
+
+         Returns:
+             Estimated cost in USD
+         """
+         input_cost = (input_tokens / 1_000_000) * self.input_per_million
+         output_cost = (output_tokens / 1_000_000) * self.output_per_million
+
+         cached_cost = 0.0
+         if cached_tokens and self.cached_input_per_million:
+             cached_cost = (cached_tokens / 1_000_000) * self.cached_input_per_million
+
+         return input_cost + output_cost + cached_cost
+
+
+ # Model pricing table ($ per million tokens)
+ # Last updated: 2026-01
+ MODEL_PRICING: dict[str, ModelPricing] = {
+     # OpenAI Codex models
+     "gpt-5.1-codex": ModelPricing(
+         input_per_million=1.25,
+         output_per_million=10.00,
+         cached_input_per_million=0.125,  # 90% discount for cached
+     ),
+     "gpt-5.1-codex-mini": ModelPricing(
+         input_per_million=0.25,
+         output_per_million=2.00,
+         cached_input_per_million=0.025,
+     ),
+     "gpt-5.1-codex-max": ModelPricing(
+         input_per_million=1.25,
+         output_per_million=10.00,
+         cached_input_per_million=0.125,
+     ),
+     # GPT-5 base models (for reference)
+     "gpt-5": ModelPricing(
+         input_per_million=1.25,
+         output_per_million=10.00,
+     ),
+     "gpt-5-mini": ModelPricing(
+         input_per_million=0.25,
+         output_per_million=2.00,
+     ),
+     # Claude models (Anthropic)
+     "claude-sonnet-4-20250514": ModelPricing(
+         input_per_million=3.00,
+         output_per_million=15.00,
+     ),
+     "claude-opus-4-20250514": ModelPricing(
+         input_per_million=15.00,
+         output_per_million=75.00,
+     ),
+     "claude-3-5-sonnet-20241022": ModelPricing(
+         input_per_million=3.00,
+         output_per_million=15.00,
+     ),
+ }
+
+ # Aliases for common model names
+ MODEL_ALIASES: dict[str, str] = {
+     "codex": "gpt-5.1-codex",
+     "codex-mini": "gpt-5.1-codex-mini",
+     "codex-max": "gpt-5.1-codex-max",
+     "gpt5": "gpt-5",
+     "gpt5-mini": "gpt-5-mini",
+     "sonnet": "claude-sonnet-4-20250514",
+     "opus": "claude-opus-4-20250514",
+ }
+
+
+ def get_pricing(model: str) -> ModelPricing | None:
+     """
+     Get pricing for a model.
+
+     Args:
+         model: Model name or alias
+
+     Returns:
+         ModelPricing or None if unknown
+     """
+     # Check aliases first
+     resolved = MODEL_ALIASES.get(model.lower(), model)
+
+     # Exact match
+     if resolved in MODEL_PRICING:
+         return MODEL_PRICING[resolved]
+
+     # Try lowercase
+     if resolved.lower() in MODEL_PRICING:
+         return MODEL_PRICING[resolved.lower()]
+
+     # Try prefix matching (e.g., "gpt-5.1-codex-mini-2026-01" -> "gpt-5.1-codex-mini")
+     for known_model in MODEL_PRICING:
+         if resolved.lower().startswith(known_model.lower()):
+             return MODEL_PRICING[known_model]
+
+     return None
+
+
+ def estimate_cost(
+     model: str,
+     input_tokens: int,
+     output_tokens: int,
+     cached_tokens: int = 0,
+ ) -> float | None:
+     """
+     Estimate cost for a model run.
+
+     Args:
+         model: Model name
+         input_tokens: Number of input tokens
+         output_tokens: Number of output tokens
+         cached_tokens: Number of cached input tokens
+
+     Returns:
+         Cost in USD, or None if model pricing unknown
+     """
+     pricing = get_pricing(model)
+     if pricing is None:
+         return None
+
+     return pricing.estimate_cost(input_tokens, output_tokens, cached_tokens)
+
+
+ def format_cost(cost: float | None) -> str:
+     """Format cost as a human-readable string."""
+     if cost is None:
+         return "?"
+     if cost < 0.01:
+         return f"${cost:.4f}"
+     elif cost < 1.00:
+         return f"${cost:.3f}"
+     else:
+         return f"${cost:.2f}"
+
+
+ def estimate_session_cost(
+     model: str,
+     token_usage: dict[str, Any],
+ ) -> dict[str, Any]:
+     """
+     Estimate cost for a session given its token usage.
+
+     Args:
+         model: Model used
+         token_usage: Dict with input_tokens, output_tokens, etc.
+
+     Returns:
+         Dict with cost info: {cost, cost_formatted, pricing_known}
+     """
+     input_tokens = token_usage.get("input_tokens", 0)
+     output_tokens = token_usage.get("output_tokens", 0)
+     cached_tokens = token_usage.get("cached_tokens", 0)
+
+     cost = estimate_cost(model, input_tokens, output_tokens, cached_tokens)
+
+     return {
+         "cost": cost,
+         "cost_formatted": format_cost(cost),
+         "pricing_known": cost is not None,
+         "model": model,
+         "input_tokens": input_tokens,
+         "output_tokens": output_tokens,
+     }
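A short worked example of the functions above; the dollar figures follow mechanically from the hardcoded table, which the module itself warns may be stale:

```python
from zwarm.core import estimate_cost, estimate_session_cost, format_cost

# "codex" resolves via MODEL_ALIASES to gpt-5.1-codex ($1.25/M in, $10.00/M out):
#   200,000 input tokens  -> 0.2  * 1.25  = $0.25
#    50,000 output tokens -> 0.05 * 10.00 = $0.50
cost = estimate_cost("codex", input_tokens=200_000, output_tokens=50_000)
assert cost is not None and abs(cost - 0.75) < 1e-9
print(format_cost(cost))  # "$0.750" (three decimals for costs under $1.00)

# Unknown models degrade gracefully instead of raising:
print(estimate_session_cost("some-unknown-model", {"input_tokens": 10}))
# -> {'cost': None, 'cost_formatted': '?', 'pricing_known': False, ...}
```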
zwarm/prompts/__init__.py CHANGED
@@ -3,8 +3,11 @@ System prompts for zwarm agents.
  """
 
  from zwarm.prompts.orchestrator import ORCHESTRATOR_SYSTEM_PROMPT, get_orchestrator_prompt
+ from zwarm.prompts.pilot import PILOT_SYSTEM_PROMPT, get_pilot_prompt
 
  __all__ = [
      "ORCHESTRATOR_SYSTEM_PROMPT",
      "get_orchestrator_prompt",
+     "PILOT_SYSTEM_PROMPT",
+     "get_pilot_prompt",
  ]
@@ -27,18 +27,20 @@ For everything else, make your best judgment and proceed. If you're unsure wheth
 
  Your primary tools are for delegation and verification:
 
- **delegate(task, working_dir=None, model=None, wait=True)** - Start a new executor session. The `task` should be a clear, specific description of what you want done. Use `wait=True` (default) for interactive work where you'll iterate with the executor. Use `wait=False` to spawn background work and continue immediately. The `working_dir` parameter lets you run the executor in a specific directory.
+ **delegate(task, working_dir=None, model=None)** - Start a new executor session. The `task` should be a clear, specific description of what you want done. All sessions run asynchronously - you'll get a session_id back immediately and can poll for results. The `working_dir` parameter lets you run the executor in a specific directory.
 
- **converse(session_id, message, wait=True)** - Continue an existing conversation. Use this to provide feedback, ask for changes, or guide the executor through complex work. The executor maintains full context. Use `wait=False` to send the message and continue without waiting for a response.
+ **converse(session_id, message)** - Continue an existing conversation. Use this to provide feedback, ask for changes, or guide the executor through complex work. The executor maintains full context. Returns immediately - use polling to check for the response.
 
- **peek_session(session_id)** - Quick status check. Returns just the session status and latest message. Use this for fast polling when you have multiple sessions running.
+ **peek_session(session_id)** - Quick status check. Returns just the session status and latest message. Use this for fast polling.
 
  **check_session(session_id)** - Full session details including all messages, token usage, runtime. Use this when you need the complete picture.
 
- **list_sessions(status=None)** - List all sessions. Returns a `needs_attention` flag for each session indicating if it recently completed or failed. Use this to monitor multiple parallel sessions and see which ones have new responses ready for review.
+ **list_sessions(status=None)** - List all sessions. Returns a `needs_attention` flag for each session indicating if it recently completed or failed. Use this to monitor multiple sessions and see which ones have new responses ready for review.
 
  **end_session(session_id, reason=None, delete=False)** - Kill a running session or clean up a completed one. Use `delete=True` to remove the session entirely (won't show in list_sessions anymore).
 
+ **sleep(seconds)** - Pause execution for specified seconds (max 300). Use this when you've started sessions and want to give them time to complete before polling. Essential for the async workflow pattern.
+
  **bash(command)** - Run shell commands directly. Use this primarily for verification: running tests, type checkers, linters, build commands, or inspecting the filesystem. Do NOT use bash to write code yourself - that's what executors are for.
 
  **chat(message, wait_for_user_input)** - Communicate with the human user. Use this sparingly. Most of the time you should be working autonomously without bothering the user.
@@ -63,35 +65,40 @@ The watchers are on your side. They exist to help you succeed, not to criticize.
 
  ---
 
- # Sync vs Async: Choosing the Right Approach
-
- The `wait` parameter controls whether you block waiting for a response or continue immediately.
+ # Async Workflow Pattern
 
- **Sync (wait=True)** creates an interactive conversation with the executor. After your task description, you receive the executor's response immediately. You can then provide feedback via converse(), ask for changes, or confirm the work is acceptable. This back-and-forth continues until you're satisfied.
+ All executor sessions run asynchronously. When you call delegate() or converse(), you get a session_id back immediately and the executor works in the background. This lets you parallelize work efficiently.
 
- Use sync when the task involves ambiguity, when you expect to iterate, when you want to review results before proceeding, or for high-stakes work needing close supervision.
+ The core workflow pattern is: **delegate → sleep → poll → respond**
 
- Typical sync pattern:
- 1. `delegate(task)` - get initial response
- 2. Evaluate - does it meet requirements?
- 3. `converse(id, "feedback...")` - if changes needed
- 4. Repeat until satisfied
- 5. `end_session(id)` or just move on
+ ```
+ 1. delegate(task1) → session_a
+ 2. delegate(task2) → session_b
+ 3. delegate(task3) → session_c
+ 4. sleep(30) → give them time to work
+ 5. list_sessions() → check which have needs_attention=True
+ 6. peek_session(a) → quick status check
+ 7. If still running, sleep(30) and repeat
+ 8. check_session(a) → full results when done
+ 9. converse(a, "feedback...") → continue the conversation
+ 10. sleep(15) → wait for response
+ 11. check_session(a) → see the response
+ ```
 
- **Async (wait=False)** is fire-and-forget. You spawn the work and continue immediately without waiting. The executor works in the background.
+ **Key principles:**
 
- Use async when tasks are well-defined and self-contained, when you're confident the executor can complete without guidance, or when you want to parallelize multiple independent pieces of work. Async is efficient for clear-cut tasks like "add tests for this function" or "fix this lint error".
+ - Use **sleep()** to give executors time to work before polling. Don't spam peek_session() in a tight loop.
+ - Use **list_sessions()** to see which sessions have `needs_attention=True` (recently completed or failed).
+ - Use **peek_session()** for quick status checks during polling.
+ - Use **check_session()** to get full details including all messages when you need to review the actual work.
+ - After **converse()**, always sleep() and poll - you won't get the response immediately.
 
- Async pattern for parallel work:
- 1. `delegate(task1, wait=False)` → session a
- 2. `delegate(task2, wait=False)` → session b
- 3. `delegate(task3, wait=False)` → session c
- 4. `list_sessions()` → check `needs_attention` flags
- 5. `peek_session(a)` → quick status check
- 6. `check_session(b)` → full details when ready
- 7. `converse(a, "now do X", wait=False)` → continue without blocking
+ **Sleep timing guidance:**
 
- When in doubt, prefer sync. The overhead of waiting is small compared to an executor going off in the wrong direction unsupervised.
+ - Simple tasks (single file edits, small fixes): 15-30 seconds
+ - Medium tasks (multiple files, tests): 30-60 seconds
+ - Complex tasks (new features, refactoring): 60-120 seconds
+ - If a session is still running after polling, sleep again rather than waiting forever
 
  ---
 
@@ -119,7 +126,7 @@ Never mark work as complete without verifying it actually works. This is the mos
 
  After an executor completes work, run the relevant verification commands. For Python projects, this typically means: pytest for tests, mypy or pyright for type checking, ruff or flake8 for linting. For JavaScript/TypeScript: npm test, tsc for type checking, eslint for linting. For compiled languages: ensure the build succeeds without errors.
 
- When verification fails, you have two options. If you're in a sync session, use converse() to share the error output and ask the executor to fix it. Be specific about what failed - paste the actual error message. If you're in an async session or the sync session has become too confused, end it with verdict="failed" and start a fresh session with a clearer task description that incorporates what you learned.
+ When verification fails, use converse() to share the error output and ask the executor to fix it. Be specific about what failed - paste the actual error message. Remember to sleep() and poll for the response. If the session has become too confused or gone too far down the wrong path, end it with verdict="failed" and start a fresh session with a clearer task description that incorporates what you learned.
 
  Do not rationalize failures. If the tests don't pass, the work isn't done. If the type checker complains, the work isn't done. If the linter shows errors, the work isn't done. Your job is to ensure quality, and that means holding firm on verification.
 
@@ -131,7 +138,7 @@ Executors will sometimes fail. They might misunderstand the task, produce buggy
 
  When you notice an executor has gone wrong, first diagnose the problem. What specifically is wrong? Is it a misunderstanding of requirements, a technical error, a missing piece of context? Understanding the root cause helps you correct effectively.
 
- For sync sessions, you can often recover through conversation. Explain what's wrong clearly and specifically. Don't just say "this is wrong" - explain why and what you expected instead. Provide the error messages, the failing test output, or a clear description of the incorrect behavior. Give the executor the information they need to fix the issue.
+ You can often recover through conversation using converse(). Explain what's wrong clearly and specifically. Don't just say "this is wrong" - explain why and what you expected instead. Provide the error messages, the failing test output, or a clear description of the incorrect behavior. Give the executor the information they need to fix the issue. Then sleep() and poll for their response.
 
  Sometimes a session becomes too confused or goes too far down the wrong path. In these cases, it's better to cut your losses: call end_session() with verdict="failed" and a summary of what went wrong, then start fresh with a new session that has a better task description informed by what you learned.
 
@@ -145,7 +152,7 @@ Complex tasks often require multiple executor sessions, either in sequence or in
 
  For sequential work with dependencies, complete each session fully before starting the next. Don't leave sessions hanging in an ambiguous state while you start new work. This creates confusion and makes it hard to track what's actually done.
 
- For parallel work on independent tasks, you can start multiple async sessions simultaneously. Use check_session() periodically to monitor progress, and end each session properly when complete. Keep mental track of what's running - don't lose track of sessions.
+ For parallel work on independent tasks, start multiple sessions and use the sleep-poll pattern to monitor them. Use list_sessions() to see which have needs_attention=True, check_session() for full details, and end each session properly when complete. Keep mental track of what's running - don't lose track of sessions.
 
  Prioritize completing in-progress work before starting new work. A half-finished feature is worth less than nothing - it's technical debt that will confuse future work. Better to have fewer things fully done than many things partially done.
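For clarity, here is the sleep-poll loop the new prompt describes, rendered as Python-style pseudocode. The tool names and the `needs_attention` flag are the ones documented in the prompt; the exact return shape (e.g., a `session_id` field on list_sessions entries) and the `verify_and_review` helper are assumptions for illustration only:

```python
# Pseudocode only: these tools are invoked as agent tool calls, not a Python API.
session_ids = [delegate(task) for task in ("task1", "task2", "task3")]
pending = set(session_ids)

while pending:
    sleep(30)  # give executors time to work; don't poll in a tight loop
    for info in list_sessions():
        sid = info["session_id"]              # assumed field name
        if sid in pending and info["needs_attention"]:
            result = check_session(sid)       # full messages, tokens, runtime
            if verify_and_review(result):     # hypothetical verification step (bash: pytest, linters, ...)
                end_session(sid)
                pending.discard(sid)
            else:
                converse(sid, "feedback ...") # then sleep and poll again
```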