swarmkit-0.1.34-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bridge/__init__.py +5 -0
- bridge/dist/bridge.bundle.cjs +8 -0
- swarmkit/__init__.py +152 -0
- swarmkit/agent.py +480 -0
- swarmkit/bridge.py +475 -0
- swarmkit/config.py +92 -0
- swarmkit/pipeline/__init__.py +59 -0
- swarmkit/pipeline/pipeline.py +487 -0
- swarmkit/pipeline/types.py +272 -0
- swarmkit/prompts/__init__.py +126 -0
- swarmkit/prompts/agent_md/judge.md +30 -0
- swarmkit/prompts/agent_md/reduce.md +7 -0
- swarmkit/prompts/agent_md/verify.md +33 -0
- swarmkit/prompts/user/judge.md +1 -0
- swarmkit/prompts/user/retry_feedback.md +9 -0
- swarmkit/prompts/user/verify.md +1 -0
- swarmkit/results.py +45 -0
- swarmkit/retry.py +133 -0
- swarmkit/schema.py +107 -0
- swarmkit/swarm/__init__.py +75 -0
- swarmkit/swarm/results.py +140 -0
- swarmkit/swarm/swarm.py +1751 -0
- swarmkit/swarm/types.py +193 -0
- swarmkit/utils.py +82 -0
- swarmkit-0.1.34.dist-info/METADATA +80 -0
- swarmkit-0.1.34.dist-info/RECORD +29 -0
- swarmkit-0.1.34.dist-info/WHEEL +5 -0
- swarmkit-0.1.34.dist-info/licenses/LICENSE +24 -0
- swarmkit-0.1.34.dist-info/top_level.txt +2 -0
swarmkit/pipeline/types.py
ADDED

@@ -0,0 +1,272 @@
"""Pipeline Types - Fluent API for chaining Swarm operations."""

from dataclasses import dataclass
from typing import Any, Callable, Dict, Generic, List, Literal, Optional, TypeVar, Union

from ..swarm.types import (
    BestOfConfig,
    VerifyConfig,
    SchemaType,
    Prompt,
)
from ..swarm.results import SwarmResult, ReduceResult
from ..retry import RetryConfig


T = TypeVar('T')


# =============================================================================
# EMIT OPTION (filter only)
# =============================================================================

EmitOption = Literal["success", "filtered", "all"]
"""What a filter step emits to the next step.

- "success": items that passed the condition (default)
- "filtered": items that failed the condition
- "all": both passed and failed items
"""


# =============================================================================
# STEP CONFIGURATIONS
# =============================================================================

@dataclass
class MapConfig(Generic[T]):
    """Map step configuration."""
    prompt: Prompt
    """Task prompt."""
    name: Optional[str] = None
    """Step name for observability (appears in events)."""
    system_prompt: Optional[str] = None
    """System prompt override."""
    schema: Optional[SchemaType] = None
    """Schema for structured output."""
    schema_options: Optional[Dict[str, Any]] = None
    """Validation options for JSON Schema."""
    agent: Optional[Any] = None
    """Agent override."""
    mcp_servers: Optional[Dict[str, Any]] = None
    """MCP servers override (replaces the swarm default for this step)."""
    best_of: Optional[BestOfConfig] = None
    """Best-of configuration (mutually exclusive with verify)."""
    verify: Optional[VerifyConfig] = None
    """Verify configuration (mutually exclusive with best_of)."""
    retry: Optional[RetryConfig] = None
    """Retry configuration."""
    timeout_ms: Optional[int] = None
    """Timeout in milliseconds."""


@dataclass
class FilterConfig(Generic[T]):
    """Filter step configuration."""
    prompt: str
    """Evaluation prompt."""
    schema: SchemaType
    """Schema for structured output (required)."""
    condition: Callable[[Any], bool]
    """Condition function that determines pass/fail."""
    name: Optional[str] = None
    """Step name for observability (appears in events)."""
    system_prompt: Optional[str] = None
    """System prompt override."""
    schema_options: Optional[Dict[str, Any]] = None
    """Validation options for JSON Schema."""
    agent: Optional[Any] = None
    """Agent override."""
    mcp_servers: Optional[Dict[str, Any]] = None
    """MCP servers override (replaces the swarm default for this step)."""
    emit: EmitOption = "success"
    """What to emit to the next step (default: "success")."""
    verify: Optional[VerifyConfig] = None
    """Verify configuration."""
    retry: Optional[RetryConfig] = None
    """Retry configuration."""
    timeout_ms: Optional[int] = None
    """Timeout in milliseconds."""


@dataclass
class ReduceConfig(Generic[T]):
    """Reduce step configuration."""
    prompt: str
    """Synthesis prompt."""
    name: Optional[str] = None
    """Step name for observability (appears in events)."""
    system_prompt: Optional[str] = None
    """System prompt override."""
    schema: Optional[SchemaType] = None
    """Schema for structured output."""
    schema_options: Optional[Dict[str, Any]] = None
    """Validation options for JSON Schema."""
    agent: Optional[Any] = None
    """Agent override."""
    mcp_servers: Optional[Dict[str, Any]] = None
    """MCP servers override (replaces the swarm default for this step)."""
    verify: Optional[VerifyConfig] = None
    """Verify configuration."""
    retry: Optional[RetryConfig] = None
    """Retry configuration."""
    timeout_ms: Optional[int] = None
    """Timeout in milliseconds."""


# =============================================================================
# INTERNAL
# =============================================================================

StepType = Literal["map", "filter", "reduce"]


@dataclass
class Step:
    """Internal step representation."""
    type: StepType
    config: Union[MapConfig, FilterConfig, ReduceConfig]


# =============================================================================
# RESULTS
# =============================================================================

@dataclass
class StepResult(Generic[T]):
    """Result of a single pipeline step."""
    type: StepType
    index: int
    duration_ms: int
    results: Union[List[SwarmResult[T]], ReduceResult[T]]


@dataclass
class PipelineResult(Generic[T]):
    """Final result from pipeline execution."""
    run_id: str
    steps: List[StepResult]
    output: Union[List[SwarmResult[T]], ReduceResult[T]]
    total_duration_ms: int


# =============================================================================
# EVENTS
# =============================================================================

@dataclass
class StepEvent:
    """Step lifecycle event (base class)."""
    type: StepType
    index: int
    name: Optional[str]


@dataclass
class StepStartEvent(StepEvent):
    """Emitted when a step starts."""
    item_count: int


@dataclass
class StepCompleteEvent(StepEvent):
    """Emitted when a step completes."""
    duration_ms: int
    success_count: int
    error_count: int
    filtered_count: int


@dataclass
class StepErrorEvent(StepEvent):
    """Emitted when a step errors."""
    error: Exception


@dataclass
class ItemRetryEvent:
    """Emitted on item retry."""
    step_index: int
    step_name: Optional[str]
    item_index: int
    attempt: int
    error: str


@dataclass
class WorkerCompleteEvent:
    """Emitted when a verify worker completes."""
    step_index: int
    step_name: Optional[str]
    item_index: int
    attempt: int
    status: Literal["success", "error"]


@dataclass
class VerifierCompleteEvent:
    """Emitted when a verifier completes."""
    step_index: int
    step_name: Optional[str]
    item_index: int
    attempt: int
    passed: bool
    feedback: Optional[str]


@dataclass
class CandidateCompleteEvent:
    """Emitted when a best-of candidate completes."""
    step_index: int
    step_name: Optional[str]
    item_index: int
    candidate_index: int
    status: Literal["success", "error"]


@dataclass
class JudgeCompleteEvent:
    """Emitted when a best-of judge completes."""
    step_index: int
    step_name: Optional[str]
    item_index: int
    winner_index: int
    reasoning: str


@dataclass
class PipelineEvents:
    """Event handlers."""
    on_step_start: Optional[Callable[[StepStartEvent], None]] = None
    on_step_complete: Optional[Callable[[StepCompleteEvent], None]] = None
    on_step_error: Optional[Callable[[StepErrorEvent], None]] = None
    on_item_retry: Optional[Callable[[ItemRetryEvent], None]] = None
    on_worker_complete: Optional[Callable[[WorkerCompleteEvent], None]] = None
    on_verifier_complete: Optional[Callable[[VerifierCompleteEvent], None]] = None
    on_candidate_complete: Optional[Callable[[CandidateCompleteEvent], None]] = None
    on_judge_complete: Optional[Callable[[JudgeCompleteEvent], None]] = None


# Event name mapping for the chainable .on() style
PipelineEventMap = {
    "step_start": "on_step_start",
    "step_complete": "on_step_complete",
    "step_error": "on_step_error",
    "item_retry": "on_item_retry",
    "worker_complete": "on_worker_complete",
    "verifier_complete": "on_verifier_complete",
    "candidate_complete": "on_candidate_complete",
    "judge_complete": "on_judge_complete",
}

# Event names (for type hints)
EventName = Literal[
    "step_start",
    "step_complete",
    "step_error",
    "item_retry",
    "worker_complete",
    "verifier_complete",
    "candidate_complete",
    "judge_complete",
]
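These configuration types are plain dataclasses, so they can be constructed directly. A minimal sketch of how they compose (the `Pipeline` runner in `pipeline/pipeline.py` is not shown in this hunk, so wiring `Step` objects by hand and passing a JSON Schema dict for `SchemaType` are assumptions made for illustration):

```
from swarmkit.pipeline.types import MapConfig, FilterConfig, PipelineEvents, Step

# Map step: one prompt fanned out over all items.
extract = Step(type="map", config=MapConfig(
    prompt="Extract the key claims from this document.",
    name="extract",
    timeout_ms=60_000,
))

# Filter step: structured output plus a condition deciding pass/fail.
# Assumes SchemaType accepts a plain JSON Schema dict.
supported_only = Step(type="filter", config=FilterConfig(
    prompt="Is this claim supported by the source text?",
    schema={"type": "object", "properties": {"supported": {"type": "boolean"}}},
    condition=lambda data: bool(data["supported"]),
    emit="success",  # forward only the items that passed
))

# Observability hooks from PipelineEvents.
events = PipelineEvents(
    on_step_start=lambda e: print(f"step {e.index} ({e.name}): {e.item_count} items"),
    on_item_retry=lambda e: print(f"retry {e.attempt} on item {e.item_index}: {e.error}"),
)
```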
swarmkit/prompts/__init__.py
ADDED

@@ -0,0 +1,126 @@
"""Prompt templates for Swarm abstractions.

Prompts are stored as markdown files for easy editing.
They are loaded at import time using importlib.resources.
"""

from importlib import resources
from typing import Dict, Union


def _load_prompt(subdir: str, filename: str) -> str:
    """Load a prompt template from a .md file in a subdirectory."""
    return resources.files(__package__).joinpath(subdir, filename).read_text(encoding='utf-8')


# Agent prompts (system prompts that go into CLAUDE.md-like context)
JUDGE_PROMPT: str = _load_prompt('agent_md', 'judge.md')
VERIFY_PROMPT: str = _load_prompt('agent_md', 'verify.md')
REDUCE_PROMPT: str = _load_prompt('agent_md', 'reduce.md')

# User prompts (task prompts passed to .run())
JUDGE_USER_PROMPT: str = _load_prompt('user', 'judge.md')
VERIFY_USER_PROMPT: str = _load_prompt('user', 'verify.md')
RETRY_FEEDBACK_PROMPT: str = _load_prompt('user', 'retry_feedback.md')


def apply_template(template: str, variables: Dict[str, str]) -> str:
    """Apply template variables to a template string.

    Replaces {{variable}} with the corresponding value from the variables dict.

    Args:
        template: Template string with {{variable}} placeholders
        variables: Dict mapping variable names to values

    Returns:
        Template with placeholders replaced
    """
    result = template
    for key, value in variables.items():
        result = result.replace(f"{{{{{key}}}}}", value)
    return result


def build_file_tree(files: Dict[str, Union[str, bytes]]) -> str:
    """Build a formatted file tree string for judge context.

    Generates an ASCII tree representation of the file structure,
    with comments explaining the purpose of each section.

    Args:
        files: Dict mapping file paths to content

    Returns:
        Formatted file tree string
    """
    if not files:
        return "context/\n  (empty)"

    # Unique top-level folders, sorted with worker_task first
    folders = sorted(
        set(path.split("/")[0] for path in files.keys()),
        key=lambda f: ("" if f == "worker_task" else f)
    )

    if not folders:
        return "context/\n  (empty)"

    # Check what exists in worker_task/
    has_system_prompt = "worker_task/system_prompt.txt" in files
    has_schema = "worker_task/schema.json" in files
    has_input = any(p.startswith("worker_task/input/") for p in files.keys())

    # Build entries with comments
    entries: list[tuple[str, str]] = []  # (line, comment)

    for i, folder in enumerate(folders):
        is_last_folder = i == len(folders) - 1
        folder_prefix = "└── " if is_last_folder else "├── "
        child_indent = "    " if is_last_folder else "│   "

        if folder == "worker_task":
            entries.append((f"{folder_prefix}{folder}/", "task given to workers"))

            # Build worker_task children (only show what exists)
            children: list[tuple[str, str]] = []
            if has_system_prompt:
                children.append(("system_prompt.txt", "worker system prompt"))
            children.append(("user_prompt.txt", "worker task prompt"))
            if has_schema:
                children.append(("schema.json", "expected output schema"))
            if has_input:
                children.append(("input/", "worker input files"))

            for j, (name, comment) in enumerate(children):
                is_last_child = j == len(children) - 1
                child_prefix = "└── " if is_last_child else "├── "
                entries.append((f"{child_indent}{child_prefix}{name}", comment))

        elif folder.startswith("candidate_"):
            idx = folder.replace("candidate_", "")
            entries.append((f"{folder_prefix}{folder}/", f"worker {idx} solution"))
        elif folder == "worker_output":
            entries.append((f"{folder_prefix}{folder}/", "output to verify"))
        elif folder.startswith("item_"):
            idx = folder.replace("item_", "")
            entries.append((f"{folder_prefix}{folder}/", f"input {idx}"))
        else:
            entries.append((f"{folder_prefix}{folder}/", ""))

    # Calculate max line width for alignment
    max_width = max(len(line) for line, _ in entries)

    # Build final output with aligned comments
    lines = ["context/"]
    for line, comment in entries:
        if comment:
            padding = " " * (max_width - len(line) + 3)
            lines.append(f"{line}{padding}# {comment}")
        else:
            lines.append(line)

    return "\n".join(lines)


__all__ = ['JUDGE_PROMPT', 'JUDGE_USER_PROMPT', 'VERIFY_PROMPT', 'VERIFY_USER_PROMPT', 'REDUCE_PROMPT', 'RETRY_FEEDBACK_PROMPT', 'apply_template', 'build_file_tree']
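Both helpers are pure functions over strings and dicts, so their behavior is easy to pin down. A small sketch (the file contents are placeholders):

```
from swarmkit.prompts import apply_template, build_file_tree

filled = apply_template(
    "You are judging {{candidateCount}} candidates.",
    {"candidateCount": "3"},
)
# -> "You are judging 3 candidates."

tree = build_file_tree({
    "worker_task/user_prompt.txt": "Summarize the report.",
    "worker_task/schema.json": "{}",
    "candidate_0/summary.md": "...",
    "candidate_1/summary.md": "...",
})
print(tree)
# Roughly (comment alignment depends on the longest entry):
# context/
# ├── worker_task/           # task given to workers
# │   ├── user_prompt.txt    # worker task prompt
# │   └── schema.json        # expected output schema
# ├── candidate_0/           # worker 0 solution
# └── candidate_1/           # worker 1 solution
```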
swarmkit/prompts/agent_md/judge.md
ADDED

@@ -0,0 +1,30 @@
### 1. YOUR ROLE: BEST OF N JUDGE

You are a judge. {{candidateCount}} AI workers attempted the same task independently. Your job is to analyze their solution attempts and pick the best one based on the evaluation criteria below.

### 2. CONTEXT STRUCTURE

```
{{fileTree}}
```

### 3. YOUR EVALUATION CRITERIA

You must judge their work based on:

```
{{criteria}}
```

### 4. YOUR PROCESS

1. Read `worker_task/` to understand the task:
   - Review the worker system prompt and task prompt
   - Check the expected output schema (if present)
   - Examine the worker input files in `input/`
2. Carefully review EACH solution attempt in `candidate_i/`
3. Compare outputs against the evaluation criteria
4. Reason through your findings — perform all necessary evidence-based analyses and verifications before deciding
5. Pick the best candidate (0-indexed)

**IMPORTANT:** Be thorough. Do not skip steps. Your judgment must be evidence-based — cite specific files, outputs, or discrepancies to justify your decision.
swarmkit/prompts/agent_md/verify.md
ADDED

@@ -0,0 +1,33 @@
### 1. YOUR ROLE: OUTPUT VERIFIER

You are a quality verifier. An AI worker produced output for a task. Your job is to verify whether the output meets the specified quality criteria.

### 2. CONTEXT STRUCTURE

```
{{fileTree}}
```

### 3. VERIFICATION CRITERIA

The output must satisfy:

```
{{criteria}}
```

### 4. YOUR PROCESS

1. Read `worker_task/` to understand what was asked:
   - Review the worker system prompt (if present)
   - Review the task prompt
   - Check the expected output schema (if present)
   - Examine any input files in `input/`
2. Carefully review the worker's output in `worker_output/`
3. Evaluate against the verification criteria
4. Reason through your findings
5. Make your decision

**IMPORTANT:** Be thorough and fair. Cite specific evidence. If the output generally achieves the goal with minor issues, consider passing. Only fail if there are significant problems that violate the criteria.

If failing, provide specific, actionable feedback explaining what needs to be fixed.
swarmkit/prompts/user/judge.md
ADDED

@@ -0,0 +1 @@
Evaluate the candidates and select the best one. You must save your decision to the file `output/result.json`.
swarmkit/prompts/user/verify.md
ADDED

@@ -0,0 +1 @@
Verify the worker output against the criteria. You must save your decision to the file `output/result.json`.
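Both user prompts direct the agent to persist its decision to `output/result.json`. The authoritative schema lives in `swarmkit/schema.py`, which this diff lists but does not show; the sketch below infers a plausible shape from the `JudgeCompleteEvent` and `VerifierCompleteEvent` fields in `pipeline/types.py`, so treat the field names as assumptions:

```
import json
import os

# Hypothetical decision payloads; field names are inferred from
# JudgeCompleteEvent (winner_index, reasoning) and
# VerifierCompleteEvent (passed, feedback), not from schema.py.
judge_decision = {"winner_index": 1, "reasoning": "Candidate 1 satisfies the schema."}
verify_decision = {"passed": False, "feedback": "The summary omits the required revenue figures."}

# What an agent would do to record its decision.
os.makedirs("output", exist_ok=True)
with open("output/result.json", "w", encoding="utf-8") as f:
    json.dump(judge_decision, f)
```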
swarmkit/results.py
ADDED

@@ -0,0 +1,45 @@
"""Result types for SwarmKit SDK."""

from dataclasses import dataclass, field
from typing import Any, Dict, Optional, Union


@dataclass
class AgentResponse:
    """Response from agent execution.

    Matches the TypeScript SDK's AgentResponse for exact parity.

    Attributes:
        sandbox_id: Sandbox ID
        exit_code: Command exit code
        stdout: Standard output
        stderr: Standard error
    """
    sandbox_id: str
    exit_code: int
    stdout: str
    stderr: str


# Backward-compatibility alias
ExecuteResult = AgentResponse


@dataclass
class OutputResult:
    """Result from get_output_files() with optional schema validation.

    Matches the TypeScript SDK's OutputResult<T> for exact parity.
    Evidence: sdk-ts/src/types.ts lines 258-268

    Attributes:
        files: Output files from the output/ folder
        data: Parsed and validated result.json data (None if no schema was given or validation failed)
        error: Validation or parse error message, if any
        raw_data: Raw result.json string when parsing or validation failed (for debugging)
    """
    files: Dict[str, Union[str, bytes]] = field(default_factory=dict)
    data: Optional[Any] = None
    error: Optional[str] = None
    raw_data: Optional[str] = None
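Downstream code typically branches on which of these fields is set. A minimal consumption sketch (the `get_output_files()` call itself belongs to the agent API elsewhere in the package, so the function below simply takes a ready-made `OutputResult`):

```
from swarmkit.results import OutputResult

def handle(result: OutputResult) -> None:
    """Branch on the fields of an OutputResult."""
    if result.error is not None:
        # Parsing or validation failed; raw_data preserves the original
        # result.json text for debugging.
        print("validation failed:", result.error)
        print("raw result.json:", result.raw_data)
    elif result.data is not None:
        print("validated data:", result.data)
    for path, content in result.files.items():
        kind = "binary" if isinstance(content, bytes) else "text"
        print(f"output file: {path} ({kind})")
```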
swarmkit/retry.py
ADDED

@@ -0,0 +1,133 @@
"""Retry utility for Swarm operations.

Generic retry with exponential backoff.
Works with any result type that has a `status` field.
"""

import asyncio
from dataclasses import dataclass
from typing import Any, Awaitable, Callable, Optional, TypeVar

# =============================================================================
# CONSTANTS
# =============================================================================

DEFAULT_MAX_ATTEMPTS = 3
DEFAULT_BACKOFF_MS = 1000
DEFAULT_BACKOFF_MULTIPLIER = 2.0

# =============================================================================
# TYPES
# =============================================================================

# TypeVar for result types (SwarmResult, ReduceResult, etc.).
# Results must have a `status` field for the default retry behavior.
TResult = TypeVar('TResult')

# Callback type for item retry events (must be defined before RetryConfig)
OnItemRetryCallback = Callable[[int, int, str], None]  # (item_index, attempt, error)


def _get_field(obj: Any, field: str, default: Any = None) -> Any:
    """Get a field from a dict or an object (duck-typing helper)."""
    if isinstance(obj, dict):
        return obj.get(field, default)
    return getattr(obj, field, default)


@dataclass
class RetryConfig:
    """Per-item retry configuration.

    Example:
        # Basic retry on error
        RetryConfig(max_attempts=3)

        # With exponential backoff
        RetryConfig(max_attempts=3, backoff_ms=1000, backoff_multiplier=2)

        # Custom retry condition
        RetryConfig(max_attempts=3, retry_on=lambda r: r.status == "error" or "timeout" in (r.error or ""))

        # With callback
        RetryConfig(max_attempts=3, on_item_retry=lambda i, a, e: print(f"Item {i} retry {a}: {e}"))

    Args:
        max_attempts: Maximum total attempts, including the first (default: 3)
        backoff_ms: Initial backoff in ms (default: 1000)
        backoff_multiplier: Exponential backoff multiplier (default: 2)
        retry_on: Custom retry condition (default: status == "error")
        on_item_retry: Callback invoked when a retry occurs (item_index, attempt, error)
    """
    max_attempts: int = DEFAULT_MAX_ATTEMPTS
    backoff_ms: int = DEFAULT_BACKOFF_MS
    backoff_multiplier: float = DEFAULT_BACKOFF_MULTIPLIER
    retry_on: Optional[Callable[[Any], bool]] = None
    on_item_retry: Optional[OnItemRetryCallback] = None

    def should_retry(self, result: Any) -> bool:
        """Check whether the result should be retried."""
        if self.retry_on is not None:
            return self.retry_on(result)
        # Default: retry on error status
        return _get_field(result, 'status') == "error"


# =============================================================================
# RETRY LOGIC
# =============================================================================

async def execute_with_retry(
    fn: Callable[[int], Awaitable[TResult]],
    config: Optional[RetryConfig] = None,
    item_index: int = 0,
) -> TResult:
    """Execute an async function with retry and exponential backoff.

    Works with any result type that has a `status` field (SwarmResult, ReduceResult, etc.).

    Args:
        fn: Async function that receives the attempt number (1-based) and returns a result
        config: Retry configuration (optional, uses defaults if not provided)
        item_index: Item index for the callback (default: 0)

    Returns:
        Result from the function

    Example:
        result = await execute_with_retry(
            lambda attempt: self._execute_map_item(item, prompt, index, run_id, params, timeout, attempt),
            RetryConfig(max_attempts=3, backoff_ms=1000),
            item_index=index,
        )
    """
    resolved = config or RetryConfig()

    last_result: Optional[TResult] = None
    attempts = 0
    backoff = resolved.backoff_ms

    while attempts < resolved.max_attempts:
        attempts += 1
        last_result = await fn(attempts)

        # Return immediately if the result does not need a retry
        if not resolved.should_retry(last_result):
            return last_result

        # Don't retry if we've exhausted our attempts
        if attempts >= resolved.max_attempts:
            break

        # Notify of the retry via the callback in config
        if resolved.on_item_retry is not None:
            error = _get_field(last_result, 'error') or "Unknown error"
            resolved.on_item_retry(item_index, attempts, error)

        # Wait before retrying (convert ms to seconds)
        await asyncio.sleep(backoff / 1000)
        backoff = backoff * resolved.backoff_multiplier

    # All attempts exhausted: return the last result
    assert last_result is not None
    return last_result
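Because `_get_field` duck-types, plain dicts work as results, which makes the helper easy to exercise in isolation. A runnable sketch (the flaky worker is invented for the demo):

```
import asyncio
from swarmkit.retry import RetryConfig, execute_with_retry

async def flaky(attempt: int) -> dict:
    # Fails once, then succeeds; any result with a `status` field works.
    if attempt < 2:
        return {"status": "error", "error": "transient failure"}
    return {"status": "success", "attempt": attempt}

async def main() -> None:
    result = await execute_with_retry(
        flaky,
        RetryConfig(
            max_attempts=3,
            backoff_ms=100,  # keep the demo fast
            on_item_retry=lambda i, a, e: print(f"item {i}, attempt {a} failed: {e}"),
        ),
    )
    print(result)  # {'status': 'success', 'attempt': 2}

asyncio.run(main())
```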