swarmkit 0.1.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1751 @@
+ """Swarm - Functional programming abstractions for AI agents.
+
+ Provides map, filter, reduce, and bestOf operations for parallel AI agent execution.
+
+ Example:
+     ```python
+     from swarmkit import Swarm
+
+     # Minimal usage - uses SWARMKIT_API_KEY and E2B_API_KEY env vars
+     swarm = Swarm()
+
+     # Or with explicit config
+     from swarmkit import SwarmConfig, AgentConfig, E2BProvider
+     swarm = Swarm(SwarmConfig(
+         agent=AgentConfig(type="claude", api_key="..."),
+         sandbox=E2BProvider(api_key="..."),
+     ))
+
+     # Map: apply agent to each item
+     results = await swarm.map(
+         items=[{"doc.txt": "content1"}, {"doc.txt": "content2"}],
+         prompt="Analyze this document",
+     )
+
+     # Filter: evaluate and filter items
+     critical = await swarm.filter(
+         items=results.success,
+         prompt="Evaluate severity",
+         schema=SeveritySchema,
+         condition=lambda x: x.severity == "critical",
+     )
+
+     # Reduce: synthesize many into one
+     report = await swarm.reduce(
+         items=critical.success,
+         prompt="Create summary report",
+     )
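+
+     # Best-of-N: N candidates per task, a judge picks the best
+     # (sketch - assumes BestOfConfig is exported from swarmkit's top level,
+     # like SwarmConfig above)
+     best = await swarm.best_of(
+         item={"doc.txt": "content1"},
+         prompt="Write an in-depth analysis",
+         config=BestOfConfig(n=3, judge_criteria="Most thorough analysis"),
+     )
+     print(best.winner_index, best.judge_reasoning)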
+     ```
+ """
+
+ import asyncio
+ import base64
+ import json
+ import secrets
+ import warnings
+ from typing import Any, Callable, Dict, List, Optional, Type, Union
+
+ from ..bridge import BridgeManager
+ from ..schema import is_pydantic_model, is_dataclass, to_json_schema, validate_and_parse
+ from ..config import AgentConfig
+ from ..utils import _encode_files_for_transport, _filter_none
+ from ..prompts import JUDGE_PROMPT, JUDGE_USER_PROMPT, VERIFY_PROMPT, VERIFY_USER_PROMPT, REDUCE_PROMPT, RETRY_FEEDBACK_PROMPT, apply_template, build_file_tree
+ from ..retry import RetryConfig, execute_with_retry
+ from .types import (
+     FileMap,
+     SwarmConfig,
+     BestOfConfig,
+     VerifyConfig,
+     IndexedMeta,
+     ReduceMeta,
+     JudgeMeta,
+     VerifyMeta,
+     Prompt,
+     ItemInput,
+     SchemaType,
+ )
+ from .results import (
+     SwarmResult,
+     SwarmResultList,
+     ReduceResult,
+     BestOfResult,
+     BestOfInfo,
+     VerifyInfo,
+     is_swarm_result,
+ )
+
+
+ # =============================================================================
+ # CONSTANTS
+ # =============================================================================
+
+ MAX_CONCURRENCY = 100  # Cap to prevent resource exhaustion
+
+
+ # =============================================================================
+ # SWARM CLASS
+ # =============================================================================
+
+ class Swarm:
+     """Functional programming abstractions for AI agents.
+
+     Provides map, filter, reduce, and bestOf operations for parallel AI agent execution.
+     Uses a shared bridge process with multiple SwarmKit instances for efficiency.
+     """
+
+     def __init__(self, config: Optional[SwarmConfig] = None):
+         """Initialize Swarm with configuration.
+
+         Args:
+             config: SwarmConfig with agent, sandbox, concurrency settings
+                 (optional - defaults to SWARMKIT_API_KEY and E2B_API_KEY env vars)
+
+         Raises:
+             ValueError: If concurrency exceeds MAX_CONCURRENCY
+         """
+         config = config or SwarmConfig()
+         if config.concurrency > MAX_CONCURRENCY:
+             raise ValueError(
+                 f"concurrency={config.concurrency} exceeds max {MAX_CONCURRENCY}. "
+                 f"For higher parallelism, scale horizontally with multiple processes."
+             )
+
+         self.config = config
+         self.semaphore = asyncio.Semaphore(config.concurrency)
+         self.bridge = BridgeManager()
+         self._bridge_started = False
+
+     async def _ensure_bridge(self):
+         """Ensure bridge is started."""
+         if not self._bridge_started:
+             await self.bridge.start()
+             self._bridge_started = True
+
+     # =========================================================================
+     # PUBLIC API
+     # =========================================================================
+
+     async def map(
+         self,
+         items: List[ItemInput],
+         prompt: Prompt,
+         system_prompt: Optional[str] = None,
+         schema: Optional[SchemaType] = None,
+         schema_options: Optional[Dict[str, Any]] = None,
+         agent: Optional[AgentConfig] = None,
+         mcp_servers: Optional[Dict[str, Any]] = None,
+         best_of: Optional[BestOfConfig] = None,
+         verify: Optional[VerifyConfig] = None,
+         retry: Optional[RetryConfig] = None,
+         timeout_ms: Optional[int] = None,
+     ) -> SwarmResultList:
+         """Apply an agent to each item in parallel.
+
+         Args:
+             items: List of items (FileMaps or SwarmResults from previous operation)
+             prompt: Task prompt (string or function(files, index) -> string)
+             system_prompt: Optional system prompt
+             schema: Optional Pydantic model or JSON Schema for structured output
+             schema_options: Optional validation options
+             agent: Optional agent override
+             mcp_servers: Optional MCP servers override (replaces swarm default)
+             best_of: Optional bestOf configuration for N candidates + judge per item (mutually exclusive with verify)
+             verify: Optional verify configuration for LLM-as-judge quality verification with retry (mutually exclusive with best_of)
+             retry: Optional retry configuration for failed items
+             timeout_ms: Optional timeout in ms
+
+         Returns:
+             SwarmResultList with results for each item
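+
+         Example (sketch; ``Summary`` stands in for a user-defined Pydantic model):
+             results = await swarm.map(
+                 items=[{"doc.txt": "..."}],
+                 prompt="Summarize this document",
+                 schema=Summary,
+             )
+             summaries = [r.data for r in results.success]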
+         """
+         await self._ensure_bridge()
+         run_id = self._generate_run_id()
+         timeout = timeout_ms or self.config.timeout_ms
+         retry = retry or self.config.retry
+         resolved_mcp_servers = mcp_servers or self.config.mcp_servers
+
+         # best_of and verify are mutually exclusive
+         if best_of and verify:
+             raise ValueError("map() cannot use both best_of and verify options simultaneously")
+
+         async def process_item(item: ItemInput, index: int) -> SwarmResult:
+             # bestOf has internal per-candidate and judge retry - don't double-wrap
+             if best_of:
+                 return await self._execute_map_item_with_best_of(
+                     item, prompt, index, run_id, system_prompt, schema,
+                     schema_options, agent, resolved_mcp_servers, best_of, retry, timeout
+                 )
+
+             # verify has internal retry loop with feedback - don't double-wrap with retry
+             if verify:
+                 return await self._execute_map_item_with_verify(
+                     item, prompt, index, run_id, system_prompt, schema,
+                     schema_options, agent, resolved_mcp_servers, verify, timeout, retry
+                 )
+
+             # Wrap with retry if configured (simple map only)
+             if retry:
+                 return await execute_with_retry(
+                     lambda attempt: self._execute_map_item(
+                         item, prompt, index, run_id, system_prompt, schema,
+                         schema_options, agent, resolved_mcp_servers, timeout, attempt
+                     ),
+                     retry,
+                     item_index=index,
+                 )
+             return await self._execute_map_item(
+                 item, prompt, index, run_id, system_prompt, schema,
+                 schema_options, agent, resolved_mcp_servers, timeout
+             )
+
+         results = await asyncio.gather(*[
+             process_item(item, i) for i, item in enumerate(items)
+         ])
+
+         return SwarmResultList.from_results(list(results))
+
+     async def filter(
+         self,
+         items: List[ItemInput],
+         prompt: str,
+         schema: SchemaType,
+         condition: Callable[[Any], bool],
+         schema_options: Optional[Dict[str, Any]] = None,
+         system_prompt: Optional[str] = None,
+         agent: Optional[AgentConfig] = None,
+         mcp_servers: Optional[Dict[str, Any]] = None,
+         verify: Optional[VerifyConfig] = None,
+         retry: Optional[RetryConfig] = None,
+         timeout_ms: Optional[int] = None,
+     ) -> SwarmResultList:
+         """Two-step evaluation: agent assesses each item, then local condition applies threshold.
+
+         1. Agent sees context files, evaluates per prompt, outputs result.json matching schema
+         2. Condition function receives parsed data, returns true (success) or false (filtered)
+
+         Returns ALL items with status:
+         - "success": passed condition
+         - "filtered": evaluated but didn't pass condition
+         - "error": agent error
+
+         Use `.success` for passing items, `.filtered` for non-passing.
+
+         Args:
+             items: List of items to filter
+             prompt: Evaluation prompt
+             schema: Pydantic model or JSON Schema (required for filter)
+             condition: Function(data) -> bool to determine pass/fail
+             schema_options: Optional validation options
+             system_prompt: Optional system prompt
+             agent: Optional agent override
+             mcp_servers: Optional MCP servers override (replaces swarm default)
+             verify: Optional verify configuration for LLM-as-judge quality verification with retry
+             retry: Optional retry configuration for failed items
+             timeout_ms: Optional timeout in ms
+
+         Returns:
+             SwarmResultList with all items (success, filtered, or error status)
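+
+         Example (sketch; ``Severity`` stands in for a user-defined Pydantic model
+         with a ``severity: str`` field):
+             critical = await swarm.filter(
+                 items=docs,
+                 prompt="Rate the severity of the reported issue",
+                 schema=Severity,
+                 condition=lambda s: s.severity == "critical",
+             )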
+         """
+         await self._ensure_bridge()
+         run_id = self._generate_run_id()
+         timeout = timeout_ms or self.config.timeout_ms
+         retry = retry or self.config.retry
+         resolved_mcp_servers = mcp_servers or self.config.mcp_servers
+
+         async def process_item(item: ItemInput, index: int) -> SwarmResult:
+             # verify has internal retry loop with feedback - don't double-wrap with retry
+             if verify:
+                 return await self._execute_filter_item_with_verify(
+                     item, prompt, index, run_id, system_prompt, schema,
+                     schema_options, agent, resolved_mcp_servers, verify, timeout, retry
+                 )
+
+             # Wrap with retry if configured
+             if retry:
+                 return await execute_with_retry(
+                     lambda attempt: self._execute_filter_item(
+                         item, prompt, index, run_id, system_prompt, schema,
+                         schema_options, agent, resolved_mcp_servers, timeout, attempt
+                     ),
+                     retry,
+                     item_index=index,
+                 )
+             return await self._execute_filter_item(
+                 item, prompt, index, run_id, system_prompt, schema,
+                 schema_options, agent, resolved_mcp_servers, timeout
+             )
+
+         evaluated = await asyncio.gather(*[
+             process_item(item, i) for i, item in enumerate(items)
+         ])
+
+         # Apply condition and set status accordingly
+         results: List[SwarmResult] = []
+         for r in evaluated:
+             if r.status == "error":
+                 results.append(r)
+             elif r.data is not None:
+                 try:
+                     if condition(r.data):
+                         results.append(r)  # success
+                     else:
+                         # Didn't pass condition → filtered
+                         results.append(SwarmResult(
+                             status="filtered",
+                             data=r.data,
+                             files=r.files,
+                             meta=r.meta,
+                             verify=r.verify,
+                         ))
+                 except Exception as e:
+                     # Condition threw → error (preserve raw_data if present)
+                     results.append(SwarmResult(
+                         status="error",
+                         data=None,
+                         files=r.files,
+                         meta=r.meta,
+                         error=f"Condition function threw: {e}",
+                         raw_data=getattr(r, 'raw_data', None),
+                     ))
+             else:
+                 results.append(r)
+
+         return SwarmResultList.from_results(results)
+
+     async def reduce(
+         self,
+         items: List[ItemInput],
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         schema: Optional[SchemaType] = None,
+         schema_options: Optional[Dict[str, Any]] = None,
+         agent: Optional[AgentConfig] = None,
+         mcp_servers: Optional[Dict[str, Any]] = None,
+         verify: Optional[VerifyConfig] = None,
+         retry: Optional[RetryConfig] = None,
+         timeout_ms: Optional[int] = None,
+     ) -> ReduceResult:
+         """Synthesize many items into one.
+
+         Args:
+             items: List of items to reduce
+             prompt: Synthesis prompt
+             system_prompt: Optional system prompt
+             schema: Optional Pydantic model or JSON Schema
+             schema_options: Optional validation options
+             agent: Optional agent override
+             mcp_servers: Optional MCP servers override (replaces swarm default)
+             verify: Optional verify configuration for LLM-as-judge quality verification with retry
+             retry: Optional retry configuration
+             timeout_ms: Optional timeout in ms
+
+         Returns:
+             ReduceResult with synthesized output
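+
+         Example (sketch):
+             report = await swarm.reduce(
+                 items=critical.success,
+                 prompt="Create a summary report across all items",
+             )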
+         """
+         await self._ensure_bridge()
+         run_id = self._generate_run_id()
+         timeout = timeout_ms or self.config.timeout_ms
+         retry = retry or self.config.retry
+         resolved_mcp_servers = mcp_servers or self.config.mcp_servers
+
+         # Collect files and track original indices
+         all_files: List[FileMap] = []
+         indices: List[int] = []
+
+         for i, item in enumerate(items):
+             all_files.append(self._get_files(item))
+             indices.append(self._get_index(item, i))
+
+         # Build context: item_0/, item_1/, etc.
+         context: FileMap = {}
+         for i, files in enumerate(all_files):
+             for name, content in files.items():
+                 context[f"item_{indices[i]}/{name}"] = content
+
+         # Build reduce system prompt (context structure + user's system_prompt)
+         file_tree = build_file_tree(context)
+         reduce_context_prompt = apply_template(REDUCE_PROMPT, {"fileTree": file_tree})
+         final_system_prompt = (
+             f"{reduce_context_prompt}\n\n{system_prompt}"
+             if system_prompt
+             else reduce_context_prompt
+         )
+
+         # Build meta (sandboxId/tag updated after execution)
+         def build_meta(result: Dict[str, Any]) -> ReduceMeta:
+             return ReduceMeta(
+                 run_id=run_id,
+                 operation="reduce",
+                 tag=result["tag"],
+                 sandbox_id=result["sandbox_id"],
+                 input_count=len(items),
+                 input_indices=indices,
+             )
+
+         # Shared execution logic
+         async def execute_once(prompt_to_use: str, tag_prefix: str) -> ReduceResult:
+             async with self.semaphore:
+                 result = await self._execute(
+                     context=context,
+                     prompt=prompt_to_use,
+                     system_prompt=final_system_prompt,
+                     schema=schema,
+                     schema_options=schema_options,
+                     agent=agent,
+                     mcp_servers=resolved_mcp_servers,
+                     tag_prefix=tag_prefix,
+                     timeout=timeout,
+                 )
+
+             meta = build_meta(result)
+
+             if result.get("error"):
+                 return ReduceResult(
+                     status="error",
+                     data=None,
+                     files=result["files"],
+                     meta=meta,
+                     error=result["error"],
+                     raw_data=result.get("raw_data"),
+                 )
+
+             return ReduceResult(
+                 status="success",
+                 data=result["data"],
+                 files=result["files"],
+                 meta=meta,
+             )
+
+         base_tag = f"{self.config.tag}-reduce"
+
+         # verify has internal retry loop with feedback - don't double-wrap with retry
+         if verify:
+             return await self._run_with_verification(
+                 worker_fn=lambda current_prompt, tag_prefix: execute_once(current_prompt, tag_prefix),
+                 original_prompt=prompt,
+                 input_files=context,
+                 verify_config=verify,
+                 mcp_servers=resolved_mcp_servers,
+                 timeout=timeout,
+                 system_prompt=final_system_prompt,
+                 schema=schema,
+                 run_id=run_id,
+                 base_tag=base_tag,
+                 retry=retry,
+             )
+
+         # Wrap with retry if configured
+         if retry:
+             async def execute_fn(attempt: int = 1) -> ReduceResult:
+                 tag_prefix = f"{base_tag}-er{attempt - 1}" if attempt > 1 else base_tag
+                 return await execute_once(prompt, tag_prefix)
+             return await execute_with_retry(execute_fn, retry)
+
+         return await execute_once(prompt, base_tag)
+
+     async def best_of(
+         self,
+         item: ItemInput,
+         prompt: str,
+         config: BestOfConfig,
+         system_prompt: Optional[str] = None,
+         schema: Optional[SchemaType] = None,
+         schema_options: Optional[Dict[str, Any]] = None,
+         retry: Optional[RetryConfig] = None,
+         timeout_ms: Optional[int] = None,
+     ) -> BestOfResult:
+         """Run N candidates on the same task, judge picks the best.
+
+         Args:
+             item: Single item to process
+             prompt: Task prompt
+             config: BestOf configuration (n, judge_criteria, mcp_servers, etc.)
+             system_prompt: Optional system prompt
+             schema: Optional Pydantic model or JSON Schema
+             schema_options: Optional validation options
+             retry: Optional retry configuration for candidates and judge
+             timeout_ms: Optional timeout in ms
+
+         Returns:
+             BestOfResult with winner, candidates, and judge info
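+
+         Example (sketch):
+             best = await swarm.best_of(
+                 item={"spec.md": "..."},
+                 prompt="Implement the spec",
+                 config=BestOfConfig(n=3, judge_criteria="Correctness, then clarity"),
+             )
+             winning_files = best.winner.files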
+         """
+         await self._ensure_bridge()
+         retry = retry or self.config.retry
+
+         # Resolve n
+         n = config.n or (len(config.task_agents) if config.task_agents else None)
+         if n is None:
+             raise ValueError("bestOf requires n or task_agents")
+         if n < 2:
+             raise ValueError("bestOf requires n >= 2")
+
+         run_id = self._generate_run_id()
+         timeout = timeout_ms or self.config.timeout_ms
+         input_files = self._get_files(item)
+
+         # Resolve MCP servers for candidates and judge
+         candidate_mcp_servers = config.mcp_servers or self.config.mcp_servers
+         judge_mcp_servers = config.judge_mcp_servers or config.mcp_servers or self.config.mcp_servers
+
+         # Run candidates (semaphore acquired inside _execute_best_of_candidate)
+         async def run_candidate(candidate_index: int) -> SwarmResult:
+             if retry:
+                 result = await execute_with_retry(
+                     lambda attempt: self._execute_best_of_candidate(
+                         input_files=input_files,
+                         prompt=prompt,
+                         candidate_index=candidate_index,
+                         run_id=run_id,
+                         config=config,
+                         mcp_servers=candidate_mcp_servers,
+                         system_prompt=system_prompt,
+                         schema=schema,
+                         schema_options=schema_options,
+                         timeout=timeout,
+                         attempt=attempt,
+                     ),
+                     retry,
+                     item_index=0,  # standalone bestOf uses item_index=0
+                 )
+             else:
+                 result = await self._execute_best_of_candidate(
+                     input_files=input_files,
+                     prompt=prompt,
+                     candidate_index=candidate_index,
+                     run_id=run_id,
+                     config=config,
+                     mcp_servers=candidate_mcp_servers,
+                     system_prompt=system_prompt,
+                     schema=schema,
+                     schema_options=schema_options,
+                     timeout=timeout,
+                 )
+             # Call callback after candidate completes
+             if config.on_candidate_complete:
+                 config.on_candidate_complete(0, candidate_index, result.status if result.status != "filtered" else "success")
+             return result
+
+         candidates = await asyncio.gather(*[
+             run_candidate(i) for i in range(n)
+         ])
+         candidates = list(candidates)
+
+         # Run judge (semaphore acquired inside _execute_best_of_judge)
+         # Judge uses default retry (status == "error"), not custom retry_on
+         if retry:
+             # Create a copy of retry config without custom retry_on for judge
+             judge_retry = RetryConfig(
+                 max_attempts=retry.max_attempts,
+                 backoff_ms=retry.backoff_ms,
+                 backoff_multiplier=retry.backoff_multiplier,
+                 retry_on=None,  # Use default (status == "error")
+             )
+             judge = await execute_with_retry(
+                 lambda attempt: self._execute_best_of_judge(
+                     input_files=input_files,
+                     task_prompt=prompt,
+                     candidates=candidates,
+                     config=config,
+                     mcp_servers=judge_mcp_servers,
+                     timeout=timeout,
+                     system_prompt=system_prompt,
+                     schema=schema,
+                     attempt=attempt,
+                 ),
+                 judge_retry
+             )
+         else:
+             judge = await self._execute_best_of_judge(
+                 input_files=input_files,
+                 task_prompt=prompt,
+                 candidates=candidates,
+                 config=config,
+                 mcp_servers=judge_mcp_servers,
+                 timeout=timeout,
+                 system_prompt=system_prompt,
+                 schema=schema,
+             )
+
+         first_success = next((i for i, c in enumerate(candidates) if c.status == "success"), -1)
+         winner_index = judge["winner"] if judge["winner"] is not None else (first_success if first_success >= 0 else 0)
+
+         # Call judge callback
+         if config.on_judge_complete:
+             config.on_judge_complete(0, winner_index, judge.get("reasoning", ""))
+
+         judge_meta = JudgeMeta(
+             run_id=run_id,
+             operation="bestof-judge",
+             tag=judge["tag"],
+             sandbox_id=judge["sandbox_id"],
+             candidate_count=n,
+         )
+
+         return BestOfResult(
+             winner=candidates[winner_index] if winner_index < len(candidates) else candidates[0],
+             winner_index=winner_index,
+             judge_reasoning=judge.get("reasoning", "Judge failed to provide reasoning"),
+             judge_meta=judge_meta,
+             candidates=candidates,
+         )
+
+     async def close(self):
+         """Close the bridge connection."""
+         if self._bridge_started:
+             await self.bridge.stop()
+             self._bridge_started = False
+
+     async def __aenter__(self):
+         """Async context manager entry - ensures bridge is started."""
+         await self._ensure_bridge()
+         return self
+
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         """Async context manager exit - closes the bridge."""
+         await self.close()
+         return False
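+
+     # Usage sketch - the async context manager form guarantees bridge cleanup:
+     #
+     #     async with Swarm() as swarm:
+     #         results = await swarm.map(items=[...], prompt="...")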
+
+     # =========================================================================
+     # PRIVATE: EXECUTION
+     # =========================================================================
+
+     async def _execute(
+         self,
+         context: FileMap,
+         prompt: str,
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+         schema_options: Optional[Dict[str, Any]],
+         agent: Optional[AgentConfig],
+         mcp_servers: Optional[Dict[str, Any]],
+         tag_prefix: str,
+         timeout: int,
+     ) -> Dict[str, Any]:
+         """Execute a single agent task."""
+         instance_id = f"{tag_prefix}-{secrets.token_hex(4)}"
+
+         # Build agent config (merges override with base config)
+         agent_config = self._build_agent_config(agent)
+
+         # Convert schema to JSON Schema
+         json_schema = to_json_schema(schema)
+
+         # Build init params with _filter_none to exclude None values
+         # TS SDK resolves defaults from env vars when not provided
+         init_params = _filter_none({
+             # Agent config (optional - TS SDK resolves from SWARMKIT_API_KEY)
+             'agent_type': agent_config.type if agent_config else None,
+             'api_key': agent_config.api_key if agent_config else None,
+             'model': agent_config.model if agent_config else None,
+             'reasoning_effort': agent_config.reasoning_effort if agent_config else None,
+             'betas': agent_config.betas if agent_config else None,
+             # Sandbox (optional - TS SDK resolves from E2B_API_KEY)
+             'sandbox_provider': {'type': self.config.sandbox.type, 'config': self.config.sandbox.config} if self.config.sandbox else None,
+             # Other settings
+             'workspace_mode': self.config.workspace_mode,
+             'session_tag_prefix': tag_prefix,
+             'system_prompt': system_prompt,
+             'schema': json_schema,
+             'schema_options': schema_options,
+             'context': _encode_files_for_transport(context) if context else None,
+             'mcp_servers': mcp_servers,
+         })
+
+         files: FileMap = {}
+         data: Any = None
+         error: Optional[str] = None
+         raw_data: Optional[str] = None
+         sandbox_id = ""
+         tag = tag_prefix
+
+         try:
+             # Create instance
+             await self.bridge.create_instance(instance_id, init_params)
+
+             # Run prompt
+             run_result = await self.bridge.run_on_instance(
+                 instance_id,
+                 prompt,
+                 timeout_ms=timeout,
+                 call_timeout_s=(timeout / 1000) + 60,  # Add buffer for RPC overhead
+             )
+             sandbox_id = run_result.get('sandbox_id', '')
+
+             # Get output
+             output = await self.bridge.get_output_on_instance(instance_id, recursive=True)
+             files = self._decode_files(output.get('files', {}))
+
+             if run_result.get('exit_code', 0) != 0:
+                 error = f"Agent exited with code {run_result.get('exit_code')}"
+             elif json_schema:
+                 # Validate result.json against schema
+                 raw_json = files.get('result.json')
+                 if raw_json is not None:
+                     if isinstance(raw_json, bytes):
+                         raw_json = raw_json.decode('utf-8')
+
+                     if is_pydantic_model(schema) or is_dataclass(schema):
+                         # Pydantic model or dataclass - validate and return instance
+                         try:
+                             strict = schema_options.get('mode') == 'strict' if schema_options else False
+                             data = validate_and_parse(raw_json, schema, strict=strict)
+                         except Exception as e:
+                             error = f"Schema validation failed: {e}"
+                             raw_data = raw_json
+                     else:
+                         # JSON Schema dict - use TS validation result
+                         data = output.get('data')
+                         if output.get('error'):
+                             error = output['error']
+                         if output.get('raw_data'):
+                             raw_data = output['raw_data']
+                 else:
+                     error = "Schema provided but agent did not create output/result.json"
+             else:
+                 data = files
+
+         except Exception as e:
+             error = str(e)
+             # Try to capture partial output even on failure (e.g., timeout)
+             try:
+                 output = await self.bridge.get_output_on_instance(instance_id, recursive=True)
+                 files = self._decode_files(output.get('files', {}))
+             except Exception:
+                 pass  # Sandbox may already be gone
+         finally:
+             # Always cleanup
+             try:
+                 await self.bridge.kill_instance(instance_id)
+             except Exception:
+                 pass
+
+         return {
+             "files": files,
+             "data": data,
+             "tag": tag,
+             "sandbox_id": sandbox_id,
+             "error": error,
+             "raw_data": raw_data,
+         }
+
+     # =========================================================================
+     # PRIVATE: MAP
+     # =========================================================================
+
+     async def _execute_map_item(
+         self,
+         item: ItemInput,
+         prompt: Prompt,
+         index: int,
+         run_id: str,
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+         schema_options: Optional[Dict[str, Any]],
+         agent: Optional[AgentConfig],
+         mcp_servers: Optional[Dict[str, Any]],
+         timeout: int,
+         attempt: int = 1,
+     ) -> SwarmResult:
+         """Execute a single map item."""
+         files = self._get_files(item)
+         tag_prefix = (
+             f"{self.config.tag}-map-{index}-er{attempt - 1}"
+             if attempt > 1
+             else f"{self.config.tag}-map-{index}"
+         )
+
+         try:
+             prompt_str = self._resolve_prompt(prompt, files, index)
+         except Exception as e:
+             return self._build_error_result(
+                 f"Prompt function threw: {e}",
+                 IndexedMeta(run_id=run_id, operation="map", tag=tag_prefix, sandbox_id="", index=index)
+             )
+
+         async with self.semaphore:
+             result = await self._execute(
+                 context=files,
+                 prompt=prompt_str,
+                 system_prompt=system_prompt,
+                 schema=schema,
+                 schema_options=schema_options,
+                 agent=agent,
+                 mcp_servers=mcp_servers,
+                 tag_prefix=tag_prefix,
+                 timeout=timeout,
+             )
+
+         meta = IndexedMeta(
+             run_id=run_id,
+             operation="map",
+             tag=result["tag"],
+             sandbox_id=result["sandbox_id"],
+             index=index,
+         )
+
+         return self._build_result(result, meta)
+
+     async def _execute_map_item_with_best_of(
+         self,
+         item: ItemInput,
+         prompt: Prompt,
+         index: int,
+         run_id: str,
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+         schema_options: Optional[Dict[str, Any]],
+         agent: Optional[AgentConfig],
+         mcp_servers: Optional[Dict[str, Any]],
+         best_of_config: BestOfConfig,
+         retry: Optional[RetryConfig],
+         timeout: int,
+     ) -> SwarmResult:
+         """Execute a single map item with bestOf."""
+         files = self._get_files(item)
+         tag_prefix = f"{self.config.tag}-map-{index}"
+
+         try:
+             prompt_str = self._resolve_prompt(prompt, files, index)
+         except Exception as e:
+             return self._build_error_result(
+                 f"Prompt function threw: {e}",
+                 IndexedMeta(run_id=run_id, operation="map", tag=tag_prefix, sandbox_id="", index=index)
+             )
+
+         n = best_of_config.n or (len(best_of_config.task_agents) if best_of_config.task_agents else None)
+         if n is None or n < 2:
+             return self._build_error_result(
+                 "bestOf requires n >= 2 or task_agents with at least 2 elements",
+                 IndexedMeta(run_id=run_id, operation="map", tag=tag_prefix, sandbox_id="", index=index)
+             )
+
+         # Resolve MCP servers: bestOf.mcp_servers overrides operation-level
+         candidate_mcp_servers = best_of_config.mcp_servers or mcp_servers
+         judge_mcp_servers = best_of_config.judge_mcp_servers or best_of_config.mcp_servers or mcp_servers
+
+         # Run candidates in parallel (semaphore acquired inside _execute_best_of_candidate)
+         async def run_candidate(candidate_index: int) -> SwarmResult:
+             if retry:
+                 result = await execute_with_retry(
+                     lambda attempt: self._execute_best_of_candidate(
+                         input_files=files,
+                         prompt=prompt_str,
+                         candidate_index=candidate_index,
+                         run_id=run_id,
+                         config=best_of_config,
+                         mcp_servers=candidate_mcp_servers,
+                         system_prompt=system_prompt,
+                         schema=schema,
+                         schema_options=schema_options,
+                         timeout=timeout,
+                         parent_index=index,
+                         attempt=attempt,
+                     ),
+                     retry,
+                     item_index=index,  # map item index
+                 )
+             else:
+                 result = await self._execute_best_of_candidate(
+                     input_files=files,
+                     prompt=prompt_str,
+                     candidate_index=candidate_index,
+                     run_id=run_id,
+                     config=best_of_config,
+                     mcp_servers=candidate_mcp_servers,
+                     system_prompt=system_prompt,
+                     schema=schema,
+                     schema_options=schema_options,
+                     timeout=timeout,
+                     parent_index=index,
+                 )
+             # Call callback after candidate completes
+             if best_of_config.on_candidate_complete:
+                 best_of_config.on_candidate_complete(index, candidate_index, result.status if result.status != "filtered" else "success")
+             return result
+
+         candidates = list(await asyncio.gather(*[
+             run_candidate(i) for i in range(n)
+         ]))
+
+         # Run judge (semaphore acquired inside _execute_best_of_judge)
+         # Judge uses default retry (status == "error"), not custom retry_on
+         if retry:
+             judge_retry = RetryConfig(
+                 max_attempts=retry.max_attempts,
+                 backoff_ms=retry.backoff_ms,
+                 backoff_multiplier=retry.backoff_multiplier,
+                 retry_on=None,
+             )
+             judge = await execute_with_retry(
+                 lambda attempt: self._execute_best_of_judge(
+                     input_files=files,
+                     task_prompt=prompt_str,
+                     candidates=candidates,
+                     config=best_of_config,
+                     mcp_servers=judge_mcp_servers,
+                     timeout=timeout,
+                     system_prompt=system_prompt,
+                     schema=schema,
+                     parent_index=index,
+                     attempt=attempt,
+                 ),
+                 judge_retry
+             )
+         else:
+             judge = await self._execute_best_of_judge(
+                 input_files=files,
+                 task_prompt=prompt_str,
+                 candidates=candidates,
+                 config=best_of_config,
+                 mcp_servers=judge_mcp_servers,
+                 timeout=timeout,
+                 system_prompt=system_prompt,
+                 schema=schema,
+                 parent_index=index,
+             )
+
+         first_success = next((i for i, c in enumerate(candidates) if c.status == "success"), -1)
+         winner_index = judge["winner"] if judge["winner"] is not None else (first_success if first_success >= 0 else 0)
+         winner = candidates[winner_index] if winner_index < len(candidates) else candidates[0]
+
+         # Call judge callback with map item index
+         if best_of_config.on_judge_complete:
+             best_of_config.on_judge_complete(index, winner_index, judge.get("reasoning", ""))
+
+         judge_meta = JudgeMeta(
+             run_id=run_id,
+             operation="bestof-judge",
+             tag=judge["tag"],
+             sandbox_id=judge["sandbox_id"],
+             candidate_count=n,
+         )
+
+         # Return winner with bestOf info
+         return SwarmResult(
+             status=winner.status,
+             data=winner.data,
+             files=winner.files,
+             meta=IndexedMeta(
+                 run_id=run_id,
+                 operation="map",
+                 tag=winner.meta.tag,
+                 sandbox_id=winner.meta.sandbox_id,
+                 index=index,
+             ),
+             error=winner.error,
+             raw_data=winner.raw_data,
+             best_of=BestOfInfo(
+                 winner_index=winner_index,
+                 judge_reasoning=judge.get("reasoning", "Judge failed to provide reasoning"),
+                 judge_meta=judge_meta,
+                 candidates=candidates,
+             ),
+         )
+
+     async def _execute_map_item_with_verify(
+         self,
+         item: ItemInput,
+         prompt: Prompt,
+         index: int,
+         run_id: str,
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+         schema_options: Optional[Dict[str, Any]],
+         agent: Optional[AgentConfig],
+         mcp_servers: Optional[Dict[str, Any]],
+         verify_config: VerifyConfig,
+         timeout: int,
+         retry: Optional[RetryConfig] = None,
+     ) -> SwarmResult:
+         """Execute a single map item with verification."""
+         files = self._get_files(item)
+         base_tag = f"{self.config.tag}-map-{index}"
+
+         try:
+             prompt_str = self._resolve_prompt(prompt, files, index)
+         except Exception as e:
+             return self._build_error_result(
+                 f"Prompt function threw: {e}",
+                 IndexedMeta(run_id=run_id, operation="map", tag=base_tag, sandbox_id="", index=index)
+             )
+
+         # Worker function that executes map item (tag_prefix managed by _run_with_verification)
+         async def worker_fn(current_prompt: str, tag_prefix: str) -> SwarmResult:
+             async with self.semaphore:
+                 result = await self._execute(
+                     context=files,
+                     prompt=current_prompt,
+                     system_prompt=system_prompt,
+                     schema=schema,
+                     schema_options=schema_options,
+                     agent=agent,
+                     mcp_servers=mcp_servers,
+                     tag_prefix=tag_prefix,
+                     timeout=timeout,
+                 )
+
+             meta = IndexedMeta(
+                 run_id=run_id,
+                 operation="map",
+                 tag=result["tag"],
+                 sandbox_id=result["sandbox_id"],
+                 index=index,
+             )
+
+             return self._build_result(result, meta)
+
+         # Run with verification loop
+         return await self._run_with_verification(
+             worker_fn=worker_fn,
+             original_prompt=prompt_str,
+             input_files=files,
+             verify_config=verify_config,
+             mcp_servers=mcp_servers,
+             timeout=timeout,
+             system_prompt=system_prompt,
+             schema=schema,
+             run_id=run_id,
+             base_tag=base_tag,
+             retry=retry,
+             item_index=index,
+         )
+
+     # =========================================================================
+     # PRIVATE: FILTER
+     # =========================================================================
+
+     async def _execute_filter_item(
+         self,
+         item: ItemInput,
+         prompt: str,
+         index: int,
+         run_id: str,
+         system_prompt: Optional[str],
+         schema: SchemaType,
+         schema_options: Optional[Dict[str, Any]],
+         agent: Optional[AgentConfig],
+         mcp_servers: Optional[Dict[str, Any]],
+         timeout: int,
+         attempt: int = 1,
+     ) -> SwarmResult:
+         """Execute a single filter item."""
+         original_files = self._get_files(item)
+         tag_prefix = (
+             f"{self.config.tag}-filter-{index}-er{attempt - 1}"
+             if attempt > 1
+             else f"{self.config.tag}-filter-{index}"
+         )
+
+         async with self.semaphore:
+             result = await self._execute(
+                 context=original_files,
+                 prompt=prompt,
+                 system_prompt=system_prompt,
+                 schema=schema,
+                 schema_options=schema_options,
+                 agent=agent,
+                 mcp_servers=mcp_servers,
+                 tag_prefix=tag_prefix,
+                 timeout=timeout,
+             )
+
+         meta = IndexedMeta(
+             run_id=run_id,
+             operation="filter",
+             tag=result["tag"],
+             sandbox_id=result["sandbox_id"],
+             index=index,
+         )
+
+         # Filter passes through ORIGINAL files, not output
+         return self._build_result(result, meta, files_override=original_files)
+
+     async def _execute_filter_item_with_verify(
+         self,
+         item: ItemInput,
+         prompt: str,
+         index: int,
+         run_id: str,
+         system_prompt: Optional[str],
+         schema: SchemaType,
+         schema_options: Optional[Dict[str, Any]],
+         agent: Optional[AgentConfig],
+         mcp_servers: Optional[Dict[str, Any]],
+         verify_config: VerifyConfig,
+         timeout: int,
+         retry: Optional[RetryConfig] = None,
+     ) -> SwarmResult:
+         """Execute a single filter item with verification."""
+         original_files = self._get_files(item)
+         base_tag = f"{self.config.tag}-filter-{index}"
+
+         # Worker function that executes filter item (tag_prefix managed by _run_with_verification)
+         async def worker_fn(current_prompt: str, tag_prefix: str) -> SwarmResult:
+             async with self.semaphore:
+                 result = await self._execute(
+                     context=original_files,
+                     prompt=current_prompt,
+                     system_prompt=system_prompt,
+                     schema=schema,
+                     schema_options=schema_options,
+                     agent=agent,
+                     mcp_servers=mcp_servers,
+                     tag_prefix=tag_prefix,
+                     timeout=timeout,
+                 )
+
+             meta = IndexedMeta(
+                 run_id=run_id,
+                 operation="filter",
+                 tag=result["tag"],
+                 sandbox_id=result["sandbox_id"],
+                 index=index,
+             )
+
+             # Filter passes through ORIGINAL files, not output
+             return self._build_result(result, meta, files_override=original_files)
+
+         # Run with verification loop
+         return await self._run_with_verification(
+             worker_fn=worker_fn,
+             original_prompt=prompt,
+             input_files=original_files,
+             verify_config=verify_config,
+             mcp_servers=mcp_servers,
+             timeout=timeout,
+             system_prompt=system_prompt,
+             schema=schema,
+             run_id=run_id,
+             base_tag=base_tag,
+             retry=retry,
+             item_index=index,
+         )
+
+     # =========================================================================
+     # PRIVATE: VERIFY
+     # =========================================================================
+
+     async def _run_with_verification(
+         self,
+         worker_fn: Callable[[str, str], Any],  # async function(prompt, tag_prefix) -> result with status and files
+         original_prompt: str,
+         input_files: FileMap,
+         verify_config: VerifyConfig,
+         mcp_servers: Optional[Dict[str, Any]],
+         timeout: int,
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+         run_id: str,
+         base_tag: str,
+         retry: Optional[RetryConfig] = None,
+         item_index: int = 0,
+     ) -> Any:
+         """Shared verification loop for map, filter, and reduce.
+
+         Runs worker function, verifies output, retries with feedback if needed.
+
+         Args:
+             worker_fn: Async function that executes the worker with a given prompt and tag prefix
+             original_prompt: The original user prompt
+             input_files: Input files for the worker
+             verify_config: Verification configuration
+             mcp_servers: MCP servers for verifier (resolved from operation or swarm)
+             timeout: Timeout in ms
+             system_prompt: Optional system prompt
+             schema: Optional schema
+             run_id: Run ID for metadata
+             base_tag: Base tag for worker/verifier
+             retry: Optional retry config for verifier error retry
+             item_index: Item index for callbacks (default: 0 for reduce)
+
+         Returns:
+             Result with verify info attached
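+
+         Tag naming (sketch of the scheme used below): worker attempts are tagged
+         base_tag, base_tag-vr1, base_tag-vr2, ... ("vr" = verify retry); worker
+         error retries append -er{n}; each verifier run is tagged {worker_tag}-verifier.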
+         """
+         # Resolve verifier MCP servers
+         verifier_mcp_servers = verify_config.verifier_mcp_servers or mcp_servers
+         max_attempts = verify_config.max_attempts
+
+         current_prompt = original_prompt
+         last_result = None
+         verify_attempts = 0
+
+         while verify_attempts < max_attempts:
+             verify_attempts += 1
+
+             # Build worker tag: base_tag, base_tag-vr1, base_tag-vr2, etc. (vr = verify retry)
+             worker_tag = f"{base_tag}-vr{verify_attempts - 1}" if verify_attempts > 1 else base_tag
+
+             # Run worker (with error retry if configured)
+             # Worker keeps retry_on (user-specified condition) and gets -er{n} tag suffix for error retries
+             if retry:
+                 async def worker_with_retry(retry_attempt: int = 1):
+                     tag = f"{worker_tag}-er{retry_attempt - 1}" if retry_attempt > 1 else worker_tag
+                     return await worker_fn(current_prompt, tag)
+                 worker_result = await execute_with_retry(worker_with_retry, retry)
+             else:
+                 worker_result = await worker_fn(current_prompt, worker_tag)
+
+             # If worker failed even after retries, return immediately
+             if worker_result.status == "error":
+                 # Call worker callback with error status
+                 if verify_config.on_worker_complete:
+                     verify_config.on_worker_complete(item_index, verify_attempts, "error")
+                 return worker_result
+
+             # Call worker callback with success status
+             if verify_config.on_worker_complete:
+                 verify_config.on_worker_complete(item_index, verify_attempts, "success")
+
+             last_result = worker_result
+
+             # Run verification (verifier tag = worker_tag-verifier, with error retry like judge)
+             if retry:
+                 async def verify_with_retry(retry_attempt: int = 1):
+                     return await self._execute_verify(
+                         input_files=input_files,
+                         output_files=worker_result.files,
+                         task_prompt=current_prompt,
+                         config=verify_config,
+                         mcp_servers=verifier_mcp_servers,
+                         timeout=timeout,
+                         system_prompt=system_prompt,
+                         schema=schema,
+                         run_id=run_id,
+                         worker_tag=worker_tag,
+                         retry_attempt=retry_attempt,
+                     )
+                 # Use retry but ignore custom retry_on (like judge)
+                 retry_config = RetryConfig(
+                     max_attempts=retry.max_attempts,
+                     backoff_ms=retry.backoff_ms,
+                     backoff_multiplier=retry.backoff_multiplier,
+                 )
+                 verification = await execute_with_retry(verify_with_retry, retry_config)
+             else:
+                 verification = await self._execute_verify(
+                     input_files=input_files,
+                     output_files=worker_result.files,
+                     task_prompt=current_prompt,
+                     config=verify_config,
+                     mcp_servers=verifier_mcp_servers,
+                     timeout=timeout,
+                     system_prompt=system_prompt,
+                     schema=schema,
+                     run_id=run_id,
+                     worker_tag=worker_tag,
+                 )
+
+             # Call verifier callback
+             if verify_config.on_verifier_complete:
+                 verify_config.on_verifier_complete(
+                     item_index,
+                     verify_attempts,
+                     bool(verification.get("passed")),
+                     verification.get("feedback"),
+                 )
+
+             # Build verify meta
+             verify_meta = VerifyMeta(
+                 run_id=run_id,
+                 operation="verify",
+                 tag=verification["tag"],
+                 sandbox_id=verification["sandbox_id"],
+                 attempts=verify_attempts,
+             )
+
+             # If verification passed, return result with verify info
+             if verification.get("passed"):
+                 # Create a new result with verify info attached
+                 return self._attach_verify_info(
+                     worker_result,
+                     VerifyInfo(
+                         passed=True,
+                         reasoning=verification.get("reasoning", ""),
+                         verify_meta=verify_meta,
+                         attempts=verify_attempts,
+                     )
+                 )
+
+             # If verification failed and we have attempts left, rebuild prompt with feedback
+             if verify_attempts < max_attempts:
+                 feedback = verification.get("feedback") or verification.get("reasoning") or "Output did not meet criteria"
+                 current_prompt = self._build_retry_prompt_with_feedback(original_prompt, feedback)
+
+         # Max retries exceeded - return last result with error status and verify info
+         # Use last worker tag for consistency
+         last_worker_tag = f"{base_tag}-vr{verify_attempts - 1}" if verify_attempts > 1 else base_tag
+         verify_meta = VerifyMeta(
+             run_id=run_id,
+             operation="verify",
+             tag=f"{last_worker_tag}-verifier",
+             sandbox_id="",
+             attempts=verify_attempts,
+         )
+
+         return self._attach_verify_info(
+             last_result,
+             VerifyInfo(
+                 passed=False,
+                 reasoning="Max verification retries exceeded",
+                 verify_meta=verify_meta,
+                 attempts=verify_attempts,
+             ),
+             force_error=True
+         )
+
+     async def _execute_verify(
+         self,
+         input_files: FileMap,
+         output_files: FileMap,
+         task_prompt: str,
+         config: VerifyConfig,
+         mcp_servers: Optional[Dict[str, Any]],
+         timeout: int,
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+         run_id: str,
+         worker_tag: str,
+         retry_attempt: int = 1,
+     ) -> Dict[str, Any]:
+         """Execute verifier to check if output meets criteria."""
+         # Verifier tag = worker_tag-verifier, with -er{n} suffix for error retries
+         tag_prefix = (
+             f"{worker_tag}-verifier-er{retry_attempt - 1}"
+             if retry_attempt > 1
+             else f"{worker_tag}-verifier"
+         )
+
+         # Build verify context
+         context = self._build_verify_context(
+             input_files=input_files,
+             task_prompt=task_prompt,
+             output_files=output_files,
+             system_prompt=system_prompt,
+             schema=schema,
+         )
+
+         # Build verify system prompt
+         file_tree = build_file_tree(context)
+         verify_system_prompt = apply_template(VERIFY_PROMPT, {
+             "criteria": config.criteria,
+             "fileTree": file_tree,
+         })
+
+         # Verify schema (always JSON Schema dict for simplicity)
+         verify_schema = {
+             "type": "object",
+             "properties": {
+                 "passed": {"type": "boolean"},
+                 "reasoning": {"type": "string"},
+                 "feedback": {"type": "string"},
+             },
+             "required": ["passed", "reasoning"],
+         }
+
+         async with self.semaphore:
+             result = await self._execute(
+                 context=context,
+                 prompt=VERIFY_USER_PROMPT,
+                 system_prompt=verify_system_prompt,
+                 schema=verify_schema,
+                 schema_options=None,
+                 agent=config.verifier_agent,
+                 mcp_servers=mcp_servers,
+                 tag_prefix=tag_prefix,
+                 timeout=timeout,
+             )
+
+         passed = None
+         reasoning = "Verification completed"
+         feedback = None
+
+         if result.get("data") and not result.get("error"):
+             data = result["data"]
+             if isinstance(data, dict):
+                 passed = data.get("passed")
+                 reasoning = data.get("reasoning", reasoning)
+                 feedback = data.get("feedback")
+         elif result.get("raw_data"):
+             # Validation failed but we have raw data - try to extract
+             try:
+                 raw = json.loads(result["raw_data"])
+                 passed = bool(raw.get("passed"))
+                 reasoning = raw.get("reasoning", reasoning)
+                 feedback = raw.get("feedback")
+             except Exception:
+                 warnings.warn(
+                     f"Verify validation failed: {result.get('error')}",
+                     stacklevel=2
+                 )
+
+         return {
+             "status": "success" if passed is not None else "error",
+             "passed": passed,
+             "reasoning": reasoning,
+             "feedback": feedback,
+             "tag": result["tag"],
+             "sandbox_id": result["sandbox_id"],
+             "error": None if passed is not None else "Verifier failed to produce valid decision",
+         }
+
+     def _build_verify_context(
+         self,
+         input_files: FileMap,
+         task_prompt: str,
+         output_files: FileMap,
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+     ) -> FileMap:
+         """Build verify context containing worker task info and output to verify."""
+         # Start with shared worker_task structure
+         context = self._build_evaluator_context(
+             input_files=input_files,
+             task_prompt=task_prompt,
+             system_prompt=system_prompt,
+             schema=schema,
+         )
+
+         # Add output files to verify
+         for name, content in output_files.items():
+             context[f"worker_output/{name}"] = content
+
+         return context
+
+     def _build_evaluator_context(
+         self,
+         input_files: FileMap,
+         task_prompt: str,
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+     ) -> FileMap:
+         """Build evaluator context (shared by judge and verify).
+
+         Creates worker_task/ structure with input files, prompts, schema.
+         """
+         context: FileMap = {}
+
+         if system_prompt:
+             context["worker_task/system_prompt.txt"] = system_prompt
+         context["worker_task/user_prompt.txt"] = task_prompt
+
+         json_schema = to_json_schema(schema)
+         if json_schema:
+             context["worker_task/schema.json"] = json.dumps(json_schema, indent=2)
+
+         for name, content in input_files.items():
+             context[f"worker_task/input/{name}"] = content
+
+         return context
+
+     @staticmethod
+     def _build_retry_prompt_with_feedback(original_prompt: str, feedback: str) -> str:
+         """Build a retry prompt with verifier feedback."""
+         return apply_template(RETRY_FEEDBACK_PROMPT, {
+             "originalPrompt": original_prompt,
+             "feedback": feedback,
+         })
+
+     def _attach_verify_info(
+         self,
+         result: Any,
+         verify_info: VerifyInfo,
+         force_error: bool = False,
+     ) -> Any:
+         """Attach verify info to a result, creating a new result object."""
+         if isinstance(result, SwarmResult):
+             return SwarmResult(
+                 status="error" if force_error else result.status,
+                 data=result.data,
+                 files=result.files,
+                 meta=result.meta,
+                 error=result.error,
+                 raw_data=result.raw_data,
+                 best_of=result.best_of,
+                 verify=verify_info,
+             )
+         elif isinstance(result, ReduceResult):
+             return ReduceResult(
+                 status="error" if force_error else result.status,
+                 data=result.data,
+                 files=result.files,
+                 meta=result.meta,
+                 error=result.error,
+                 raw_data=result.raw_data,
+                 verify=verify_info,
+             )
+         else:
+             # Fallback - just set verify attribute
+             result.verify = verify_info
+             if force_error:
+                 result.status = "error"
+             return result
+
+     # =========================================================================
+     # PRIVATE: BESTOF
+     # =========================================================================
+
+     async def _execute_best_of_candidate(
+         self,
+         input_files: FileMap,
+         prompt: str,
+         candidate_index: int,
+         run_id: str,
+         config: BestOfConfig,
+         mcp_servers: Optional[Dict[str, Any]],
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+         schema_options: Optional[Dict[str, Any]],
+         timeout: int,
+         parent_index: Optional[int] = None,
+         attempt: int = 1,
+     ) -> SwarmResult:
+         """Execute a single bestOf candidate."""
+         base_tag = (
+             f"{self.config.tag}-map-{parent_index}-bestof-cand-{candidate_index}"
+             if parent_index is not None
+             else f"{self.config.tag}-bestof-cand-{candidate_index}"
+         )
+         tag_prefix = f"{base_tag}-er{attempt - 1}" if attempt > 1 else base_tag
+
+         # Get agent override for this candidate
+         candidate_agent = config.task_agents[candidate_index] if config.task_agents and candidate_index < len(config.task_agents) else None
+
+         # Acquire semaphore here (inside retry loop) so it's released during backoff
+         async with self.semaphore:
+             result = await self._execute(
+                 context=input_files,
+                 prompt=prompt,
+                 system_prompt=system_prompt,
+                 schema=schema,
+                 schema_options=schema_options,
+                 agent=candidate_agent,
+                 mcp_servers=mcp_servers,
+                 tag_prefix=tag_prefix,
+                 timeout=timeout,
+             )
+
+         meta = IndexedMeta(
+             run_id=run_id,
+             operation="bestof-cand",
+             tag=result["tag"],
+             sandbox_id=result["sandbox_id"],
+             index=candidate_index,
+         )
+
+         return self._build_result(result, meta)
+
+     async def _execute_best_of_judge(
+         self,
+         input_files: FileMap,
+         task_prompt: str,
+         candidates: List[SwarmResult],
+         config: BestOfConfig,
+         mcp_servers: Optional[Dict[str, Any]],
+         timeout: int,
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+         parent_index: Optional[int] = None,
+         attempt: int = 1,
+     ) -> Dict[str, Any]:
+         """Execute bestOf judge.
+
+         Returns a dict with status field for retry compatibility.
+         """
+         base_tag = (
+             f"{self.config.tag}-map-{parent_index}-bestof-judge"
+             if parent_index is not None
+             else f"{self.config.tag}-bestof-judge"
+         )
+         tag_prefix = f"{base_tag}-er{attempt - 1}" if attempt > 1 else base_tag
+
+         # Build judge context
+         context = self._build_judge_context(
+             input_files=input_files,
+             task_prompt=task_prompt,
+             candidates=candidates,
+             system_prompt=system_prompt,
+             schema=schema,
+         )
+
+         # Build judge system prompt
+         file_tree = build_file_tree(context)
+         judge_system_prompt = apply_template(JUDGE_PROMPT, {
+             "candidateCount": str(len(candidates)),
+             "criteria": config.judge_criteria,
+             "fileTree": file_tree,
+         })
+
+         # Judge schema (always JSON Schema dict for simplicity)
+         judge_schema = {
+             "type": "object",
+             "properties": {
+                 "winner": {"type": "integer", "minimum": 0, "maximum": len(candidates) - 1},
+                 "reasoning": {"type": "string"},
+             },
+             "required": ["winner", "reasoning"],
+         }
+
+         # Acquire semaphore here (inside retry loop) so it's released during backoff
+         async with self.semaphore:
+             result = await self._execute(
+                 context=context,
+                 prompt=JUDGE_USER_PROMPT,
+                 system_prompt=judge_system_prompt,
+                 schema=judge_schema,
+                 schema_options=None,
+                 agent=config.judge_agent,
+                 mcp_servers=mcp_servers,
+                 tag_prefix=tag_prefix,
+                 timeout=timeout,
+             )
+
+         winner = None
+         reasoning = "Judge failed to provide reasoning"
+
+         if result.get("data") and not result.get("error"):
+             data = result["data"]
+             if isinstance(data, dict):
+                 winner = data.get("winner")
+                 reasoning = data.get("reasoning", reasoning)
+         elif result.get("raw_data"):
+             # Validation failed but we have raw data - extract reasoning and default winner to 0
+             try:
+                 raw = json.loads(result["raw_data"])
+                 warnings.warn(
+                     f"Judge returned invalid winner {raw.get('winner')}, defaulting to candidate 0",
+                     stacklevel=2
+                 )
+                 winner = 0
+                 reasoning = raw.get("reasoning", reasoning)
+             except Exception:
+                 warnings.warn(
+                     f"Judge validation failed: {result.get('error')}",
+                     stacklevel=2
+                 )
+
+         return {
+             "status": "success" if winner is not None else "error",
+             "winner": winner,
+             "reasoning": reasoning,
+             "tag": result["tag"],
+             "sandbox_id": result["sandbox_id"],
+             "error": None if winner is not None else "Judge failed to produce valid decision",
+         }
+
+     def _build_judge_context(
+         self,
+         input_files: FileMap,
+         task_prompt: str,
+         candidates: List[SwarmResult],
+         system_prompt: Optional[str],
+         schema: Optional[SchemaType],
+     ) -> FileMap:
+         """Build judge context containing worker task info and candidate outputs."""
+         # Start with shared worker_task structure
+         context = self._build_evaluator_context(
+             input_files=input_files,
+             task_prompt=task_prompt,
+             system_prompt=system_prompt,
+             schema=schema,
+         )
+
+         # Add candidate outputs
+         for i, c in enumerate(candidates):
+             if c.status == "error":
+                 context[f"candidate_{i}/_failed.txt"] = f"STATUS: FAILED\n\nError: {c.error or 'Unknown error'}"
+             for name, content in c.files.items():
+                 context[f"candidate_{i}/{name}"] = content
+
+         return context
+
+     # =========================================================================
+     # PRIVATE: UTILITIES
+     # =========================================================================
+
+     def _generate_run_id(self) -> str:
+         """Generate a unique run ID."""
+         return secrets.token_hex(8)
+
+     def _get_files(self, item: ItemInput) -> FileMap:
+         """Extract files from an item (FileMap or SwarmResult)."""
+         if is_swarm_result(item):
+             files = dict(item.files)
+             # Rename result.json → data.json for clarity when used as input
+             if "result.json" in files:
+                 files["data.json"] = files.pop("result.json")
+             return files
+         return item
+
+     def _get_index(self, item: ItemInput, fallback: int) -> int:
+         """Get index from item (for SwarmResult) or use fallback."""
+         if is_swarm_result(item):
+             return item.meta.index
+         return fallback
+
+     def _resolve_prompt(self, prompt: Prompt, files: FileMap, index: int) -> str:
+         """Resolve prompt (string or callable) to string."""
+         return prompt(files, index) if callable(prompt) else prompt
+
+     def _build_agent_config(self, override: Optional[AgentConfig]) -> Optional[AgentConfig]:
+         """Build agent config with optional override.
+
+         If override provided, merge with base config (apiKey inherited from base).
+         If no override and no base config, return None (TS SDK resolves from env).
+         """
+         base = self.config.agent
+         if override:
+             return AgentConfig(
+                 type=override.type,
+                 api_key=base.api_key if base else None,
+                 model=override.model,
+                 reasoning_effort=override.reasoning_effort,
+                 betas=override.betas,
+             )
+         return base
+
+     def _build_result(
+         self,
+         result: Dict[str, Any],
+         meta: IndexedMeta,
+         files_override: Optional[FileMap] = None,
+     ) -> SwarmResult:
+         """Build SwarmResult from execution result."""
+         files = files_override if files_override is not None else result["files"]
+
+         if result.get("error"):
+             return SwarmResult(
+                 status="error",
+                 data=None,
+                 files=files,
+                 meta=meta,
+                 error=result["error"],
+                 raw_data=result.get("raw_data"),
+             )
+
+         return SwarmResult(
+             status="success",
+             data=result["data"],
+             files=files,
+             meta=meta,
+         )
+
+     def _build_error_result(self, error: str, meta: IndexedMeta) -> SwarmResult:
+         """Build error SwarmResult."""
+         return SwarmResult(
+             status="error",
+             data=None,
+             files={},
+             meta=meta,
+             error=error,
+         )
+
+     def _decode_files(self, encoded: Dict[str, Any]) -> FileMap:
+         """Decode files from bridge response."""
+         files: FileMap = {}
+         for name, file_data in encoded.items():
+             content = file_data.get('content', '')
+             encoding = file_data.get('encoding', 'text')
+             if encoding == 'base64':
+                 files[name] = base64.b64decode(content)
+             else:
+                 files[name] = content
+         return files