swarmkit 0.1.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
swarmkit/bridge.py ADDED
@@ -0,0 +1,475 @@
1
+ """Node.js bridge subprocess manager for JSON-RPC communication."""
2
+
3
+ import asyncio
4
+ import atexit
5
+ import json
6
+ import logging
7
+ import os
8
+ import signal
9
+ from pathlib import Path
10
+ from typing import Any, Callable, Dict, List, Optional
11
+
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # Global registry of bridge process PIDs for atexit cleanup
16
+ # We store PIDs instead of process objects because asyncio.subprocess.Process
17
+ # doesn't have sync wait() - we use os.kill/os.waitpid for cleanup
18
+ _bridge_pids: List[int] = []
19
+
20
+
21
+ def _atexit_cleanup():
22
+ """Kill all bridge processes on Python exit."""
23
+ for pid in _bridge_pids:
24
+ try:
25
+ os.kill(pid, signal.SIGTERM)
26
+ except (OSError, ProcessLookupError):
27
+ pass # Process already dead
28
+ try:
29
+ # Non-blocking wait to reap zombie
30
+ os.waitpid(pid, os.WNOHANG)
31
+ except (OSError, ChildProcessError):
32
+ pass
33
+ _bridge_pids.clear()
34
+
35
+
36
+ atexit.register(_atexit_cleanup)
37
+
38
+
39
class SandboxNotFoundError(Exception):
    """The target sandbox no longer exists (it expired or was killed)."""
42
+
43
+
44
class BridgeConnectionError(Exception):
    """The bridge process failed to start or died unexpectedly."""
47
+
48
+
49
class BridgeBuildError(Exception):
    """Building the bridge (npm install / npm run build) failed."""
52
+
53
+
54
class BridgeManager:
    """Manage the Node.js subprocess that runs the JSON-RPC bridge.

    Native asyncio subprocess I/O is used throughout, which keeps every
    read and write cancellable and lets shutdown proceed cleanly without
    blocking helper threads.
    """

    def __init__(self):
        # Subprocess handle and its PID; both None until start() succeeds.
        self.process: Optional[asyncio.subprocess.Process] = None
        self._pid: Optional[int] = None

        # JSON-RPC bookkeeping: monotonically increasing request ids mapped
        # to the futures awaiting their responses.
        self.request_id = 0
        self.pending_requests: Dict[int, asyncio.Future] = {}

        # Serialize stdin writes so concurrent calls cannot interleave
        # JSON-RPC request frames.
        self._write_lock = asyncio.Lock()

        # Default RPC timeout to prevent hangs (overridden per-call for
        # long-running operations).
        self.default_call_timeout_s: float = 120.0

        # Background reader tasks: stdout (responses) and stderr (logging).
        self.reader_task: Optional[asyncio.Task] = None
        self.stderr_task: Optional[asyncio.Task] = None

        # Reassembly buffers for chunked stdout/stderr events; the Node
        # bridge may emit {seq, done} chunks for oversized streams.
        self._stream_buffers: Dict[str, List[str]] = {"stdout": [], "stderr": []}

        # User-registered event callbacks, keyed by event type.
        # 'stdout'/'stderr' callbacks receive str data, 'content' receives
        # the raw params dict.
        self.event_callbacks: Dict[str, List[Callable]] = {
            name: [] for name in ('stdout', 'stderr', 'content')
        }
80
+
81
+ async def start(self):
82
+ """Start the Node.js bridge process."""
83
+ if self.process is not None:
84
+ return
85
+
86
+ # Find bridge script (bundled version for distribution)
87
+ bridge_dir = Path(__file__).parent.parent / 'bridge'
88
+ bridge_script = bridge_dir / 'dist' / 'bridge.bundle.cjs'
89
+
90
+ # Fallback to unbundled version for development
91
+ if not bridge_script.exists():
92
+ bridge_script = bridge_dir / 'dist' / 'bridge.js'
93
+
94
+ # Auto-build bridge if missing (turnkey experience)
95
+ if not bridge_script.exists():
96
+ await self._build_bridge(bridge_dir)
97
+
98
+ # Start Node.js process with native asyncio subprocess
99
+ self.process = await asyncio.create_subprocess_exec(
100
+ 'node', str(bridge_script),
101
+ stdin=asyncio.subprocess.PIPE,
102
+ stdout=asyncio.subprocess.PIPE,
103
+ stderr=asyncio.subprocess.PIPE,
104
+ )
105
+ self._pid = self.process.pid
106
+
107
+ # Register PID for atexit cleanup
108
+ _bridge_pids.append(self._pid)
109
+
110
+ # Start reading responses (native async - no blocking threads)
111
+ self.reader_task = asyncio.create_task(self._read_responses())
112
+ # Drain stderr to avoid pipe backpressure
113
+ if self.process.stderr is not None:
114
+ self.stderr_task = asyncio.create_task(self._drain_stderr())
115
+
116
+ async def _build_bridge(self, bridge_dir: Path):
117
+ """Build the bridge if missing (first run experience)."""
118
+ import shutil
119
+ import subprocess
120
+
121
+ # Check if Node.js and npm are installed
122
+ if not shutil.which('node'):
123
+ raise BridgeBuildError(
124
+ "Bridge build failed: Node.js not found in PATH.\n"
125
+ "SwarmKit requires Node.js 18+ to run the TypeScript bridge.\n"
126
+ "Install from https://nodejs.org/ or run 'make build-dev' manually from packages/sdk-py/."
127
+ )
128
+
129
+ if not shutil.which('npm'):
130
+ raise BridgeBuildError(
131
+ "Bridge build failed: npm not found in PATH.\n"
132
+ "npm is usually installed with Node.js - check your Node.js installation.\n"
133
+ "Alternatively, run 'make build-dev' manually from packages/sdk-py/."
134
+ )
135
+
136
+ logger.info("First run: building Node.js bridge...")
137
+ try:
138
+ # Run npm install/build in executor to avoid blocking event loop
139
+ loop = asyncio.get_running_loop()
140
+
141
+ # Install dependencies
142
+ await loop.run_in_executor(
143
+ None,
144
+ lambda: subprocess.run(
145
+ ['npm', 'install'],
146
+ cwd=bridge_dir,
147
+ check=True,
148
+ capture_output=True,
149
+ text=True
150
+ )
151
+ )
152
+
153
+ # Build bridge (dev mode for readable debugging)
154
+ await loop.run_in_executor(
155
+ None,
156
+ lambda: subprocess.run(
157
+ ['npm', 'run', 'build:dev'],
158
+ cwd=bridge_dir,
159
+ check=True,
160
+ capture_output=True,
161
+ text=True
162
+ )
163
+ )
164
+ logger.info("Bridge built successfully")
165
+ except subprocess.CalledProcessError as e:
166
+ raise BridgeBuildError(
167
+ f"Bridge build failed during npm execution.\n"
168
+ f"Error: {e.stderr}\n"
169
+ f"Try running 'make build-dev' or 'make build-prod' manually from packages/sdk-py/ to see the full error."
170
+ ) from e
171
+
172
+ async def stop(self):
173
+ """Stop the Node.js bridge process."""
174
+ if self.process is None:
175
+ return
176
+
177
+ # Note: We intentionally do NOT clear event_callbacks here
178
+ # User-registered callbacks should persist across bridge restarts
179
+
180
+ # Remove from atexit registry
181
+ if self._pid and self._pid in _bridge_pids:
182
+ _bridge_pids.remove(self._pid)
183
+
184
+ # Cancel reader tasks first (they will exit cleanly now that process is terminating)
185
+ if self.reader_task:
186
+ self.reader_task.cancel()
187
+ try:
188
+ await self.reader_task
189
+ except asyncio.CancelledError:
190
+ pass
191
+
192
+ if self.stderr_task:
193
+ self.stderr_task.cancel()
194
+ try:
195
+ await self.stderr_task
196
+ except asyncio.CancelledError:
197
+ pass
198
+ self.stderr_task = None
199
+
200
+ # Terminate process
201
+ try:
202
+ self.process.terminate()
203
+ await asyncio.wait_for(self.process.wait(), timeout=5)
204
+ except asyncio.TimeoutError:
205
+ self.process.kill()
206
+ await self.process.wait()
207
+
208
+ self.process = None
209
+ self.reader_task = None
210
+ self._pid = None
211
+
212
+ def on(self, event_type: str, callback: Callable):
213
+ """Register event callback."""
214
+ if event_type in self.event_callbacks:
215
+ self.event_callbacks[event_type].append(callback)
216
+
217
    async def call(
        self,
        method: str,
        params: Optional[Dict[str, Any]] = None,
        timeout_s: Optional[float] = None,
    ) -> Any:
        """Call a JSON-RPC method and wait for response.

        Args:
            method: JSON-RPC method name
            params: JSON-RPC params dict
            timeout_s: Optional timeout in seconds. If None, uses default_call_timeout_s.

        Returns:
            The JSON-RPC ``result`` payload, as resolved by _handle_response.

        Raises:
            BridgeConnectionError: If the bridge is not running, dies
                mid-call, or the call times out.
        """
        if self.process is None or self.process.stdin is None:
            raise BridgeConnectionError("Bridge not started. Call start() first.")

        # Hold the write lock across id allocation, future registration, and
        # the frame write so concurrent callers cannot interleave bytes on
        # stdin or race on request_id.
        async with self._write_lock:
            self.request_id += 1
            request_id = self.request_id

            request = {
                'jsonrpc': '2.0',
                'method': method,
                'params': params or {},
                'id': request_id,
            }

            # Create future for response; _handle_response resolves it.
            future: asyncio.Future = asyncio.Future()
            self.pending_requests[request_id] = future

            # Send request (native async write). Wire format: 4-byte
            # big-endian length prefix followed by the UTF-8 JSON payload.
            payload = json.dumps(request).encode('utf-8')
            frame = len(payload).to_bytes(4, byteorder='big') + payload
            self.process.stdin.write(frame)
            await self.process.stdin.drain()

        # Wait for response (error handling done in _handle_response).
        # Waiting happens outside the lock so other calls can proceed.
        timeout = timeout_s if timeout_s is not None else self.default_call_timeout_s
        try:
            return await asyncio.wait_for(future, timeout=timeout)
        except asyncio.TimeoutError as e:
            # Drop pending request to avoid leaks; late response will be ignored.
            self.pending_requests.pop(request_id, None)
            raise BridgeConnectionError(
                f"Bridge call timed out after {timeout:.1f}s: {method}"
            ) from e
264
+
265
    async def _read_responses(self):
        """Read framed JSON-RPC messages from bridge stdout.

        Uses native asyncio reads - fully cancellable, no blocking threads.
        Runs until EOF, an invalid frame, or cancellation; on any exit path
        the finally-block fails all still-pending requests so callers in
        call() never hang.
        """
        if self.process is None or self.process.stdout is None:
            return

        # 50MB cap on incoming frames. This applies to ALL bridge responses including
        # RPC results (run().stdout, get_output_files(), etc.). If a response exceeds
        # 50MB, the bridge connection fails. This is stricter than the TS SDK which
        # has no response size limit. For very large outputs, consider streaming via
        # stdout/stderr events or fetching files individually.
        max_frame_bytes = 50 * 1024 * 1024

        try:
            async def read_exact(n: int) -> Optional[bytes]:
                """Read exactly n bytes from stdout; None if EOF interrupts."""
                chunks: List[bytes] = []
                remaining = n
                while remaining > 0:
                    chunk = await self.process.stdout.read(remaining)
                    if not chunk:
                        return None  # EOF mid-frame
                    chunks.append(chunk)
                    remaining -= len(chunk)
                return b"".join(chunks)

            while True:
                # Frame: 4-byte big-endian length prefix + JSON payload
                # (mirrors the framing written by call()).
                header = await read_exact(4)
                if not header:
                    break
                length = int.from_bytes(header, byteorder='big')
                if length <= 0 or length > max_frame_bytes:
                    logger.error(f"Invalid frame length from bridge: {length}")
                    break

                payload = await read_exact(length)
                if payload is None:
                    break

                try:
                    text = payload.decode('utf-8')
                    message = json.loads(text)
                except Exception:
                    # Skip the malformed frame but keep the connection alive.
                    logger.exception("Failed to parse bridge frame")
                    continue

                # Notifications (method == 'event') fan out to callbacks;
                # anything carrying an 'id' resolves a pending request.
                if isinstance(message, dict) and message.get('method') == 'event':
                    self._handle_event(message.get('params') or {})
                elif isinstance(message, dict) and 'id' in message:
                    self._handle_response(message)

        except asyncio.CancelledError:
            # Clean cancellation - expected during stop()
            raise
        except Exception as e:
            logger.error(f"Bridge reader died: {e}")
        finally:
            # Fail all pending requests so callers don't hang
            error = BridgeConnectionError("Bridge process terminated unexpectedly")
            for request_id, future in list(self.pending_requests.items()):
                if not future.done():
                    future.set_exception(error)
            self.pending_requests.clear()
330
+
331
+ async def _drain_stderr(self):
332
+ """Drain bridge stderr to prevent blocking.
333
+
334
+ Uses native asyncio reads - fully cancellable.
335
+ """
336
+ if self.process is None or self.process.stderr is None:
337
+ return
338
+ try:
339
+ while True:
340
+ line = await self.process.stderr.readline()
341
+ if not line:
342
+ break
343
+ try:
344
+ text = line.decode("utf-8", errors="ignore").rstrip()
345
+ except Exception:
346
+ text = str(line).rstrip()
347
+ if text:
348
+ logger.debug(f"[bridge stderr] {text}")
349
+ except asyncio.CancelledError:
350
+ pass
351
+ except Exception as e:
352
+ logger.debug(f"Bridge stderr drain died: {e}")
353
+
354
+ def _handle_event(self, params: Dict[str, Any]):
355
+ """Handle event notification from bridge."""
356
+ event_type = params.get('type')
357
+ callbacks = self.event_callbacks.get(event_type, [])
358
+
359
+ if event_type in ('stdout', 'stderr'):
360
+ data = params.get('data', '')
361
+ seq = params.get('seq')
362
+ done = params.get('done')
363
+
364
+ # If chunk metadata is present, reassemble to preserve "NDJSON line" semantics.
365
+ if seq is not None or done is not None:
366
+ buf = self._stream_buffers.setdefault(event_type, [])
367
+ if seq == 0 and buf:
368
+ # Best-effort flush of previous incomplete sequence.
369
+ prev = "".join(buf)
370
+ buf.clear()
371
+ for callback in callbacks:
372
+ try:
373
+ callback(prev)
374
+ except Exception:
375
+ logger.exception("Error in %s callback", event_type)
376
+
377
+ buf.append(data)
378
+ if done:
379
+ full = "".join(buf)
380
+ buf.clear()
381
+ for callback in callbacks:
382
+ try:
383
+ callback(full)
384
+ except Exception:
385
+ logger.exception("Error in %s callback", event_type)
386
+ return
387
+
388
+ # No chunk metadata → emit directly.
389
+ for callback in callbacks:
390
+ try:
391
+ callback(data)
392
+ except Exception:
393
+ logger.exception("Error in %s callback", event_type)
394
+ elif event_type == 'content':
395
+ for callback in callbacks:
396
+ try:
397
+ callback(params)
398
+ except Exception:
399
+ logger.exception("Error in content callback")
400
+
401
+ def _handle_response(self, message: Dict[str, Any]):
402
+ """Handle JSON-RPC response."""
403
+ request_id = message.get('id')
404
+ if request_id is None or request_id not in self.pending_requests:
405
+ return
406
+
407
+ future = self.pending_requests.pop(request_id)
408
+
409
+ if 'error' in message:
410
+ error = message['error']
411
+ error_code = error.get('code', -32603)
412
+ error_message = error.get('message', 'Unknown error')
413
+
414
+ # Check for NotFoundError (code -32001 or message pattern)
415
+ if error_code == -32001 or 'not found' in error_message.lower():
416
+ future.set_exception(SandboxNotFoundError(error_message))
417
+ else:
418
+ future.set_exception(Exception(error_message))
419
+ else:
420
+ future.set_result(message.get('result'))
421
+
422
+ # =========================================================================
423
+ # MULTI-INSTANCE METHODS (for Swarm)
424
+ # =========================================================================
425
+
426
+ async def create_instance(
427
+ self,
428
+ instance_id: str,
429
+ params: Dict[str, Any],
430
+ timeout_s: Optional[float] = None,
431
+ ) -> Any:
432
+ """Create a new SwarmKit instance in the bridge."""
433
+ return await self.call(
434
+ 'create_instance',
435
+ {'instance_id': instance_id, **params},
436
+ timeout_s=timeout_s,
437
+ )
438
+
439
+ async def run_on_instance(
440
+ self,
441
+ instance_id: str,
442
+ prompt: str,
443
+ timeout_ms: Optional[int] = None,
444
+ call_timeout_s: Optional[float] = None,
445
+ ) -> Any:
446
+ """Run prompt on a specific SwarmKit instance."""
447
+ params = {'instance_id': instance_id, 'prompt': prompt}
448
+ if timeout_ms is not None:
449
+ params['timeout_ms'] = timeout_ms
450
+ return await self.call('run_on_instance', params, timeout_s=call_timeout_s)
451
+
452
+ async def get_output_on_instance(
453
+ self,
454
+ instance_id: str,
455
+ recursive: bool = False,
456
+ timeout_s: Optional[float] = None,
457
+ ) -> Any:
458
+ """Get output files from a specific SwarmKit instance."""
459
+ return await self.call(
460
+ 'get_output_on_instance',
461
+ {'instance_id': instance_id, 'recursive': recursive},
462
+ timeout_s=timeout_s,
463
+ )
464
+
465
+ async def kill_instance(
466
+ self,
467
+ instance_id: str,
468
+ timeout_s: Optional[float] = None,
469
+ ) -> Any:
470
+ """Kill and remove a specific SwarmKit instance."""
471
+ return await self.call(
472
+ 'kill_instance',
473
+ {'instance_id': instance_id},
474
+ timeout_s=timeout_s,
475
+ )
swarmkit/config.py ADDED
@@ -0,0 +1,92 @@
1
"""Configuration types for SwarmKit SDK."""

from dataclasses import dataclass
from typing import List, Literal, Optional, Protocol, runtime_checkable


# Closed sets of accepted string values. These are Literal aliases so type
# checkers flag invalid values; at runtime they are plain strings.
AgentType = Literal['codex', 'claude', 'gemini', 'qwen']
WorkspaceMode = Literal['knowledge', 'swe']
ReasoningEffort = Literal['low', 'medium', 'high', 'xhigh']
ValidationMode = Literal['strict', 'loose']
11
+
12
+
13
@dataclass
class SchemaOptions:
    """Validation options for schema validation.

    Args:
        mode: Validation mode - 'strict' (exact types) or 'loose' (coerce types, default)
    """
    # Default is 'loose' so values are coerced unless the caller opts in to
    # strict type checking.
    mode: ValidationMode = 'loose'
21
+
22
+
23
@dataclass
class AgentConfig:
    """Agent configuration.

    All fields are optional - TS SDK resolves defaults from environment:
    - type defaults to 'claude'
    - api_key defaults to SWARMKIT_API_KEY env var

    Args:
        type: Agent type (codex, claude, gemini, qwen) - defaults to 'claude'
        api_key: SwarmKit API key from https://dashboard.swarmlink.ai (defaults to SWARMKIT_API_KEY env var)
        model: Model name (optional - uses agent's default if not specified)
        reasoning_effort: Reasoning effort for Codex models (optional)
        betas: Beta headers for Claude (Sonnet 4.5 only; e.g. ["context-1m-2025-08-07"] for 1M context)
    """
    # None on any field means "let the TS bridge resolve the default".
    type: Optional[AgentType] = None
    api_key: Optional[str] = None
    model: Optional[str] = None
    reasoning_effort: Optional[ReasoningEffort] = None
    betas: Optional[List[str]] = None
43
+
44
+
45
@runtime_checkable
class SandboxProvider(Protocol):
    """Sandbox provider protocol.

    Any sandbox provider must implement this protocol.
    Currently supported: E2BProvider

    To add a new provider:
    1. Create a class with `type` and `config` properties
    2. Add handling in bridge/src/adapter.ts

    NOTE: @runtime_checkable isinstance() checks only confirm the attributes
    exist - they do not verify return types.
    """

    @property
    def type(self) -> str:
        """Provider type identifier (e.g., 'e2b')."""
        ...

    @property
    def config(self) -> dict:
        """Provider configuration dict for the bridge."""
        ...
66
+
67
+
68
@dataclass
class E2BProvider:
    """E2B sandbox provider configuration.

    Args:
        api_key: E2B API key (defaults to E2B_API_KEY env var)
        timeout_ms: Sandbox timeout in milliseconds (default: 3600000 = 1 hour)
    """
    api_key: Optional[str] = None
    timeout_ms: int = 3600000

    @property
    def type(self) -> Literal['e2b']:
        """Identifier the bridge adapter dispatches on."""
        return 'e2b'

    @property
    def config(self) -> dict:
        """Bridge-facing configuration dict; falsy fields are omitted."""
        cfg: dict = {}
        if self.api_key:
            cfg['apiKey'] = self.api_key
        if self.timeout_ms:
            cfg['defaultTimeoutMs'] = self.timeout_ms
        return cfg
@@ -0,0 +1,59 @@
1
"""Pipeline - Fluent API for chaining Swarm operations."""

from .pipeline import Pipeline, TerminalPipeline
from .types import (
    # Step configs
    MapConfig,
    FilterConfig,
    ReduceConfig,
    # Results
    StepResult,
    PipelineResult,
    # Events
    PipelineEvents,
    PipelineEventMap,
    StepEvent,
    StepStartEvent,
    StepCompleteEvent,
    StepErrorEvent,
    ItemRetryEvent,
    WorkerCompleteEvent,
    VerifierCompleteEvent,
    CandidateCompleteEvent,
    JudgeCompleteEvent,
    # Types
    EmitOption,
    EventName,
    Step,
    StepType,
)

# Public API surface of the pipeline package: controls `from ... import *`
# and documents which re-exported names callers may rely on.
__all__ = [
    # Main classes
    'Pipeline',
    'TerminalPipeline',
    # Step configs
    'MapConfig',
    'FilterConfig',
    'ReduceConfig',
    # Results
    'StepResult',
    'PipelineResult',
    # Events
    'PipelineEvents',
    'PipelineEventMap',
    'StepEvent',
    'StepStartEvent',
    'StepCompleteEvent',
    'StepErrorEvent',
    'ItemRetryEvent',
    'WorkerCompleteEvent',
    'VerifierCompleteEvent',
    'CandidateCompleteEvent',
    'JudgeCompleteEvent',
    # Types
    'EmitOption',
    'EventName',
    'Step',
    'StepType',
]
+ ]