weft-docker 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Virtual environments
25
+ venv/
26
+ ENV/
27
+ env/
28
+ .venv
29
+ .weft/broker.db
30
+
31
+ # IDEs
32
+ .vscode/
33
+ .idea/
34
+ *.swp
35
+ *.swo
36
+ *~
37
+
38
+ # Testing
39
+ .coverage
40
+ .coverage.*
41
+ .pytest_cache/
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .mypy_cache/
46
+ .dmypy.json
47
+ dmypy.json
48
+ .ruff_cache/
49
+ .ruff/
50
+ .pytest_cache/
51
+
52
+ # SimpleBroker specific
53
+ *.db-shm
54
+ *.db-wal
55
+ .broker.db*
56
+ test.db
57
+ benchmark_pragma.py
58
+
59
+ # OS
60
+ .DS_Store
61
+ Thumbs.db
62
+
63
+ # Temporary files
64
+ *.tmp
65
+ *.bak
66
+ *.log
67
+
68
+ # Multi-agent
69
+ .claude
70
+ .mcp.json
71
+ agent_history/
72
+ .broker.db
73
+ .broker.db-shm
74
+ .broker.db-wal
75
+ .broker.connection.done
76
+ .broker.connection.lock
77
+ .broker.optimization.done
78
+ .broker.optimization.lock
79
+ *comments*.md
80
+ .code/
81
+ # This is kept in context for agents, but we don't want to check it in here
82
+ simplebroker/
@@ -0,0 +1,29 @@
1
+ Metadata-Version: 2.4
2
+ Name: weft-docker
3
+ Version: 0.5.0
4
+ Summary: Docker runner plugin for Weft
5
+ Author-email: Van Lindberg <van@modelmonster.ai>
6
+ License: MIT
7
+ Requires-Python: >=3.12
8
+ Requires-Dist: docker<8,>=7.1.0
9
+ Requires-Dist: weft<1,>=0.6.4
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=7.0; extra == 'dev'
12
+ Description-Content-Type: text/markdown
13
+
14
+ # weft-docker
15
+
16
+ Docker runner plugin for Weft.
17
+
18
+ This extension adds the `docker` runner via the `weft.runners` entry-point
19
+ group. It currently supports one-shot `command` TaskSpecs only.
20
+
21
+ Current host support:
22
+
23
+ - Linux: supported
24
+ - macOS: supported
25
+ - Windows: not currently supported
26
+
27
+ Release tag:
28
+
29
+ - `weft_docker/vX.Y.Z`
@@ -0,0 +1,16 @@
1
+ # weft-docker
2
+
3
+ Docker runner plugin for Weft.
4
+
5
+ This extension adds the `docker` runner via the `weft.runners` entry-point
6
+ group. It currently supports one-shot `command` TaskSpecs only.
7
+
8
+ Current host support:
9
+
10
+ - Linux: supported
11
+ - macOS: supported
12
+ - Windows: not currently supported
13
+
14
+ Release tag:
15
+
16
+ - `weft_docker/vX.Y.Z`
@@ -0,0 +1,32 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "weft-docker"
7
+ version = "0.5.0"
8
+ description = "Docker runner plugin for Weft"
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Van Lindberg", email = "van@modelmonster.ai"},
14
+ ]
15
+ dependencies = [
16
+ "docker>=7.1.0,<8",
17
+ "weft>=0.6.4,<1",
18
+ ]
19
+
20
+ [project.optional-dependencies]
21
+ dev = [
22
+ "pytest>=7.0",
23
+ ]
24
+
25
+ [project.entry-points."weft.runners"]
26
+ docker = "weft_docker.plugin:get_runner_plugin"
27
+
28
+ [tool.hatch.build]
29
+ include = [
30
+ "weft_docker/**/*.py",
31
+ "README.md",
32
+ ]
@@ -0,0 +1,5 @@
1
+ """Docker runner plugin for Weft."""
2
+
3
+ from .plugin import get_runner_plugin
4
+
5
+ __all__ = ["get_runner_plugin"]
@@ -0,0 +1,923 @@
1
+ """Docker runner plugin for Weft.
2
+
3
+ Spec references:
4
+ - docs/specifications/01-Core_Components.md [CC-3.1], [CC-3.2], [CC-3.4]
5
+ - docs/specifications/02-TaskSpec.md [TS-1.3]
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import shutil
12
+ import subprocess
13
+ import time
14
+ import uuid
15
+ from collections.abc import Callable, Iterator, Mapping, Sequence
16
+ from contextlib import contextmanager
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ from simplebroker import BrokerTarget
21
+ from weft.core.resource_monitor import ResourceMetrics
22
+ from weft.core.runners import RunnerOutcome
23
+ from weft.core.runners.subprocess_runner import (
24
+ prepare_command_invocation,
25
+ run_monitored_subprocess,
26
+ )
27
+ from weft.core.tasks.runner import AgentSession, CommandSession
28
+ from weft.ext import (
29
+ RunnerCapabilities,
30
+ RunnerHandle,
31
+ RunnerPlugin,
32
+ RunnerRuntimeDescription,
33
+ )
34
+
35
+ _CONTAINER_LOOKUP_TIMEOUT = 2.0
36
+ _CONTAINER_LOOKUP_INTERVAL = 0.05
37
+
38
+
39
class DockerContainerMonitor:
    """Resource monitor that samples one container through the Docker SDK.

    A Docker client is held between :meth:`start` and :meth:`stop`, and the
    most recent metrics sample is remembered for later retrieval.
    """

    def __init__(
        self,
        *,
        runtime_id: str,
        limits: Any | None,
        image: str,
    ) -> None:
        """Store the container identity and limits; no client is opened yet."""
        self._runtime_id = runtime_id
        self._limits = limits
        self._image = image
        self._client: Any | None = None
        self._last_metrics: ResourceMetrics | None = None

    def start(self, pid: int) -> None:
        """Open the Docker client. ``pid`` is accepted but not used."""
        del pid
        self._client = _docker_client_from_env()

    def stop(self) -> None:
        """Detach the Docker client and close it if one was open."""
        active, self._client = self._client, None
        if active is None:
            return
        active.close()

    def snapshot(self) -> ResourceMetrics:
        """Take a fresh stats sample, remember it, and return it.

        Raises:
            RuntimeError: If the monitor was never started or the container
                cannot be found.
        """
        target = self._get_container()
        if target is None:
            raise RuntimeError(f"Docker container '{self._runtime_id}' is not running")
        sample = _stats_to_metrics(target.stats(stream=False))
        self._last_metrics = sample
        return sample

    def last_metrics(self) -> ResourceMetrics | None:
        """Return the latest sample from :meth:`snapshot`, or ``None``."""
        return self._last_metrics

    def check_limits(self) -> tuple[bool, str | None]:
        """Sample the container and compare memory use to the configured cap.

        Returns:
            ``(True, None)`` while within limits, otherwise ``(False, reason)``.
        """
        sample = self.snapshot()
        cap_mb = _limit_int(self._limits, "memory_mb")
        within_cap = cap_mb is None or not sample.memory_mb > cap_mb
        if within_cap:
            return True, None
        return False, f"Container exceeded memory limit of {cap_mb}MB"

    def _get_container(self) -> Any | None:
        """Look up the monitored container using the open client."""
        active = self._client
        if active is None:
            raise RuntimeError("Docker monitor has not been started")
        return _lookup_container(active, self._runtime_id)
89
+
90
+
91
class DockerCommandRunner:
    """One-shot command runner that executes inside Docker.

    A run shells out to ``docker run`` for the configured image and uses a
    Docker SDK client to look up, monitor, describe, and finally remove the
    container that the CLI created.
    """

    def __init__(
        self,
        *,
        tid: str | None,
        process_target: str | None,
        args: Sequence[Any] | None,
        env: Mapping[str, str] | None,
        working_dir: str | None,
        timeout: float | None,
        limits: Any | None,
        monitor_class: str | None,
        monitor_interval: float | None,
        runner_options: Mapping[str, Any] | None,
        db_path: BrokerTarget | str | None = None,
        config: dict[str, Any] | None = None,
    ) -> None:
        """Validate and store the execution settings for one task.

        ``db_path``, ``config`` and ``monitor_class`` are accepted for
        interface compatibility and are not used by this runner.

        Raises:
            ValueError: If ``process_target`` is blank, the ``image`` runner
                option is missing or blank, or ``docker_args`` is not a list.
        """
        del db_path, config, monitor_class
        if not isinstance(process_target, str) or not process_target.strip():
            raise ValueError("Docker runner requires spec.process_target")

        options = dict(runner_options or {})
        image = options.get("image")
        if not isinstance(image, str) or not image.strip():
            raise ValueError("Docker runner requires spec.runner.options.image")

        self._tid = tid
        self._process_target = process_target.strip()
        self._args = list(args or [])
        self._env = {str(key): str(value) for key, value in dict(env or {}).items()}
        self._working_dir = working_dir
        self._timeout = timeout
        self._limits = limits
        # A missing or zero interval falls back to one second.
        self._monitor_interval = monitor_interval or 1.0
        self._image = image.strip()
        self._docker_binary = str(options.get("docker_binary") or "docker")
        self._docker_args = _string_list(
            options.get("docker_args"),
            name="spec.runner.options.docker_args",
        )
        self._container_workdir = (
            str(options["container_workdir"])
            if options.get("container_workdir") is not None
            else None
        )
        self._mount_workdir = bool(options.get("mount_workdir", True))

    def run(self, work_item: Any) -> RunnerOutcome:
        """Run ``work_item`` without any cancellation or lifecycle hooks."""
        return self.run_with_hooks(work_item)

    def run_with_hooks(
        self,
        work_item: Any,
        *,
        cancel_requested: Callable[[], bool] | None = None,
        on_worker_started: Callable[[int | None], None] | None = None,
        on_runtime_handle_started: Callable[[RunnerHandle], None] | None = None,
    ) -> RunnerOutcome:
        """Run ``work_item`` in a new container and return its outcome.

        The container is always removed before this method returns or raises.

        Raises:
            ValueError: If the Docker binary cannot be found on ``PATH``.
            RuntimeError: If the Docker SDK cannot be imported.
        """
        executable = _resolve_docker_binary(self._docker_binary)
        container_name = _container_name(self._tid)
        command, stdin_data = self._build_docker_command(
            work_item,
            container_name,
            executable=executable,
        )

        # Open the SDK client before spawning anything, so a missing SDK or an
        # unreachable daemon fails fast without leaving an orphaned
        # ``docker run`` process behind.
        with _docker_client() as client:
            process = subprocess.Popen(
                command,
                stdin=subprocess.PIPE if stdin_data is not None else None,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding="utf-8",
                errors="replace",
                env=os.environ.copy(),
            )
            try:
                container = _wait_for_container(
                    client,
                    runtime_id=container_name,
                    process=process,
                )
                runtime_handle = _runtime_handle_for_container(
                    container_name=container_name,
                    image=self._image,
                    docker_binary=self._docker_binary,
                    container=container,
                )
                monitor = DockerContainerMonitor(
                    runtime_id=container_name,
                    limits=self._limits,
                    image=self._image,
                )

                def _stop_runtime() -> None:
                    _docker_stop(client, container_name, timeout=2.0)

                def _kill_runtime() -> None:
                    _docker_kill(client, container_name)

                outcome = run_monitored_subprocess(
                    process=process,
                    stdin_data=stdin_data,
                    timeout=self._timeout,
                    limits=self._limits,
                    monitor_class=None,
                    monitor_interval=self._monitor_interval,
                    monitor=monitor,
                    db_path=None,
                    config=None,
                    runtime_handle=runtime_handle,
                    cancel_requested=cancel_requested,
                    on_worker_started=on_worker_started,
                    on_runtime_handle_started=on_runtime_handle_started,
                    stop_runtime=_stop_runtime,
                    kill_runtime=_kill_runtime,
                    worker_pid=process.pid,
                )

                # Inspect the container once more after the CLI has finished so
                # the exit state (for example an OOM kill) reaches the outcome.
                final_description = _describe_runtime(
                    client,
                    runtime_id=container_name,
                    base_metadata=dict(runtime_handle.metadata),
                )
                outcome = _apply_terminal_state(
                    outcome,
                    final_description=final_description,
                    limits=self._limits,
                )

                updated_handle = _handle_with_runtime_metadata(
                    runtime_handle,
                    final_description,
                )
                outcome.runtime_handle = updated_handle
                return outcome
            except BaseException:
                # The run did not complete normally; do not leave the CLI
                # process running. The container is force-removed below.
                if process.poll() is None:
                    process.kill()
                raise
            finally:
                # Containers are started without ``--rm``, so remove them
                # explicitly on both the success and the failure path.
                _remove_container(client, container_name)

    def start_session(self) -> CommandSession:
        """Always raise: interactive sessions are not supported."""
        raise ValueError("Docker runner does not support interactive sessions")

    def start_agent_session(self) -> AgentSession:
        """Always raise: agent sessions are not supported."""
        raise ValueError("Docker runner does not support agent sessions")

    def _build_docker_command(
        self,
        work_item: Any,
        container_name: str,
        *,
        executable: str,
    ) -> tuple[list[str], str | None]:
        """Assemble the ``docker run`` argv and optional stdin payload.

        Returns:
            ``(argv, stdin_data)``, where ``argv`` ends with the image name
            followed by the command to run inside the container.
        """
        inner_command, stdin_data = prepare_command_invocation(
            self._process_target,
            work_item,
            args=self._args,
        )
        docker_command = [
            executable,
            "run",
            "--name",
            container_name,
            "-i",
            *self._docker_args,
        ]

        memory_limit_mb = _limit_int(self._limits, "memory_mb")
        if memory_limit_mb is not None:
            docker_command.extend(["--memory", f"{memory_limit_mb}m"])

        cpu_percent = _limit_int(self._limits, "cpu_percent")
        if cpu_percent is not None:
            # ``cpu_percent`` is scaled by the host CPU count to obtain the
            # fractional CPU count passed to ``--cpus``.
            host_cpus = max(os.cpu_count() or 1, 1)
            docker_cpus = max((cpu_percent / 100.0) * host_cpus, 0.01)
            docker_command.extend(["--cpus", f"{docker_cpus:.2f}"])

        max_fds = _limit_int(self._limits, "max_fds")
        if max_fds is not None:
            docker_command.extend(["--ulimit", f"nofile={max_fds}:{max_fds}"])

        # A connection limit of exactly zero maps to disabling networking.
        max_connections = _limit_value(self._limits, "max_connections")
        if max_connections == 0:
            docker_command.extend(["--network", "none"])

        if self._mount_workdir and self._working_dir:
            host_workdir = str(Path(self._working_dir).expanduser().resolve())
            container_workdir = self._container_workdir or host_workdir
            docker_command.extend(
                [
                    "--volume",
                    f"{host_workdir}:{container_workdir}",
                    "--workdir",
                    container_workdir,
                ]
            )

        # Sorted so the generated command line is deterministic.
        for key, value in sorted(self._env.items()):
            docker_command.extend(["--env", f"{key}={value}"])

        docker_command.extend([self._image, *inner_command])
        return docker_command, stdin_data
294
+
295
+
296
class DockerRunnerPlugin:
    """Weft runner plugin exposing Docker as a backend for one-shot commands."""

    name = "docker"
    capabilities = RunnerCapabilities(
        supported_types=("command",),
        supports_interactive=False,
        supports_persistent=False,
        supports_agent_sessions=False,
    )

    def check_version(self) -> None:
        """Raise ``RuntimeError`` when the Docker SDK cannot be imported."""
        _load_docker_sdk()

    def validate_taskspec(
        self,
        taskspec_payload: Mapping[str, Any],
        *,
        preflight: bool = False,
    ) -> None:
        """Reject TaskSpec payloads this runner cannot execute.

        Args:
            taskspec_payload: Raw TaskSpec mapping containing a ``spec`` object.
            preflight: When true, also resolve the Docker binary and ping the
                Docker daemon.

        Raises:
            ValueError: On Windows, or when the spec is not a plain command
                task, lacks an image, passes reserved ``docker_args``, or sets
                a non-zero ``max_connections`` limit.
        """
        if os.name == "nt":
            raise ValueError(
                "Docker runner is currently supported only on Linux and macOS"
            )

        spec_section = _require_mapping(taskspec_payload.get("spec"), name="spec")
        if spec_section.get("type") != "command":
            raise ValueError("Docker runner supports only spec.type='command'")
        if spec_section.get("interactive", False):
            raise ValueError("Docker runner does not support interactive tasks")
        if spec_section.get("persistent", False):
            raise ValueError("Docker runner does not support persistent tasks")

        runner_section = _require_mapping(
            spec_section.get("runner"), name="spec.runner"
        )
        runner_options = _require_mapping(
            runner_section.get("options"), name="spec.runner.options"
        )
        image_name = runner_options.get("image")
        if not (isinstance(image_name, str) and image_name.strip()):
            raise ValueError("Docker runner requires spec.runner.options.image")
        extra_args = _string_list(
            runner_options.get("docker_args"),
            name="spec.runner.options.docker_args",
        )
        _validate_extra_docker_args(extra_args)

        declared_limits = spec_section.get("limits")
        if isinstance(declared_limits, Mapping):
            connection_cap = declared_limits.get("max_connections")
            if connection_cap not in (None, 0, 0.0):
                raise ValueError(
                    "Docker runner supports spec.limits.max_connections only when "
                    "it is 0 (mapped to Docker network isolation)"
                )

        if not preflight:
            return
        binary_name = str(runner_options.get("docker_binary") or "docker")
        _resolve_docker_binary(binary_name)
        with _docker_client(timeout=5) as client:
            client.ping()

    def create_runner(
        self,
        *,
        target_type: str,
        tid: str | None,
        function_target: str | None,
        process_target: str | None,
        agent: Mapping[str, Any] | None,
        args: Sequence[Any] | None,
        kwargs: Mapping[str, Any] | None,
        env: Mapping[str, str] | None,
        working_dir: str | None,
        timeout: float | None,
        limits: Any | None,
        monitor_class: str | None,
        monitor_interval: float | None,
        runner_options: Mapping[str, Any] | None,
        persistent: bool,
        interactive: bool,
        db_path: BrokerTarget | str | None = None,
        config: dict[str, Any] | None = None,
    ) -> DockerCommandRunner:
        """Build a :class:`DockerCommandRunner` from the runner arguments.

        ``target_type``, ``function_target``, ``agent``, ``kwargs``,
        ``persistent`` and ``interactive`` are accepted but not used.

        Raises:
            ValueError: On Windows, or when the runner rejects its arguments.
        """
        del target_type, function_target, agent, kwargs, persistent, interactive
        if os.name == "nt":
            raise ValueError(
                "Docker runner is currently supported only on Linux and macOS"
            )
        runner_settings: dict[str, Any] = {
            "tid": tid,
            "process_target": process_target,
            "args": args,
            "env": env,
            "working_dir": working_dir,
            "timeout": timeout,
            "limits": limits,
            "monitor_class": monitor_class,
            "monitor_interval": monitor_interval,
            "runner_options": runner_options,
            "db_path": db_path,
            "config": config,
        }
        return DockerCommandRunner(**runner_settings)

    def stop(self, handle: RunnerHandle, *, timeout: float = 2.0) -> bool:
        """Ask Docker to stop the handle's container; return whether it did."""
        with _docker_client() as client:
            return _docker_stop(client, handle.runtime_id, timeout=timeout)

    def kill(self, handle: RunnerHandle, *, timeout: float = 2.0) -> bool:
        """Kill the handle's container; ``timeout`` is accepted but not used."""
        del timeout
        with _docker_client() as client:
            return _docker_kill(client, handle.runtime_id)

    def describe(self, handle: RunnerHandle) -> RunnerRuntimeDescription | None:
        """Return the current runtime description of the handle's container."""
        with _docker_client() as client:
            return _describe_runtime(
                client,
                runtime_id=handle.runtime_id,
                base_metadata=dict(handle.metadata),
            )
412
+
413
+
414
# Single plugin instance created at import time and shared by every caller.
_PLUGIN = DockerRunnerPlugin()


def get_runner_plugin() -> RunnerPlugin:
    """Return the module-level Docker runner plugin instance."""
    return _PLUGIN
419
+
420
+
421
+ def _container_name(tid: str | None) -> str:
422
+ suffix = (
423
+ tid[-8:] if isinstance(tid, str) and len(tid) >= 8 else uuid.uuid4().hex[:8]
424
+ )
425
+ return f"weft-{suffix}-{uuid.uuid4().hex[:8]}"
426
+
427
+
428
+ def _apply_terminal_state(
429
+ outcome: RunnerOutcome,
430
+ *,
431
+ final_description: RunnerRuntimeDescription | None,
432
+ limits: Any | None,
433
+ ) -> RunnerOutcome:
434
+ metadata = dict(final_description.metadata) if final_description is not None else {}
435
+ oom_killed = bool(metadata.get("oom_killed"))
436
+ if not oom_killed:
437
+ if outcome.metrics is None:
438
+ outcome.metrics = _metrics_from_runtime_metadata(metadata)
439
+ return outcome
440
+
441
+ memory_limit = _limit_int(limits, "memory_mb")
442
+ if memory_limit is None:
443
+ error = "Container exceeded its configured memory limit"
444
+ else:
445
+ error = f"Container exceeded memory limit of {memory_limit}MB"
446
+
447
+ outcome.status = "limit"
448
+ outcome.error = error
449
+ if outcome.metrics is None:
450
+ outcome.metrics = _metrics_from_runtime_metadata(metadata)
451
+ return outcome
452
+
453
+
454
+ def _metrics_from_runtime_metadata(
455
+ metadata: Mapping[str, Any],
456
+ ) -> ResourceMetrics | None:
457
+ memory_usage = metadata.get("memory_usage_mb")
458
+ cpu_percent = metadata.get("cpu_percent")
459
+ memory_mb_value: float | None = None
460
+ if isinstance(memory_usage, (int, float)):
461
+ memory_mb_value = float(memory_usage)
462
+ cpu_percent_value: float | None = None
463
+ if isinstance(cpu_percent, (int, float)):
464
+ cpu_percent_value = float(cpu_percent)
465
+ if memory_mb_value is None and cpu_percent_value is None:
466
+ return None
467
+ return ResourceMetrics(
468
+ timestamp=time.time_ns(),
469
+ memory_mb=memory_mb_value or 0.0,
470
+ cpu_percent=cpu_percent_value or 0.0,
471
+ open_files=0,
472
+ connections=0,
473
+ )
474
+
475
+
476
def _handle_with_runtime_metadata(
    handle: RunnerHandle,
    description: RunnerRuntimeDescription | None,
) -> RunnerHandle:
    """Return a copy of ``handle`` with the description's metadata merged in.

    Keys from ``description`` override keys already on the handle.
    """
    merged = dict(handle.metadata)
    extra = {} if description is None else description.metadata
    merged.update(extra)
    return RunnerHandle(
        runner_name=handle.runner_name,
        runtime_id=handle.runtime_id,
        host_pids=handle.host_pids,
        metadata=merged,
    )
489
+
490
+
491
def _runtime_handle_for_container(
    *,
    container_name: str,
    image: str,
    docker_binary: str,
    container: Any | None,
) -> RunnerHandle:
    """Create the runner handle identifying a container by its name.

    The container id is recorded in the metadata only when ``container`` was
    actually found.
    """
    details: dict[str, Any] = dict(
        container_name=container_name,
        docker_binary=docker_binary,
        image=image,
    )
    if container is not None:
        details["container_id"] = container.id
    return RunnerHandle(
        runner_name="docker",
        runtime_id=container_name,
        metadata=details,
    )
510
+
511
+
512
def _describe_runtime(
    client: Any,
    *,
    runtime_id: str,
    base_metadata: Mapping[str, Any],
) -> RunnerRuntimeDescription:
    """Inspect a container and describe its current state.

    Args:
        client: Docker SDK client used for the lookup.
        runtime_id: Container name used as the runtime identifier.
        base_metadata: Existing metadata to extend; a ``container_id`` entry,
            when present, is used as a fallback lookup key.

    Returns:
        A description whose ``state`` is the Docker status string,
        ``"unknown"`` when Docker reports none, or ``"missing"`` when the
        container cannot be found.
    """
    metadata = dict(base_metadata)
    container_id = metadata.get("container_id")
    container = _lookup_container(
        client,
        runtime_id,
        fallback_id=container_id if isinstance(container_id, str) else None,
    )
    if container is None:
        return RunnerRuntimeDescription(
            runner_name="docker",
            runtime_id=runtime_id,
            state="missing",
            metadata=metadata,
        )

    container.reload()
    raw_attrs = container.attrs
    # Normalise once so every later read is safe even when the SDK hands back
    # something other than a mapping.
    attrs: Mapping[str, Any] = raw_attrs if isinstance(raw_attrs, Mapping) else {}
    state_payload = attrs.get("State")
    if not isinstance(state_payload, Mapping):
        state_payload = {}

    metadata["container_id"] = container.id
    raw_name = attrs.get("Name")
    # The name is stripped of its leading slash; fall back to the runtime id
    # when it is absent or not a string.
    reported_name = raw_name.lstrip("/") if isinstance(raw_name, str) else ""
    metadata["container_name"] = reported_name or runtime_id
    image = _image_name_from_attrs(attrs)
    if image:
        metadata["image"] = image

    state = state_payload.get("Status")
    if isinstance(state, str):
        metadata["status"] = state
    metadata["oom_killed"] = bool(state_payload.get("OOMKilled"))
    metadata["exit_code"] = state_payload.get("ExitCode")
    host_pid = state_payload.get("Pid")
    if isinstance(host_pid, int) and host_pid > 0:
        metadata["host_pid"] = host_pid
    started_at = state_payload.get("StartedAt")
    if isinstance(started_at, str) and started_at:
        metadata["started_at"] = started_at
    finished_at = state_payload.get("FinishedAt")
    if isinstance(finished_at, str) and finished_at:
        metadata["finished_at"] = finished_at
    error = state_payload.get("Error")
    if isinstance(error, str) and error:
        metadata["engine_error"] = error
    host_config = attrs.get("HostConfig")
    if isinstance(host_config, Mapping):
        network_mode = host_config.get("NetworkMode")
        if isinstance(network_mode, str) and network_mode:
            metadata["network_mode"] = network_mode

    stats_metadata = _docker_stats_metadata(container.stats(stream=False))
    metadata.update(stats_metadata)

    return RunnerRuntimeDescription(
        runner_name="docker",
        runtime_id=runtime_id,
        state=state if isinstance(state, str) else "unknown",
        metadata=metadata,
    )
577
+
578
+
579
+ def _image_name_from_attrs(attrs: Mapping[str, Any]) -> str | None:
580
+ config = attrs.get("Config")
581
+ if not isinstance(config, Mapping):
582
+ return None
583
+ image = config.get("Image")
584
+ return image if isinstance(image, str) and image else None
585
+
586
+
587
def _docker_stats_metadata(payload: Mapping[str, Any]) -> dict[str, Any]:
    """Flatten a Docker stats payload into runtime metadata entries.

    CPU and memory usage are always present. The memory limit, pid count,
    network I/O and block I/O are added only when the payload provides them.
    """
    sample = _stats_to_metrics(payload)
    result: dict[str, Any] = {
        "cpu_percent": round(sample.cpu_percent, 2),
        "memory_usage_mb": round(sample.memory_mb, 3),
    }

    optional_entries = (
        ("memory_limit_mb", _memory_limit_mb),
        ("pids", _pids_current),
        ("network_io_bytes", _network_io_bytes),
        ("block_io_bytes", _block_io_bytes),
    )
    for key, extract in optional_entries:
        extracted = extract(payload)
        if extracted is not None:
            result[key] = extracted
    return result
610
+
611
+
612
def _stats_to_metrics(payload: Mapping[str, Any]) -> ResourceMetrics:
    """Convert a Docker stats payload into a timestamped metrics sample.

    Open-file and connection counts are always reported as zero.
    """
    taken_at = time.time_ns()
    memory_mb = _memory_usage_mb(payload)
    cpu_share = _cpu_percent(payload)
    return ResourceMetrics(
        timestamp=taken_at,
        memory_mb=memory_mb,
        cpu_percent=cpu_share,
        open_files=0,
        connections=0,
    )
620
+
621
+
622
+ def _memory_usage_mb(payload: Mapping[str, Any]) -> float:
623
+ memory_stats = payload.get("memory_stats")
624
+ if not isinstance(memory_stats, Mapping):
625
+ return 0.0
626
+ usage = memory_stats.get("usage")
627
+ if not isinstance(usage, (int, float)):
628
+ return 0.0
629
+ return round(float(usage) / (1024 * 1024), 3)
630
+
631
+
632
+ def _memory_limit_mb(payload: Mapping[str, Any]) -> float | None:
633
+ memory_stats = payload.get("memory_stats")
634
+ if not isinstance(memory_stats, Mapping):
635
+ return None
636
+ limit = memory_stats.get("limit")
637
+ if not isinstance(limit, (int, float)) or limit <= 0:
638
+ return None
639
+ return round(float(limit) / (1024 * 1024), 3)
640
+
641
+
642
+ def _cpu_percent(payload: Mapping[str, Any]) -> float:
643
+ cpu_stats = payload.get("cpu_stats")
644
+ precpu_stats = payload.get("precpu_stats")
645
+ if not isinstance(cpu_stats, Mapping) or not isinstance(precpu_stats, Mapping):
646
+ return 0.0
647
+
648
+ cpu_usage = cpu_stats.get("cpu_usage")
649
+ precpu_usage = precpu_stats.get("cpu_usage")
650
+ if not isinstance(cpu_usage, Mapping) or not isinstance(precpu_usage, Mapping):
651
+ return 0.0
652
+
653
+ total_usage = cpu_usage.get("total_usage")
654
+ previous_total = precpu_usage.get("total_usage")
655
+ system_usage = cpu_stats.get("system_cpu_usage")
656
+ previous_system = precpu_stats.get("system_cpu_usage")
657
+ if not isinstance(total_usage, (int, float)):
658
+ return 0.0
659
+ if not isinstance(previous_total, (int, float)):
660
+ return 0.0
661
+ if not isinstance(system_usage, (int, float)):
662
+ return 0.0
663
+ if not isinstance(previous_system, (int, float)):
664
+ return 0.0
665
+
666
+ total_usage_value = float(total_usage)
667
+ previous_total_value = float(previous_total)
668
+ system_usage_value = float(system_usage)
669
+ previous_system_value = float(previous_system)
670
+
671
+ cpu_delta = total_usage_value - previous_total_value
672
+ system_delta = system_usage_value - previous_system_value
673
+ if cpu_delta <= 0 or system_delta <= 0:
674
+ return 0.0
675
+
676
+ online_cpus = cpu_stats.get("online_cpus")
677
+ if not isinstance(online_cpus, int) or online_cpus <= 0:
678
+ percpu_usage = cpu_usage.get("percpu_usage")
679
+ if isinstance(percpu_usage, Sequence) and not isinstance(
680
+ percpu_usage, (str, bytes)
681
+ ):
682
+ online_cpus = max(len(percpu_usage), 1)
683
+ else:
684
+ online_cpus = 1
685
+
686
+ return round((cpu_delta / system_delta) * float(online_cpus) * 100.0, 2)
687
+
688
+
689
+ def _pids_current(payload: Mapping[str, Any]) -> int | None:
690
+ pids_stats = payload.get("pids_stats")
691
+ if not isinstance(pids_stats, Mapping):
692
+ return None
693
+ current = pids_stats.get("current")
694
+ if isinstance(current, int) and current >= 0:
695
+ return current
696
+ return None
697
+
698
+
699
+ def _network_io_bytes(payload: Mapping[str, Any]) -> dict[str, int] | None:
700
+ networks = payload.get("networks")
701
+ if not isinstance(networks, Mapping):
702
+ return None
703
+ rx_total = 0
704
+ tx_total = 0
705
+ seen = False
706
+ for value in networks.values():
707
+ if not isinstance(value, Mapping):
708
+ continue
709
+ rx_bytes = value.get("rx_bytes")
710
+ tx_bytes = value.get("tx_bytes")
711
+ if isinstance(rx_bytes, int) and rx_bytes >= 0:
712
+ rx_total += rx_bytes
713
+ seen = True
714
+ if isinstance(tx_bytes, int) and tx_bytes >= 0:
715
+ tx_total += tx_bytes
716
+ seen = True
717
+ if not seen:
718
+ return None
719
+ return {"rx": rx_total, "tx": tx_total}
720
+
721
+
722
+ def _block_io_bytes(payload: Mapping[str, Any]) -> dict[str, int] | None:
723
+ blkio_stats = payload.get("blkio_stats")
724
+ if not isinstance(blkio_stats, Mapping):
725
+ return None
726
+ entries = blkio_stats.get("io_service_bytes_recursive")
727
+ if not isinstance(entries, Sequence) or isinstance(entries, (str, bytes)):
728
+ return None
729
+ read_total = 0
730
+ write_total = 0
731
+ seen = False
732
+ for entry in entries:
733
+ if not isinstance(entry, Mapping):
734
+ continue
735
+ op = entry.get("op")
736
+ value = entry.get("value")
737
+ if not isinstance(op, str) or not isinstance(value, int):
738
+ continue
739
+ normalized = op.lower()
740
+ if normalized == "read":
741
+ read_total += value
742
+ seen = True
743
+ elif normalized == "write":
744
+ write_total += value
745
+ seen = True
746
+ if not seen:
747
+ return None
748
+ return {"read": read_total, "write": write_total}
749
+
750
+
751
def _lookup_container(
    client: Any,
    runtime_id: str,
    *,
    fallback_id: str | None = None,
) -> Any | None:
    """Find a container by name, then by fallback id, then by listing.

    Args:
        client: Docker SDK client.
        runtime_id: Container name to look for.
        fallback_id: Optional container id tried when the name lookup fails.

    Returns:
        The matching container object, or ``None`` when none matches exactly.
    """
    docker = _load_docker_sdk()

    def _get(identifier: str) -> Any | None:
        try:
            return client.containers.get(identifier)
        except docker.errors.NotFound:
            return None

    container = _get(runtime_id)
    if container is not None:
        return container

    if isinstance(fallback_id, str) and fallback_id and fallback_id != runtime_id:
        container = _get(fallback_id)
        if container is not None:
            return container

    list_method = getattr(client.containers, "list", None)
    if not callable(list_method):
        return None
    try:
        candidates = list_method(all=True, filters={"name": runtime_id})
    except Exception:  # pragma: no cover - defensive Docker API fallback
        return None
    for candidate in candidates:
        attrs = getattr(candidate, "attrs", None)
        if isinstance(attrs, Mapping):
            name = attrs.get("Name")
            if isinstance(name, str) and name.lstrip("/") == runtime_id:
                return candidate
        candidate_name = getattr(candidate, "name", None)
        if isinstance(candidate_name, str) and candidate_name == runtime_id:
            return candidate
    # Only an exact name match is accepted. The name filter can return
    # containers whose names merely contain ``runtime_id``, and the result is
    # used to stop, kill and remove containers, so a near match must never be
    # returned.
    return None
791
+
792
+
793
def _wait_for_container(
    client: Any,
    *,
    runtime_id: str,
    process: subprocess.Popen[str],
) -> Any | None:
    """Poll for the container created by ``process`` until it shows up.

    Polling ends when the container is found, the process has exited, or the
    lookup timeout elapses. One final lookup is made before giving up.
    """
    give_up_at = time.monotonic() + _CONTAINER_LOOKUP_TIMEOUT
    while time.monotonic() < give_up_at:
        found = _lookup_container(client, runtime_id)
        if found is not None:
            return found
        process_exited = process.poll() is not None
        if process_exited:
            break
        time.sleep(_CONTAINER_LOOKUP_INTERVAL)
    # Last chance: the container may have appeared after the loop ended.
    return _lookup_container(client, runtime_id)
808
+
809
+
810
def _docker_stop(client: Any, runtime_id: str, *, timeout: float | None) -> bool:
    """Stop the named container; return ``True`` only when the stop succeeded.

    The timeout is truncated to whole seconds with a minimum of one, and
    defaults to two seconds when ``timeout`` is missing or zero.
    """
    target = _lookup_container(client, runtime_id)
    if target is None:
        return False
    try:
        grace_seconds = max(int(timeout or 2.0), 1)
        target.stop(timeout=grace_seconds)
    except Exception:  # pragma: no cover - Docker daemon edge conditions
        return False
    else:
        return True
820
+
821
+
822
def _docker_kill(client: Any, runtime_id: str) -> bool:
    """Kill the named container; return ``True`` only when the kill succeeded."""
    target = _lookup_container(client, runtime_id)
    if target is None:
        return False
    try:
        target.kill()
    except Exception:  # pragma: no cover - Docker daemon edge conditions
        return False
    else:
        return True
831
+
832
+
833
def _remove_container(client: Any, runtime_id: str) -> None:
    """Force-remove the named container, ignoring removal failures."""
    target = _lookup_container(client, runtime_id)
    if target is not None:
        try:
            target.remove(force=True)
        except Exception:  # pragma: no cover - best-effort cleanup
            pass
841
+
842
+
843
@contextmanager
def _docker_client(*, timeout: int = 10) -> Iterator[Any]:
    """Yield a Docker SDK client and close it when the block exits."""
    sdk_client = _docker_client_from_env(timeout=timeout)
    try:
        yield sdk_client
    finally:
        # Close the client even when the managed block raises.
        sdk_client.close()
850
+
851
+
852
def _docker_client_from_env(*, timeout: int = 10) -> Any:
    """Create a Docker SDK client from the environment with ``version="auto"``."""
    sdk = _load_docker_sdk()
    client = sdk.from_env(version="auto", timeout=timeout)
    return client
855
+
856
+
857
+ def _load_docker_sdk() -> Any:
858
+ try:
859
+ import docker
860
+ except Exception as exc: # pragma: no cover - dependency guard
861
+ raise RuntimeError(
862
+ "Docker runner requires the Docker SDK for Python. Install weft[docker]."
863
+ ) from exc
864
+ return docker
865
+
866
+
867
+ def _resolve_docker_binary(docker_binary: str) -> str:
868
+ executable = shutil.which(docker_binary)
869
+ if executable is None:
870
+ raise ValueError(f"Docker binary '{docker_binary}' is not available on PATH")
871
+ return executable
872
+
873
+
874
+ def _validate_extra_docker_args(args: Sequence[str]) -> None:
875
+ reserved = {
876
+ "--cpus",
877
+ "--env",
878
+ "--interactive",
879
+ "--memory",
880
+ "--name",
881
+ "--network",
882
+ "--rm",
883
+ "--ulimit",
884
+ "--volume",
885
+ "--workdir",
886
+ "-e",
887
+ "-i",
888
+ "-v",
889
+ "-w",
890
+ }
891
+ for arg in args:
892
+ if arg in reserved:
893
+ raise ValueError(
894
+ f"Docker runner option '{arg}' is managed by TaskSpec fields and "
895
+ "cannot be passed through spec.runner.options.docker_args"
896
+ )
897
+
898
+
899
+ def _require_mapping(value: object, *, name: str) -> Mapping[str, Any]:
900
+ if not isinstance(value, Mapping):
901
+ raise ValueError(f"{name} must be an object")
902
+ return value
903
+
904
+
905
+ def _string_list(value: object, *, name: str) -> list[str]:
906
+ if value is None:
907
+ return []
908
+ if not isinstance(value, Sequence) or isinstance(value, (str, bytes)):
909
+ raise ValueError(f"{name} must be a list of strings")
910
+ return [str(item) for item in value]
911
+
912
+
913
def _limit_int(limits: Any | None, field_name: str) -> int | None:
    """Return the named limit when it is a positive integer, else ``None``."""
    candidate = _limit_value(limits, field_name)
    return candidate if isinstance(candidate, int) and candidate > 0 else None
918
+
919
+
920
+ def _limit_value(limits: Any | None, field_name: str) -> Any | None:
921
+ if limits is None:
922
+ return None
923
+ return getattr(limits, field_name, None)