PyPI - zu-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

zu-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

zu_cli/__init__.py +0 -0
zu_cli/build.py +111 -0
zu_cli/config.py +738 -0
zu_cli/construct.py +318 -0
zu_cli/construct_sandbox.py +139 -0
zu_cli/contribute.py +104 -0
zu_cli/demo.py +373 -0
zu_cli/deploy.py +207 -0
zu_cli/explore.py +93 -0
zu_cli/guardrails.py +102 -0
zu_cli/harden.py +221 -0
zu_cli/main.py +1126 -0
zu_cli/mcp_server.py +444 -0
zu_cli/observe.py +69 -0
zu_cli/offline.py +335 -0
zu_cli/sandbox.py +276 -0
zu_cli/scaffold.py +116 -0
zu_cli/server.py +363 -0
zu_cli/trace.py +111 -0
zu_cli-0.1.0.dist-info/METADATA +26 -0
zu_cli-0.1.0.dist-info/RECORD +23 -0
zu_cli-0.1.0.dist-info/WHEEL +4 -0
zu_cli-0.1.0.dist-info/entry_points.txt +4 -0

zu_cli/offline.py ADDED Viewed

@@ -0,0 +1,335 @@
+"""Offline replay — run a whole agent against a captured ``fixtures/`` bundle, with no
+model and no network, at ~$0. The keystone of the construction sequence: capture once
+live, then iterate against fixtures freely.
+The bundle (``fixtures/capture.json``) is projected from a live run's event log by
+:func:`project_capture` (see ``zu capture``). :func:`rebind_offline` swaps the run's
+live model for a ``ScriptedProvider`` replaying the captured moves, and rebinds the
+off-box tools (``http_fetch``, ``render_dom``, ``browser``) to fixture doubles that
+replay the captured observations in order — reusing each tool's real class through its
+existing injection seam (``HttpFetch(transport=)``, ``RenderDom(backend=)``,
+``Browser(backend=)``), so tier, schema, egress and capability metadata stay exactly as
+in a live run. Detectors, validators and the event sink stay real — only the model and
+the off-box reach are doubled, so the loop, ``track.json`` recording and ``cost.jsonl``
+telemetry are exercised just as they are live.
+The browser tier never had an offline seam before — ``render_dom`` and ``http_fetch``
+did (see ``demo.py``), but the persistent ``browser`` session did not. The new
+:class:`FixtureSessionBackend` is that seam: an ordered observation replay, faithful to
+the loop's soft-miss handling, and LOUD on overrun (so a fixture that runs short fails
+the run instead of silently passing).
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+from zu_core.ports import ToolCall
+# The three off-box tools that need a fixture double; everything else a web agent
+# carries (html_parse, recall) is pure and runs unchanged offline.
+_DOUBLED_TOOLS = ("http_fetch", "render_dom", "browser")
+# Directory (relative to the agent directory) and file name of the captured bundle;
+# bundle_path() joins them into ``<agent_dir>/fixtures/capture.json``.
+FIXTURES_DIR = "fixtures"
+BUNDLE_FILE = "capture.json"
+class OfflineError(RuntimeError):
+    """A fixtures bundle is missing or malformed — surfaced as a clean ConfigError
+    by the CLI so ``--offline`` without a capture fails with an actionable message.
+    Raised by :meth:`Bundle.load` (file absent or unparseable) and by
+    :meth:`Bundle.from_json` (top-level JSON value that is not an object)."""
+# --- the bundle --------------------------------------------------------------
+@dataclass
+class Bundle:
+    """A captured run: the model's ``moves`` (ordered, for a ScriptedProvider) and the
+    per-tool ``observations`` (ordered, replayed by the tool doubles). ``task`` is the
+    query it was captured for; ``model`` is provenance — which model pathfound it."""
+    task: str
+    moves: list[dict] = field(default_factory=list)
+    observations: dict[str, list[dict]] = field(default_factory=dict)
+    model: str | None = None
+    def to_json(self) -> str:
+        """Serialise the bundle to the indented JSON text stored in ``capture.json``."""
+        return json.dumps(
+            {"task": self.task, "model": self.model,
+             "moves": self.moves, "observations": self.observations},
+            indent=2,
+        )
+    @classmethod
+    def from_json(cls, text: str) -> Bundle:
+        """Build a bundle from JSON text.
+        A missing or null ``moves`` / ``observations`` is treated as empty. Raises
+        :class:`OfflineError` when the top-level value is not an object, ``moves`` is
+        not a list, or ``observations`` is not an object of lists — so a structurally
+        wrong bundle is reported as malformed instead of escaping as a bare
+        ``TypeError``/``AttributeError``. Invalid JSON raises ``json.JSONDecodeError``
+        (a ``ValueError``), which :meth:`load` converts."""
+        data = json.loads(text)
+        if not isinstance(data, dict):
+            raise OfflineError("capture.json must be a JSON object")
+        moves = data.get("moves") or []
+        if not isinstance(moves, list):
+            raise OfflineError("capture.json 'moves' must be a list")
+        raw_observations = data.get("observations") or {}
+        if not isinstance(raw_observations, dict):
+            raise OfflineError("capture.json 'observations' must be an object")
+        observations: dict[str, list[dict]] = {}
+        for tool, recorded in raw_observations.items():
+            if not isinstance(recorded, list):
+                raise OfflineError(
+                    f"capture.json observations[{tool!r}] must be a list")
+            # Copy so the bundle never aliases the parsed JSON structure.
+            observations[tool] = list(recorded)
+        return cls(
+            task=data.get("task", ""),
+            model=data.get("model"),
+            moves=list(moves),
+            observations=observations,
+        )
+    def save(self, path: str | Path) -> None:
+        """Write the bundle as UTF-8 JSON to ``path`` (the parent must already exist)."""
+        Path(path).write_text(self.to_json(), encoding="utf-8")
+    @classmethod
+    def load(cls, path: str | Path) -> Bundle:
+        """Read and parse the bundle at ``path``.
+        Every failure mode — file absent, file unreadable, bytes not UTF-8, invalid
+        JSON, wrong structure — is raised as :class:`OfflineError`."""
+        p = Path(path)
+        try:
+            return cls.from_json(p.read_text(encoding="utf-8"))
+        except FileNotFoundError as exc:
+            raise OfflineError(
+                f"no fixtures bundle at {p} — run `zu capture` once (live) to record one "
+                "before `zu run --offline`."
+            ) from exc
+        except OSError as exc:
+            # Present but unreadable (a directory, permissions, ...).
+            raise OfflineError(f"cannot read fixtures bundle at {p}: {exc}") from exc
+        except ValueError as exc:
+            # Covers json.JSONDecodeError and UnicodeDecodeError (both ValueErrors).
+            raise OfflineError(f"malformed fixtures bundle at {p}: {exc}") from exc
+def bundle_path(agent_dir: str | Path) -> Path:
+    """Return the location of an agent's captured bundle,
+    ``<agent_dir>/fixtures/capture.json``."""
+    return Path(agent_dir).joinpath(FIXTURES_DIR, BUNDLE_FILE)
+# --- the ordered-observation cursor (shared by all three doubles) ------------
+class _Cursor:
+    """Hands out one tool's recorded observations in the order they were captured.
+    LOUD on overrun: once the recording is exhausted every further request yields an
+    error observation (never a repeat, never an empty dict), so the loop ends the run
+    as a challenge instead of passing a short fixture off as a success."""
+    def __init__(self, tool: str, observations: list[dict]) -> None:
+        self._tool = tool
+        self._obs = observations
+        self._i = 0
+    def next(self) -> dict:
+        """Return a shallow copy of the next recorded observation, or the overrun
+        error observation when none are left."""
+        recorded = len(self._obs)
+        if self._i < recorded:
+            position = self._i
+            self._i = position + 1
+            # Copy so a caller mutating the observation cannot corrupt the fixture.
+            return dict(self._obs[position])
+        return {"error": f"{self._tool} fixture overrun (recorded {recorded} "
+                         "observations; the offline run asked for more — the captured "
+                         "path is shorter than this run, re-capture with `zu capture`)"}
+# --- the browser seam: a persistent session over a recorded sequence ---------
+class _FixtureSession:
+    """Replay double for a BrowserSessionHandle: every ``send`` (op=open/act/read)
+    answers with the next captured browser observation. A recorded soft miss
+    (``action_error_kind == 'soft'``) is replayed verbatim, so the loop's soft-miss
+    tolerance (``loop._is_soft_miss``) sees it exactly as it would live."""
+    def __init__(self, cursor: _Cursor) -> None:
+        self._cursor = cursor
+    async def send(self, cmd: dict) -> dict:
+        # The command itself is not inspected: replay is purely positional.
+        observation = self._cursor.next()
+        return observation
+    async def close(self) -> None:
+        # Nothing to release — the session holds no live resources.
+        return
+class FixtureSessionBackend:
+    """SessionBackend double for the persistent ``browser`` tool — the offline seam the
+    browser tier previously lacked. ``open_session`` returns a session that replays the
+    recorded ``browser`` observations in order; shaping is left to the tool's own
+    ``Browser._normalise``, so the double only has to emit the captured dicts.
+    A backend instance is meant for a single run: all sessions it opens share one
+    ordered cursor, so a reopen mid-run carries on through the recorded sequence rather
+    than rewinding it (a reopen during construction is a wasted step, not a reset of
+    the fixture)."""
+    name = "fixture-browser-session"
+    def __init__(self, observations: list[dict]) -> None:
+        # Single cursor, created once and handed to every session opened below.
+        self._cursor = _Cursor("browser", observations)
+    async def open_session(self, spec: dict) -> _FixtureSession:
+        # ``spec`` is accepted for interface parity and otherwise ignored.
+        session = _FixtureSession(self._cursor)
+        return session
+# --- the render_dom seam: a one-shot SandboxBackend over a recorded sequence --
+class FixtureRenderBackend:
+    """One-shot SandboxBackend double for ``render_dom`` — the
+    ``demo._FixtureBrowser`` pattern, driven by a captured sequence rather than a
+    single constant. ``exec`` yields the next recorded ``render_dom`` observation;
+    ``RenderDom`` re-adds ``rendered: True`` and copies the content keys, so the
+    captured tool output round-trips faithfully."""
+    name = "fixture-render"
+    def __init__(self, observations: list[dict]) -> None:
+        self._cursor = _Cursor("render_dom", observations)
+        # Bookkeeping only: every launch spec seen, and how many destroys were issued.
+        self.launched: list[dict] = []
+        self.destroyed = 0
+    async def launch(self, spec: dict) -> dict:
+        self.launched.append(spec)
+        # Ids are 1-based and follow launch order.
+        sandbox_id = f"sbx-{len(self.launched)}"
+        return {"id": sandbox_id, "spec": spec}
+    async def exec(self, sandbox: dict, call: ToolCall) -> dict:
+        # Neither the sandbox nor the call is inspected: replay is positional.
+        observation = self._cursor.next()
+        return observation
+    async def destroy(self, sandbox: dict) -> None:
+        self.destroyed = self.destroyed + 1
+# --- the http_fetch seam: a MockTransport over a recorded sequence -----------
+def _fetch_transport(observations: list[dict]) -> Any:
+    """Build an ``httpx.MockTransport`` that answers each request with the next
+    recorded ``http_fetch`` observation — the ``demo.py`` handler pattern, list-driven.
+    A captured observation carries the fetched ``html`` and ``status``; the real
+    ``HttpFetch`` re-reads the body, so handing it back as the response text
+    round-trips."""
+    import httpx
+    cursor = _Cursor("http_fetch", observations)
+    def handler(request: httpx.Request) -> httpx.Response:
+        recorded = cursor.next()
+        if "error" not in recorded:
+            status = int(recorded.get("status", 200))
+            body = str(recorded.get("html", ""))
+            return httpx.Response(status, text=body)
+        # Overrun → a 5xx so HttpFetch surfaces it as an error observation and the
+        # loop ends the run (rather than the fixture silently running short).
+        return httpx.Response(502, text=recorded["error"])
+    return httpx.MockTransport(handler)
+# --- rebinding the registry + provider for an offline run --------------------
+def rebind_offline(registry: Any, bundle: Bundle) -> Any:
+    """Rebind an assembled run for offline replay and return the ScriptedProvider that
+    replaces the live model. Mutates ``registry`` in place: for each off-box tool the
+    agent declares, re-register a fixture double bound to that tool's recorded
+    observations (all ``allow_private=True`` — the offline host is non-resolvable, as in
+    the offline demo). Detectors, validators and the sink are left untouched.
+    Off-box tools the registry does not contain are skipped. A tool that is present
+    but has no recorded observations is bound to an empty sequence, so its first
+    call hits the fixture-overrun path."""
+    import logging
+    from zu_providers.scripted import ScriptedProvider
+    obs = bundle.observations
+    # Only tools the agent actually registered are swapped.
+    present = set(registry.names("tools"))
+    def _swap(name: str, double: Any) -> None:
+        # Preserve the agent's tier stamp: build_registry put the tool at the tier the
+        # agent declared (which may differ from the tool's class default), and the
+        # ladder gates tools by it — the double must sit at the same rung.
+        double.tier = getattr(registry.get("tools", name), "tier", double.tier)
+        # Replacing the real tool with its fixture double is the WHOLE point here, so
+        # silence the registry's shadow-collision warning for this deliberate swap.
+        reg_log = logging.getLogger("zu.registry")
+        prev = reg_log.level
+        reg_log.setLevel(logging.ERROR)
+        try:
+            registry.register("tools", name, double)
+        finally:
+            # Restore the previous level even if registration raises.
+            # NOTE(review): the level change is on the shared "zu.registry" logger,
+            # so it is visible process-wide for the duration of the register call.
+            reg_log.setLevel(prev)
+    # Each tool module is imported lazily, only when that tool is present.
+    if "http_fetch" in present:
+        from zu_tools.fetch import HttpFetch
+        _swap("http_fetch", HttpFetch(
+            allow_private=True, transport=_fetch_transport(obs.get("http_fetch", []))))
+    if "render_dom" in present:
+        from zu_tools.render import RenderDom
+        _swap("render_dom", RenderDom(
+            backend=FixtureRenderBackend(obs.get("render_dom", [])), allow_private=True))
+    if "browser" in present:
+        from zu_tools.browser import Browser
+        _swap("browser", Browser(
+            backend=FixtureSessionBackend(obs.get("browser", [])), allow_private=True))
+    # The captured moves replace the live model.
+    return ScriptedProvider.from_moves(bundle.moves)
+# --- projecting a live run's event log into a bundle (the capture half) ------
+def project_capture(events: list[Any], result: Any, *, task: str, model: str | None = None) -> Bundle:
+    """Project a live run's event log + result into a replayable bundle — the capture
+    counterpart to ``record_track`` (same ``harness.tool.invoked`` events for the
+    moves; the paired ``harness.tool.returned`` events for the observations).
+    ``moves`` is one ScriptedProvider move per tool invocation, in order, followed by a
+    final text move carrying the run's result value — so an offline replay reproduces
+    both the navigation and the extraction. ``observations[tool]`` is the ordered list
+    of that tool's returned observations; a ``browser`` ``op=close`` returns without a
+    session ``send``, so its observation is skipped to keep the replay sequence aligned.
+    Assumes sequential tool use (one call per model turn) — the same shape
+    ``record_track`` projects and the construction loop produces; a single turn that
+    fans out parallel tool calls is not captured faithfully.
+    Events without a ``tool`` in their payload are ignored, as are returned
+    observations that are not dicts. A missing or null ``args`` is recorded as ``{}``.
+    No final text move is appended when ``result.value`` is ``None``."""
+    moves: list[dict] = []
+    observations: dict[str, list[dict]] = {}
+    # The most recent invocation still waiting for its ``returned`` event.
+    pending: dict | None = None
+    for ev in events:
+        type_ = getattr(ev, "type", "")
+        payload = getattr(ev, "payload", {}) or {}
+        if type_ == "harness.tool.invoked":
+            tool = payload.get("tool")
+            if not tool:
+                continue
+            # ``or {}`` guards an explicit ``args: null`` the same way the payload
+            # itself is guarded above; ``dict(None)`` would raise TypeError.
+            args = dict(payload.get("args") or {})
+            moves.append({"tool": tool, "args": args})
+            pending = {"tool": tool, "args": args}
+        elif type_ == "harness.tool.returned":
+            tool = payload.get("tool")
+            if not tool:
+                continue
+            obs = payload.get("observation")
+            # A browser close never reaches the session double, so recording its
+            # observation would shift every later browser replay by one.
+            is_close = bool(pending and pending["tool"] == "browser"
+                            and pending["args"].get("op") == "close")
+            if isinstance(obs, dict) and not is_close:
+                observations.setdefault(tool, []).append(dict(obs))
+            pending = None
+    value = getattr(result, "value", None)
+    if value is not None:
+        moves.append({"text": json.dumps(value), "finish": "stop"})
+    return Bundle(task=task, moves=moves, observations=observations, model=model)
+# --- the reusable offline runner ---------------------------------------------
+async def replay_offline(spec: Any, cfg: Any, bundle: Bundle) -> tuple[Any, list]:
+    """Replay an agent offline against ``bundle``; returns ``(result, events)``. A fresh
+    registry is built and rebound to the bundle, then the real loop is driven on a
+    sink-free bus — no model, no network, no filesystem writes. This is the shared
+    core behind ``zu run --offline`` (the keystone) and ``zu harden`` (replaying
+    perturbed bundles)."""
+    from zu_core.bus import EventBus
+    from zu_core.loop import run_task
+    from .config import build_registry
+    registry = build_registry(cfg)
+    provider = rebind_offline(registry, bundle)
+    bus = EventBus()
+    try:
+        # Loop settings are taken from the agent config unchanged.
+        loop_options = {
+            "containment": cfg.containment,
+            "max_observation_chars": cfg.max_observation_chars,
+            "observation_strategy": cfg.observation_strategy,
+            "max_context_chars": cfg.max_context_chars,
+        }
+        result = await run_task(spec, provider, registry, bus, **loop_options)
+        events = await bus.query()
+        return result, events
+    finally:
+        # Close the bus whether the run finished or raised.
+        await bus.aclose()

zu_cli/sandbox.py ADDED Viewed

@@ -0,0 +1,276 @@
+"""Whole-agent-in-container containment — the launcher for ``containment: required``.
+How runs actually get contained: the agent cannot police a hostile *tool* in
+process — a tool is just Python running in your interpreter, so by the time the
+loop sees a call the tool's code has already run. Real tool containment is an OS
+boundary. This launcher runs the **entire agent inside a hardened container**
+whose only route off-box is an egress proxy on an internal (default-DROP)
+network, with all caps dropped, no-new-privileges, and a blocking seccomp
+profile. Inside that box ``ZU_SANDBOXED=1`` is set, so the fail-closed floor
+(:func:`zu_core.security.enforce_containment`) is satisfied and tools may run —
+the container, not the loop, is what contains them.
+Two halves:
+* :func:`run_contained_from_env` — the in-container entrypoint (console script
+  ``zu-run-contained``). Reads the task + config from the environment, runs the
+  agent, and writes ``{"result": ..., "events": [...]}`` as one JSON object on
+  stdout. It runs *as contained* only because the launcher set ``ZU_SANDBOXED``.
+* :class:`SandboxLauncher` — the host side. Launches the proxy, launches the
+  hardened container on the internal network, execs the entrypoint with the
+  task/config in its env, parses the Result back, and tears everything down.
+The backend and proxy are injected (``LocalDockerBackend`` + ``LocalEgressProxy``
+in production, fakes in tests), so the orchestration is exercised without a
+daemon. The Docker daemon itself is the only un-fakeable part — the same P0/P1
+boundary the red-team container form documents.
+"""
+from __future__ import annotations
+import asyncio
+import json
+import os
+import sys
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+from zu_core.contracts import Result
+from zu_core.loop import run_task
+from zu_core.security import SANDBOX_ENV
+def _seccomp_block_profile() -> str:
+    """Return the host path of the blocking seccomp profile shipped with zu-backends
+    (Docker reads the profile from the client host). The import is deferred to call
+    time so that importing this module never requires zu-backends to be installed."""
+    import zu_backends
+    package_dir = Path(zu_backends.__file__).parent
+    return str(package_dir.joinpath("seccomp", "redteam-block.json"))
+def _last_json_object(out: str) -> dict:
+    """Return the last JSON *object* found on its own line in ``out``.
+    The entrypoint writes exactly one JSON object, normally as the final line.
+    Lines are scanned from the end; blank lines, lines that are not valid JSON and
+    JSON values that are not objects are skipped, so an incidental log line printed
+    before OR after the result does not break parsing.
+    Raises ``ValueError`` when ``out`` has no non-blank line, or when no line parses
+    as a JSON object."""
+    saw_text = False
+    for line in reversed(out.splitlines()):
+        candidate = line.strip()
+        if not candidate:
+            continue
+        saw_text = True
+        try:
+            parsed = json.loads(candidate)
+        except ValueError:
+            # json.JSONDecodeError is a ValueError: not JSON, keep looking upwards.
+            continue
+        if isinstance(parsed, dict):
+            return parsed
+    if not saw_text:
+        raise ValueError("no output to parse")
+    raise ValueError("no JSON object found in output")
+async def _run_in_process(task: dict, config: dict) -> tuple[Result, list]:
+    """Execute a single task inside the current process; returns ``(Result, events)``.
+    This is what the in-container entrypoint calls — there ``ZU_SANDBOXED`` has already
+    been set by the launcher, so the containment floor passes."""
+    from .config import assemble, coerce_config, coerce_task
+    cfg = coerce_config(config)
+    spec = coerce_task(task, cfg.budget, allow_paths=False)
+    provider, registry, bus, providers = assemble(cfg)
+    try:
+        # Loop settings are taken from the coerced config unchanged.
+        loop_options = {
+            "providers": providers,
+            "containment": cfg.containment,
+            "max_observation_chars": cfg.max_observation_chars,
+            "observation_strategy": cfg.observation_strategy,
+            "max_context_chars": cfg.max_context_chars,
+        }
+        result = await run_task(spec, provider, registry, bus, **loop_options)
+        events = await bus.query()
+        return result, events
+    finally:
+        # Close the bus whether the run finished or raised.
+        await bus.aclose()
+def run_contained_from_env(argv: list[str] | None = None) -> int:
+    """Console-script entrypoint (``zu-run-contained``) executed INSIDE the
+    container. Reads ``ZU_TASK`` / ``ZU_CONFIG`` (JSON) from the environment, runs
+    the agent, and emits the Result + event log as one JSON object on stdout.
+    ``argv`` is accepted but not used. An unset or empty ``ZU_TASK`` / ``ZU_CONFIG``
+    is read as ``{}``. Returns ``0`` once the JSON line has been written; any
+    exception from the run propagates to the caller."""
+    # The launcher mounts a bundle at ZU_BUNDLE and sets PYTHONPATH to it.
+    bundle = os.environ.get("ZU_BUNDLE")
+    if bundle:
+        # The mounted bundle carries its own gitignored .env (e.g. EXA_API_KEY);
+        # load it so the agent's tools find their keys inside the box. Then put the
+        # bundle on sys.path explicitly as well, so its tools/ import-refs resolve
+        # even if PYTHONPATH wasn't honored by the exec environment.
+        from .config import load_dotenv
+        load_dotenv(Path(bundle) / ".env")
+        if bundle not in sys.path:
+            sys.path.insert(0, bundle)
+    task = json.loads(os.environ.get("ZU_TASK") or "{}")
+    config = json.loads(os.environ.get("ZU_CONFIG") or "{}")
+    result, events = asyncio.run(_run_in_process(task, config))
+    # default=str stringifies anything model_dump(mode="json") left non-serialisable.
+    json.dump(
+        {
+            "result": result.model_dump(mode="json"),
+            "events": [e.model_dump(mode="json") for e in events],
+        },
+        sys.stdout,
+        default=str,
+    )
+    # Terminate the line: the host side parses stdout line by line.
+    sys.stdout.write("\n")
+    return 0
+@dataclass
+class SandboxLauncher:
+    """Run the whole agent inside a hardened container whose sole egress is a proxy
+    SIDECAR on an internal (default-DROP) network — the faithful topology
+    (RED_TEAM_CONTAINER.md §3), the same one ``SidecarContainerGate`` enforces.
+    A host-side proxy cannot be the sole egress of an ``--internal`` container, so
+    the proxy runs as its own container on the internal network (the target's only
+    route off-box) with a second leg on bridge so IT — and only it — reaches the
+    outside. The target is internal-only, routed through the proxy by name, kept
+    alive with ``sleep infinity`` so we exec ``zu-run-contained`` into it.
+    ``backend`` is a ``LocalDockerBackend`` (its docker client manages the network
+    and sidecar). ``allowlist`` on :meth:`run` is what the proxy permits — the real
+    egress boundary; every other host (and every internal/metadata host) is refused
+    and logged. ``"*"`` permits any host: pass an explicit list for a real boundary."""
+    # Container backend: must provide _docker(), launch(), exec_entrypoint(), destroy().
+    backend: Any
+    # Image used for BOTH the proxy sidecar and the target container.
+    image: str
+    # Name of the internal network; the proxy container is named "<network_name>-proxy".
+    network_name: str = "zu-sandbox-net"
+    # Port the proxy listens on (passed to it as ZU_EGRESS_PORT).
+    proxy_port: int = 8080
+    seccomp: str | None = None          # None -> the shipped blocking profile
+    # Forwarded to backend.exec_entrypoint as timeout_s.
+    exec_timeout_s: float | None = None
+    # How long to wait for the proxy sidecar to log "proxy.ready".
+    ready_timeout_s: float = 20.0
+    async def run(
+        self, task: dict, config: dict, *, allowlist: list[str], bundle_dir: str | None = None
+    ) -> tuple[Result, list[dict]]:
+        """Run a whole AGENT inside the box (the ``zu-run-contained`` entrypoint) and parse
+        its Result + event log back. A thin wrapper over :meth:`run_entrypoint`."""
+        payload = await self.run_entrypoint(
+            ["zu-run-contained"],
+            {"ZU_TASK": json.dumps(task), "ZU_CONFIG": json.dumps(config)},
+            allowlist=allowlist, bundle_dir=bundle_dir,
+        )
+        result = Result.model_validate(payload["result"])
+        return result, payload.get("events", [])
+    async def run_entrypoint(
+        self, entrypoint: list[str], exec_env: dict, *, allowlist: list[str],
+        bundle_dir: str | None = None,
+    ) -> dict:
+        """Launch the hardened box (internal network + egress-proxy sidecar, caps dropped,
+        blocking seccomp), exec ``entrypoint`` inside it with the contained-floor + proxy env
+        merged with ``exec_env``, and return the last JSON object it wrote to stdout. The
+        reusable core of every contained run — an agent run (:meth:`run`) and contained
+        construction (``construct_sandbox.launch_contained_construction``) both go through
+        here, so the container/egress topology lives in exactly one audited place.
+        Raises ``RuntimeError`` when the proxy never becomes ready or the entrypoint
+        produces no stdout. The target container, the proxy sidecar and the network
+        are torn down on every exit path."""
+        client = self.backend._docker()
+        proxy_name = f"{self.network_name}-proxy"
+        # Clear any resources a crashed prior run may have left behind.
+        await self._remove_container(client, proxy_name)
+        await self._remove_network(client, self.network_name)
+        net = await asyncio.to_thread(client.networks.create, self.network_name, internal=True)
+        proxy = None
+        sandbox = None
+        try:
+            # The egress-proxy sidecar: a STABLE name (so the target resolves it via
+            # the internal network's embedded DNS), on the internal network, plus a
+            # bridge leg so it — and only it — reaches the outside.
+            proxy_env = {
+                "ZU_EGRESS_ALLOWLIST": ",".join(allowlist),
+                "ZU_EGRESS_PORT": str(self.proxy_port),
+            }
+            # The proxy is trusted control-plane infra (the egress boundary itself),
+            # run as root so it can bind/log/write regardless of the image's default
+            # user. The untrusted target below keeps the image's non-root user.
+            proxy = await asyncio.to_thread(
+                client.containers.run, self.image, ["zu-egress-proxy"], name=proxy_name,
+                network=self.network_name, environment=proxy_env, user="0", detach=True)
+            bridge = await asyncio.to_thread(client.networks.get, "bridge")
+            await asyncio.to_thread(bridge.connect, proxy)
+            await self._await_proxy_ready(proxy)
+            # The target: internal-only (the proxy is the only route off-box), caps
+            # dropped + blocking seccomp, kept alive so we exec the entrypoint into it.
+            target_spec: dict = {
+                "image": self.image,
+                "network": "isolated",
+                "network_name": self.network_name,
+                "proxy": {"host": proxy_name, "port": self.proxy_port},
+                "seccomp": self.seccomp or _seccomp_block_profile(),
+                "command": ["sleep", "infinity"],
+            }
+            # A bundle's own tools/ are not in the image — mount the bundle dir
+            # READ-ONLY at /bundle so the agent's `tools.x:Class` import-refs
+            # resolve inside the box. The user owns this code; the mount is ro and
+            # the container is still caps-dropped + egress-gated.
+            if bundle_dir is not None:
+                target_spec["volumes"] = {
+                    str(Path(bundle_dir).resolve()): {"bind": "/bundle", "mode": "ro"}
+                }
+            sandbox = await self.backend.launch(target_spec)
+            # `docker exec` does not inherit the container's runtime proxy env, so
+            # pass it explicitly. ZU_SANDBOXED marks the run contained — set HERE,
+            # where the boundary is actually established, never baked into the image.
+            proxy_url = f"http://{proxy_name}:{self.proxy_port}"
+            env = {
+                SANDBOX_ENV: "1",
+                "HTTP_PROXY": proxy_url, "HTTPS_PROXY": proxy_url,
+                "http_proxy": proxy_url, "https_proxy": proxy_url,
+                "NO_PROXY": "localhost,127.0.0.1",
+            }
+            if bundle_dir is not None:
+                # Put the mounted bundle on the path so its tools/ import-refs resolve.
+                env["PYTHONPATH"] = "/bundle"
+                env["ZU_BUNDLE"] = "/bundle"
+            env.update(exec_env)  # the caller's entrypoint-specific vars (task/config, opts)
+            code, out, err = await self.backend.exec_entrypoint(
+                sandbox, entrypoint, environment=env, timeout_s=self.exec_timeout_s,
+            )
+            if not out.strip():
+                raise RuntimeError(
+                    f"contained run produced no output (exit {code}): {err[:300]}"
+                )
+            return _last_json_object(out)
+        finally:
+            # Nested so that a failing destroy() still propagates (as before) but can
+            # no longer skip removal of the proxy sidecar and the internal network.
+            try:
+                if sandbox is not None:
+                    await self.backend.destroy(sandbox)
+            finally:
+                await self._best_effort(proxy, "remove", force=True)
+                await self._best_effort(net, "remove")
+    async def _await_proxy_ready(self, proxy: Any) -> None:
+        """Poll the sidecar every 0.2 s until its logs contain ``proxy.ready``.
+        Raises ``RuntimeError`` if the container exits first or ``ready_timeout_s``
+        elapses."""
+        deadline = time.monotonic() + self.ready_timeout_s
+        while time.monotonic() < deadline:
+            # reload() refreshes the container's status before it is inspected below.
+            await asyncio.to_thread(proxy.reload)
+            logs = (await asyncio.to_thread(proxy.logs)).decode("utf-8", "replace")
+            if "proxy.ready" in logs:
+                return
+            if getattr(proxy, "status", "") in ("exited", "dead"):
+                raise RuntimeError(f"proxy sidecar exited before ready: {logs[-300:]}")
+            await asyncio.sleep(0.2)
+        raise RuntimeError("proxy sidecar did not become ready in time")
+    @staticmethod
+    async def _best_effort(obj: Any, method: str, **kw: Any) -> None:
+        """Call ``obj.<method>(**kw)`` in a worker thread, ignoring any exception.
+        A ``None`` object is a no-op."""
+        if obj is None:
+            return
+        try:
+            await asyncio.to_thread(getattr(obj, method), **kw)
+        except Exception:  # noqa: BLE001 - teardown must not raise over the result
+            pass
+    @staticmethod
+    async def _remove_container(client: Any, name: str) -> None:
+        """Force-remove the container called ``name`` if it exists; errors are ignored."""
+        try:
+            c = await asyncio.to_thread(client.containers.get, name)
+            await asyncio.to_thread(c.remove, force=True)
+        except Exception:  # noqa: BLE001 - absent is the normal case
+            pass
+    @staticmethod
+    async def _remove_network(client: Any, name: str) -> None:
+        """Remove the network called ``name`` if it exists; errors are ignored."""
+        try:
+            n = await asyncio.to_thread(client.networks.get, name)
+            await asyncio.to_thread(n.remove)
+        except Exception:  # noqa: BLE001 - absent is the normal case
+            pass