zu-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zu_cli/offline.py ADDED
@@ -0,0 +1,335 @@
1
+ """Offline replay — run a whole agent against a captured ``fixtures/`` bundle, with no
2
+ model and no network, at ~$0. The keystone of the construction sequence: capture once
3
+ live, then iterate against fixtures freely.
4
+
5
+ The bundle (``fixtures/capture.json``) is projected from a live run's event log by
6
+ :func:`project_capture` (see ``zu capture``). :func:`rebind_offline` swaps the run's
7
+ live model for a ``ScriptedProvider`` replaying the captured moves, and rebinds the
8
+ off-box tools (``http_fetch``, ``render_dom``, ``browser``) to fixture doubles that
9
+ replay the captured observations in order — reusing each tool's real class through its
10
+ existing injection seam (``HttpFetch(transport=)``, ``RenderDom(backend=)``,
11
+ ``Browser(backend=)``), so tier, schema, egress and capability metadata stay exactly as
12
+ in a live run. Detectors, validators and the event sink stay real — only the model and
13
+ the off-box reach are doubled, so the loop, ``track.json`` recording and ``cost.jsonl``
14
+ telemetry are exercised just as they are live.
15
+
16
+ The browser tier never had an offline seam before — ``render_dom`` and ``http_fetch``
17
+ did (see ``demo.py``), but the persistent ``browser`` session did not. The new
18
+ :class:`FixtureSessionBackend` is that seam: an ordered observation replay, faithful to
19
+ the loop's soft-miss handling, and LOUD on overrun (so a fixture that runs short fails
20
+ the run instead of silently passing).
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ from dataclasses import dataclass, field
27
+ from pathlib import Path
28
+ from typing import Any
29
+
30
+ from zu_core.ports import ToolCall
31
+
32
+ # The three off-box tools that need a fixture double; everything else a web agent
33
+ # carries (html_parse, recall) is pure and runs unchanged offline.
34
+ _DOUBLED_TOOLS = ("http_fetch", "render_dom", "browser")
35
+
36
+ FIXTURES_DIR = "fixtures"
37
+ BUNDLE_FILE = "capture.json"
38
+
39
+
40
class OfflineError(RuntimeError):
    """A fixtures bundle is missing or malformed — surfaced as a clean ConfigError
    by the CLI so ``--offline`` without a capture fails with an actionable message."""


# --- the bundle --------------------------------------------------------------


@dataclass
class Bundle:
    """A captured run: the model's ``moves`` (ordered, for a ScriptedProvider) and the
    per-tool ``observations`` (ordered, replayed by the tool doubles). ``task`` is the
    query it was captured for; ``model`` is provenance — which model pathfound it.

    :meth:`from_json` validates the container shapes up front, so a hand-edited or
    truncated ``capture.json`` raises :class:`OfflineError` instead of leaking a bare
    ``TypeError``/``AttributeError`` — or silently turning a string into a list of
    characters."""

    task: str
    moves: list[dict] = field(default_factory=list)
    observations: dict[str, list[dict]] = field(default_factory=dict)
    model: str | None = None

    def to_json(self) -> str:
        """Serialise the bundle as an indented JSON object (the ``capture.json`` body)."""
        return json.dumps(
            {"task": self.task, "model": self.model,
             "moves": self.moves, "observations": self.observations},
            indent=2,
        )

    @classmethod
    def from_json(cls, text: str) -> Bundle:
        """Parse a ``capture.json`` body into a :class:`Bundle`.

        A missing or ``null`` ``moves`` / ``observations`` is treated as empty.

        Raises:
            json.JSONDecodeError: ``text`` is not valid JSON.
            OfflineError: the document is not an object, ``moves`` is not an array,
                ``observations`` is not an object, or one tool's observations are
                not an array.
        """
        data = json.loads(text)
        if not isinstance(data, dict):
            raise OfflineError("capture.json must be a JSON object")

        moves = data.get("moves") or []
        if not isinstance(moves, list):
            raise OfflineError("capture.json 'moves' must be a JSON array")

        observations = data.get("observations") or {}
        if not isinstance(observations, dict):
            raise OfflineError("capture.json 'observations' must be a JSON object")
        for tool, recorded in observations.items():
            # list("abc") would "succeed" and replay garbage — reject it loudly.
            if not isinstance(recorded, list):
                raise OfflineError(
                    f"capture.json observations for {tool!r} must be a JSON array"
                )

        return cls(
            task=data.get("task", ""),
            model=data.get("model"),
            moves=list(moves),
            observations={k: list(v) for k, v in observations.items()},
        )

    def save(self, path: str | Path) -> None:
        """Write the bundle to ``path`` as UTF-8 JSON."""
        Path(path).write_text(self.to_json(), encoding="utf-8")

    @classmethod
    def load(cls, path: str | Path) -> Bundle:
        """Read and parse the bundle at ``path``.

        Raises:
            OfflineError: the file does not exist, is not valid UTF-8 JSON, or does
                not have the expected shape.
        """
        p = Path(path)
        try:
            return cls.from_json(p.read_text(encoding="utf-8"))
        except FileNotFoundError as exc:
            raise OfflineError(
                f"no fixtures bundle at {p} — run `zu capture` once (live) to record one "
                "before `zu run --offline`."
            ) from exc
        except ValueError as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError alike.
            raise OfflineError(f"malformed fixtures bundle at {p}: {exc}") from exc
93
+
94
+
95
def bundle_path(agent_dir: str | Path) -> Path:
    """Return the location of an agent's captured bundle, i.e.
    ``<agent_dir>/fixtures/capture.json``."""
    return Path(agent_dir).joinpath(FIXTURES_DIR, BUNDLE_FILE)
98
+
99
+
100
+ # --- the ordered-observation cursor (shared by all three doubles) ------------
101
+
102
+
103
+ class _Cursor:
104
+ """Pops a tool's recorded observations in invocation order. LOUD on overrun: when
105
+ the run asks for more observations than were captured, it returns an error
106
+ observation (not a silent repeat or empty) so the loop ends the run as a challenge
107
+ rather than passing a short fixture off as a success."""
108
+
109
+ def __init__(self, tool: str, observations: list[dict]) -> None:
110
+ self._tool = tool
111
+ self._obs = observations
112
+ self._i = 0
113
+
114
+ def next(self) -> dict:
115
+ if self._i >= len(self._obs):
116
+ return {"error": f"{self._tool} fixture overrun (recorded {len(self._obs)} "
117
+ "observations; the offline run asked for more — the captured "
118
+ "path is shorter than this run, re-capture with `zu capture`)"}
119
+ obs = self._obs[self._i]
120
+ self._i += 1
121
+ return dict(obs)
122
+
123
+
124
+ # --- the browser seam: a persistent session over a recorded sequence ---------
125
+
126
+
127
+ class _FixtureSession:
128
+ """A BrowserSessionHandle that replays a recorded observation sequence: each
129
+ ``send`` (op=open/act/read) returns the next captured browser observation. A
130
+ recorded soft miss (``action_error_kind == 'soft'``) replays verbatim, so the
131
+ loop's soft-miss tolerance (``loop._is_soft_miss``) sees it exactly as live."""
132
+
133
+ def __init__(self, cursor: _Cursor) -> None:
134
+ self._cursor = cursor
135
+
136
+ async def send(self, cmd: dict) -> dict:
137
+ return self._cursor.next()
138
+
139
+ async def close(self) -> None:
140
+ return None
141
+
142
+
143
class FixtureSessionBackend:
    """Offline SessionBackend double for the persistent ``browser`` tool — the seam the
    browser tier previously lacked. Sessions it opens replay the recorded ``browser``
    observations in order; shaping is left to the tool's own ``Browser._normalise``,
    so this double only has to emit the captured dicts.

    A backend instance is meant for a single run: all sessions it opens draw from one
    shared ordered cursor, so a mid-run reopen continues the recording instead of
    rewinding it (a reopen during construction is a wasted step, not a reset of the
    fixture)."""

    name = "fixture-browser-session"

    def __init__(self, observations: list[dict]) -> None:
        # One cursor for the backend's lifetime — shared by every session.
        self._cursor = _Cursor("browser", observations)

    async def open_session(self, spec: dict) -> _FixtureSession:
        """Return a replay session over the shared cursor; ``spec`` is not consulted."""
        session = _FixtureSession(self._cursor)
        return session
160
+
161
+
162
+ # --- the render_dom seam: a one-shot SandboxBackend over a recorded sequence --
163
+
164
+
165
class FixtureRenderBackend:
    """Offline one-shot SandboxBackend double for ``render_dom``: the
    ``demo._FixtureBrowser`` pattern, driven by a captured sequence rather than a single
    constant. Each ``exec`` yields the next recorded ``render_dom`` observation;
    ``RenderDom`` re-adds ``rendered: True`` and copies the content keys, so the
    captured tool output round-trips faithfully.

    ``launched`` collects every spec passed to ``launch`` and ``destroyed`` counts
    ``destroy`` calls."""

    name = "fixture-render"

    def __init__(self, observations: list[dict]) -> None:
        self._cursor = _Cursor("render_dom", observations)
        self.launched: list[dict] = []
        self.destroyed = 0

    async def launch(self, spec: dict) -> dict:
        """Record ``spec`` and return a fake sandbox handle numbered by launch count."""
        self.launched.append(spec)
        ordinal = len(self.launched)
        return {"id": f"sbx-{ordinal}", "spec": spec}

    async def exec(self, sandbox: dict, call: ToolCall) -> dict:
        """Replay the next recorded observation; ``sandbox`` and ``call`` are unused."""
        return self._cursor.next()

    async def destroy(self, sandbox: dict) -> None:
        """Count the teardown; there is nothing real to destroy."""
        self.destroyed = self.destroyed + 1
188
+
189
+
190
+ # --- the http_fetch seam: a MockTransport over a recorded sequence -----------
191
+
192
+
193
def _fetch_transport(observations: list[dict]) -> Any:
    """Build an ``httpx.MockTransport`` that answers each request with the next recorded
    ``http_fetch`` observation — the ``demo.py`` handler pattern, list-driven. A captured
    observation carries the fetched ``html`` and ``status``; the real ``HttpFetch``
    re-reads the body, so returning it as the response text round-trips."""
    import httpx

    replay = _Cursor("http_fetch", observations)

    def handler(request: httpx.Request) -> httpx.Response:
        recorded = replay.next()
        if "error" not in recorded:
            status = int(recorded.get("status", 200))
            body = str(recorded.get("html", ""))
            return httpx.Response(status, text=body)
        # Overrun → a 5xx so HttpFetch surfaces it as an error observation and the
        # loop ends the run (rather than the fixture silently running short).
        return httpx.Response(502, text=recorded["error"])

    return httpx.MockTransport(handler)
211
+
212
+
213
+ # --- rebinding the registry + provider for an offline run --------------------
214
+
215
+
216
def rebind_offline(registry: Any, bundle: Bundle) -> Any:
    """Prepare an assembled run for offline replay.

    ``registry`` is mutated in place: each off-box tool the agent declares
    (``http_fetch``, ``render_dom``, ``browser``) is re-registered as a fixture double
    fed by that tool's recorded observations. All doubles use ``allow_private=True`` —
    the offline host is non-resolvable, as in the offline demo. Detectors, validators
    and the sink are not touched.

    Returns the ScriptedProvider (built from ``bundle.moves``) that stands in for the
    live model."""
    import logging

    from zu_providers.scripted import ScriptedProvider

    recorded = bundle.observations
    declared = set(registry.names("tools"))

    def _swap(name: str, double: Any) -> None:
        # Keep the agent's tier stamp: build_registry placed the tool at the tier the
        # agent declared (possibly not the class default), and the ladder gates tools
        # by it — so the double has to sit on the same rung.
        double.tier = getattr(registry.get("tools", name), "tier", double.tier)
        # Shadowing the real tool is the whole point of this swap, so mute the
        # registry's shadow-collision warning just for the register call.
        registry_logger = logging.getLogger("zu.registry")
        saved_level = registry_logger.level
        registry_logger.setLevel(logging.ERROR)
        try:
            registry.register("tools", name, double)
        finally:
            registry_logger.setLevel(saved_level)

    if "http_fetch" in declared:
        from zu_tools.fetch import HttpFetch

        fetch_double = HttpFetch(
            allow_private=True, transport=_fetch_transport(recorded.get("http_fetch", [])))
        _swap("http_fetch", fetch_double)
    if "render_dom" in declared:
        from zu_tools.render import RenderDom

        render_double = RenderDom(
            backend=FixtureRenderBackend(recorded.get("render_dom", [])), allow_private=True)
        _swap("render_dom", render_double)
    if "browser" in declared:
        from zu_tools.browser import Browser

        browser_double = Browser(
            backend=FixtureSessionBackend(recorded.get("browser", [])), allow_private=True)
        _swap("browser", browser_double)

    return ScriptedProvider.from_moves(bundle.moves)
261
+
262
+
263
+ # --- projecting a live run's event log into a bundle (the capture half) ------
264
+
265
+
266
def project_capture(events: list[Any], result: Any, *, task: str, model: str | None = None) -> Bundle:
    """Turn a live run's event log and result into a replayable bundle — the capture
    counterpart to ``record_track`` (``harness.tool.invoked`` events become the moves;
    the paired ``harness.tool.returned`` events become the observations).

    ``moves`` holds one ScriptedProvider move per tool invocation, in order, plus a
    closing text move with the run's result value when there is one, so a replay
    reproduces both the navigation and the extraction. ``observations[tool]`` is that
    tool's returned observations in order. A ``browser`` ``op=close`` returns without a
    session ``send``, so its observation is dropped to keep the replay aligned.

    Assumes sequential tool use (one call per model turn) — the shape ``record_track``
    projects and the construction loop produces; a turn that fans out parallel tool
    calls is not captured faithfully."""
    recorded_moves: list[dict] = []
    per_tool: dict[str, list[dict]] = {}
    last_invoked: dict | None = None

    for event in events:
        kind = getattr(event, "type", "")
        payload = getattr(event, "payload", {}) or {}
        if kind not in ("harness.tool.invoked", "harness.tool.returned"):
            continue
        tool = payload.get("tool")
        if not tool:
            continue

        if kind == "harness.tool.invoked":
            call_args = dict(payload.get("args", {}))
            recorded_moves.append({"tool": tool, "args": call_args})
            last_invoked = {"tool": tool, "args": call_args}
            continue

        # harness.tool.returned — pair it with the most recent invocation.
        observation = payload.get("observation")
        closing = (
            last_invoked is not None
            and last_invoked["tool"] == "browser"
            and last_invoked["args"].get("op") == "close"
        )
        if isinstance(observation, dict) and not closing:
            per_tool.setdefault(tool, []).append(dict(observation))
        last_invoked = None

    final_value = getattr(result, "value", None)
    if final_value is not None:
        recorded_moves.append({"text": json.dumps(final_value), "finish": "stop"})
    return Bundle(task=task, moves=recorded_moves, observations=per_tool, model=model)
307
+
308
+
309
+ # --- the reusable offline runner ---------------------------------------------
310
+
311
+
312
async def replay_offline(spec: Any, cfg: Any, bundle: Bundle) -> tuple[Any, list]:
    """Replay an agent offline against ``bundle``; returns ``(result, events)``.

    A fresh registry is built from ``cfg``, rebound to the bundle, and the real loop is
    driven on a sink-free bus — no model, no network, no filesystem writes. This is the
    shared core behind ``zu run --offline`` (the keystone) and ``zu harden`` (replaying
    perturbed bundles). The bus is always closed, even when the run raises."""
    from zu_core.bus import EventBus
    from zu_core.loop import run_task

    from .config import build_registry

    registry = build_registry(cfg)
    provider = rebind_offline(registry, bundle)
    bus = EventBus()
    try:
        loop_options = {
            "containment": cfg.containment,
            "max_observation_chars": cfg.max_observation_chars,
            "observation_strategy": cfg.observation_strategy,
            "max_context_chars": cfg.max_context_chars,
        }
        result = await run_task(spec, provider, registry, bus, **loop_options)
        events = await bus.query()
        return result, events
    finally:
        await bus.aclose()
zu_cli/sandbox.py ADDED
@@ -0,0 +1,276 @@
1
+ """Whole-agent-in-container containment — the launcher for ``containment: required``.
2
+
3
+ How runs actually get contained: the agent cannot police a hostile *tool* in
4
+ process — a tool is just Python running in your interpreter, so by the time the
5
+ loop sees a call the tool's code has already run. Real tool containment is an OS
6
+ boundary. This launcher runs the **entire agent inside a hardened container**
7
+ whose only route off-box is an egress proxy on an internal (default-DROP)
8
+ network, with all caps dropped, no-new-privileges, and a blocking seccomp
9
+ profile. Inside that box ``ZU_SANDBOXED=1`` is set, so the fail-closed floor
10
+ (:func:`zu_core.security.enforce_containment`) is satisfied and tools may run —
11
+ the container, not the loop, is what contains them.
12
+
13
+ Two halves:
14
+
15
+ * :func:`run_contained_from_env` — the in-container entrypoint (console script
16
+ ``zu-run-contained``). Reads the task + config from the environment, runs the
17
+ agent, and writes ``{"result": ..., "events": [...]}`` as one JSON object on
18
+ stdout. It runs *as contained* only because the launcher set ``ZU_SANDBOXED``.
19
+ * :class:`SandboxLauncher` — the host side. Launches the proxy, launches the
20
+ hardened container on the internal network, execs the entrypoint with the
21
+ task/config in its env, parses the Result back, and tears everything down.
22
+
23
+ The backend and proxy are injected (``LocalDockerBackend`` + ``LocalEgressProxy``
24
+ in production, fakes in tests), so the orchestration is exercised without a
25
+ daemon. The Docker daemon itself is the only un-fakeable part — the same P0/P1
26
+ boundary the red-team container form documents.
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import asyncio
32
+ import json
33
+ import os
34
+ import sys
35
+ import time
36
+ from dataclasses import dataclass
37
+ from pathlib import Path
38
+ from typing import Any
39
+
40
+ from zu_core.contracts import Result
41
+ from zu_core.loop import run_task
42
+ from zu_core.security import SANDBOX_ENV
43
+
44
+
45
def _seccomp_block_profile() -> str:
    """Return the host path of the blocking seccomp profile shipped with zu-backends
    (Docker reads the profile from the client host). The import is deferred to call
    time so that importing this module never requires zu-backends to be installed."""
    import zu_backends

    package_dir = Path(zu_backends.__file__).parent
    profile = package_dir.joinpath("seccomp", "redteam-block.json")
    return str(profile)
52
+
53
+
54
+ def _last_json_object(out: str) -> dict:
55
+ """Parse the last non-empty line of stdout as the result JSON. The entrypoint
56
+ writes exactly one JSON object, but taking the last line tolerates any
57
+ incidental log line the image might print before it."""
58
+ for line in reversed(out.splitlines()):
59
+ line = line.strip()
60
+ if line:
61
+ return json.loads(line)
62
+ raise ValueError("no output to parse")
63
+
64
+
65
async def _run_in_process(task: dict, config: dict) -> tuple[Result, list]:
    """Execute a single task inside the current process; returns ``(Result, events)``.

    This is what the in-container entrypoint calls: there the launcher has already set
    ``ZU_SANDBOXED``, so the containment floor passes. The bus is closed whether or not
    the run succeeds."""
    from .config import assemble, coerce_config, coerce_task

    cfg = coerce_config(config)
    spec = coerce_task(task, cfg.budget, allow_paths=False)
    provider, registry, bus, providers = assemble(cfg)
    try:
        loop_options = {
            "providers": providers,
            "containment": cfg.containment,
            "max_observation_chars": cfg.max_observation_chars,
            "observation_strategy": cfg.observation_strategy,
            "max_context_chars": cfg.max_context_chars,
        }
        result = await run_task(spec, provider, registry, bus, **loop_options)
        events = await bus.query()
        return result, events
    finally:
        await bus.aclose()
85
+
86
+
87
def run_contained_from_env(argv: list[str] | None = None) -> int:
    """Console-script entrypoint (``zu-run-contained``), executed INSIDE the container.

    Takes the task and config as JSON from ``ZU_TASK`` / ``ZU_CONFIG`` (an unset or
    empty variable means ``{}``), runs the agent, and prints the Result plus event log
    as a single JSON object followed by a newline on stdout. ``argv`` is accepted but
    not used. Returns 0."""
    environ = os.environ

    # The launcher mounts a bundle at ZU_BUNDLE and points PYTHONPATH at it; we also
    # add it to sys.path ourselves in case the exec environment ignored PYTHONPATH.
    bundle = environ.get("ZU_BUNDLE")
    if bundle:
        # The mounted bundle ships its own gitignored .env (e.g. EXA_API_KEY): load it
        # so the agent's tools find their keys inside the box.
        from .config import load_dotenv

        load_dotenv(Path(bundle).joinpath(".env"))
        # Then make the bundle importable so its tools/ import-refs resolve.
        if bundle not in sys.path:
            sys.path.insert(0, bundle)

    task = json.loads(environ.get("ZU_TASK") or "{}")
    config = json.loads(environ.get("ZU_CONFIG") or "{}")
    result, events = asyncio.run(_run_in_process(task, config))

    report = {
        "result": result.model_dump(mode="json"),
        "events": [event.model_dump(mode="json") for event in events],
    }
    json.dump(report, sys.stdout, default=str)
    sys.stdout.write("\n")
    return 0
117
+
118
+
119
@dataclass
class SandboxLauncher:
    """Run the whole agent inside a hardened container whose sole egress is a proxy
    SIDECAR on an internal (default-DROP) network — the faithful topology
    (RED_TEAM_CONTAINER.md §3), the same one ``SidecarContainerGate`` enforces.

    A host-side proxy cannot be the sole egress of an ``--internal`` container, so
    the proxy runs as its own container on the internal network (the target's only
    route off-box) with a second leg on bridge so IT — and only it — reaches the
    outside. The target is internal-only, routed through the proxy by name, kept
    alive with ``sleep infinity`` so we exec ``zu-run-contained`` into it.

    ``backend`` is a ``LocalDockerBackend`` (its docker client manages the network
    and sidecar). ``allowlist`` on :meth:`run` is what the proxy permits — the real
    egress boundary; every other host (and every internal/metadata host) is refused
    and logged. ``"*"`` permits any host: pass an explicit list for a real boundary."""

    backend: Any
    image: str
    network_name: str = "zu-sandbox-net"
    proxy_port: int = 8080
    seccomp: str | None = None  # None -> the shipped blocking profile
    exec_timeout_s: float | None = None
    ready_timeout_s: float = 20.0

    async def run(
        self, task: dict, config: dict, *, allowlist: list[str], bundle_dir: str | None = None
    ) -> tuple[Result, list[dict]]:
        """Run a whole AGENT inside the box (the ``zu-run-contained`` entrypoint) and parse
        its Result + event log back. A thin wrapper over :meth:`run_entrypoint`."""
        payload = await self.run_entrypoint(
            ["zu-run-contained"],
            {"ZU_TASK": json.dumps(task), "ZU_CONFIG": json.dumps(config)},
            allowlist=allowlist, bundle_dir=bundle_dir,
        )
        result = Result.model_validate(payload["result"])
        return result, payload.get("events", [])

    async def run_entrypoint(
        self, entrypoint: list[str], exec_env: dict, *, allowlist: list[str],
        bundle_dir: str | None = None,
    ) -> dict:
        """Launch the hardened box (internal network + egress-proxy sidecar, caps dropped,
        blocking seccomp), exec ``entrypoint`` inside it with the contained-floor + proxy env
        merged with ``exec_env``, and return the last JSON object it wrote to stdout. The
        reusable core of every contained run — an agent run (:meth:`run`) and contained
        construction (``construct_sandbox.launch_contained_construction``) both go through
        here, so the container/egress topology lives in exactly one audited place.

        Teardown always attempts all three resources: even if destroying the target
        raises, the proxy sidecar and the internal network are still removed
        (best-effort), so a failed destroy cannot leak them.

        Raises:
            RuntimeError: the proxy sidecar exited or never became ready, or the
                entrypoint produced no stdout.
        """
        client = self.backend._docker()
        proxy_name = f"{self.network_name}-proxy"
        # Clear any resources a crashed prior run may have left behind.
        await self._remove_container(client, proxy_name)
        await self._remove_network(client, self.network_name)
        net = await asyncio.to_thread(client.networks.create, self.network_name, internal=True)
        proxy = None
        sandbox = None
        try:
            # The egress-proxy sidecar: a STABLE name (so the target resolves it via
            # the internal network's embedded DNS), on the internal network, plus a
            # bridge leg so it — and only it — reaches the outside.
            proxy_env = {
                "ZU_EGRESS_ALLOWLIST": ",".join(allowlist),
                "ZU_EGRESS_PORT": str(self.proxy_port),
            }
            # The proxy is trusted control-plane infra (the egress boundary itself),
            # run as root so it can bind/log/write regardless of the image's default
            # user. The untrusted target below keeps the image's non-root user.
            proxy = await asyncio.to_thread(
                client.containers.run, self.image, ["zu-egress-proxy"], name=proxy_name,
                network=self.network_name, environment=proxy_env, user="0", detach=True)
            bridge = await asyncio.to_thread(client.networks.get, "bridge")
            await asyncio.to_thread(bridge.connect, proxy)
            await self._await_proxy_ready(proxy)

            # The target: internal-only (the proxy is the only route off-box), caps
            # dropped + blocking seccomp, kept alive so we exec the entrypoint into it.
            target_spec: dict = {
                "image": self.image,
                "network": "isolated",
                "network_name": self.network_name,
                "proxy": {"host": proxy_name, "port": self.proxy_port},
                "seccomp": self.seccomp or _seccomp_block_profile(),
                "command": ["sleep", "infinity"],
            }
            # A bundle's own tools/ are not in the image — mount the bundle dir
            # READ-ONLY at /bundle so the agent's `tools.x:Class` import-refs
            # resolve inside the box. The user owns this code; the mount is ro and
            # the container is still caps-dropped + egress-gated.
            if bundle_dir is not None:
                target_spec["volumes"] = {
                    str(Path(bundle_dir).resolve()): {"bind": "/bundle", "mode": "ro"}
                }
            sandbox = await self.backend.launch(target_spec)
            # `docker exec` does not inherit the container's runtime proxy env, so
            # pass it explicitly. ZU_SANDBOXED marks the run contained — set HERE,
            # where the boundary is actually established, never baked into the image.
            proxy_url = f"http://{proxy_name}:{self.proxy_port}"
            env = {
                SANDBOX_ENV: "1",
                "HTTP_PROXY": proxy_url, "HTTPS_PROXY": proxy_url,
                "http_proxy": proxy_url, "https_proxy": proxy_url,
                "NO_PROXY": "localhost,127.0.0.1",
            }
            if bundle_dir is not None:
                # Put the mounted bundle on the path so its tools/ import-refs resolve.
                env["PYTHONPATH"] = "/bundle"
                env["ZU_BUNDLE"] = "/bundle"
            env.update(exec_env)  # the caller's entrypoint-specific vars (task/config, opts)
            code, out, err = await self.backend.exec_entrypoint(
                sandbox, entrypoint, environment=env, timeout_s=self.exec_timeout_s,
            )
            if not out.strip():
                raise RuntimeError(
                    f"contained run produced no output (exit {code}): {err[:300]}"
                )
            return _last_json_object(out)
        finally:
            # Nested so that a failing target destroy still propagates, but can never
            # skip removal of the proxy sidecar and the internal network.
            try:
                if sandbox is not None:
                    await self.backend.destroy(sandbox)
            finally:
                await self._best_effort(proxy, "remove", force=True)
                await self._best_effort(net, "remove")

    async def _await_proxy_ready(self, proxy: Any) -> None:
        """Poll the sidecar's logs until ``proxy.ready`` appears.

        Raises:
            RuntimeError: the container reports ``exited``/``dead`` first, or
                ``ready_timeout_s`` elapses without the marker.
        """
        deadline = time.monotonic() + self.ready_timeout_s
        while time.monotonic() < deadline:
            # reload() refreshes the container's cached status before we read it.
            await asyncio.to_thread(proxy.reload)
            logs = (await asyncio.to_thread(proxy.logs)).decode("utf-8", "replace")
            if "proxy.ready" in logs:
                return
            if getattr(proxy, "status", "") in ("exited", "dead"):
                raise RuntimeError(f"proxy sidecar exited before ready: {logs[-300:]}")
            await asyncio.sleep(0.2)
        raise RuntimeError("proxy sidecar did not become ready in time")

    @staticmethod
    async def _best_effort(obj: Any, method: str, **kw: Any) -> None:
        """Call ``obj.<method>(**kw)`` in a worker thread, ignoring any failure; a
        ``None`` ``obj`` (resource never created) is a no-op."""
        if obj is None:
            return
        try:
            await asyncio.to_thread(getattr(obj, method), **kw)
        except Exception:  # noqa: BLE001 - teardown must not raise over the result
            pass

    @staticmethod
    async def _remove_container(client: Any, name: str) -> None:
        """Force-remove the container called ``name`` if it exists; errors are ignored."""
        try:
            c = await asyncio.to_thread(client.containers.get, name)
            await asyncio.to_thread(c.remove, force=True)
        except Exception:  # noqa: BLE001 - absent is the normal case
            pass

    @staticmethod
    async def _remove_network(client: Any, name: str) -> None:
        """Remove the network called ``name`` if it exists; errors are ignored."""
        try:
            n = await asyncio.to_thread(client.networks.get, name)
            await asyncio.to_thread(n.remove)
        except Exception:  # noqa: BLE001 - absent is the normal case
            pass