zu-tools 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zu_tools/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Zu built-in tools.
2
+
3
+ The built-ins are written against the exact same Tool port users get — which
4
+ is what proves the plugin system is real, not a second-class add-on.
5
+ """
@@ -0,0 +1,429 @@
1
+ """action_surface — the perception-reduction tool (tier 3, Engineering Design §11).
2
+
3
+ A rendered web page is a DOM of 100k–1M+ tokens; the decision the agent needs
4
+ from it — "click Place order" — is a handful. Pushing the whole blob through the
5
+ model is slow, expensive, and *worse for accuracy* (the signal drowns in
6
+ markup). The way out is a reframe: the agent almost never needs the page — it
7
+ needs the **set of things it can do** on the page. That set is a few dozen
8
+ affordances, a few hundred tokens.
9
+
10
+ This tool produces that set, **deterministically**. The decision rule (§4.5)
11
+ settles why it is a tool and not a model job: a script may *enumerate what is
12
+ possible* (every actionable element), but it must not *decide what is reasonable*
13
+ (which one to pick) — that is the policy's judgment. So the reducer surfaces the
14
+ possible and never ranks or prunes by guessed task-relevance.
15
+
16
+ The pipeline (§11.2), run over an accessibility tree rather than the raw DOM:
17
+
18
+ 1. Walk the accessibility tree — roles, names, states — an order of magnitude
19
+ smaller than the DOM, built to answer "what can a user do here".
20
+ 2. Filter to interactive + meaningful (actions, plus the headings/labels/errors
21
+ an action needs); drop the rest.
22
+ 3. Prune the invisible — ignored, off-screen, zero-area, hidden.
23
+ 4. Resolve a stable, human-meaningful label per element.
24
+ 5. Assign a stable, opaque handle (a1, a2 …) that maps back, harness-side, to a
25
+ role+name locator. The model emits the handle, never a selector (§11.3).
26
+ 6. Emit a compact, typed representation.
27
+
28
+ And the competence boundary (§11.4): the honest risk is a false negative —
29
+ pruning the one element the task needed (a canvas button, an unlabeled icon). So
30
+ the reducer must know when it is **blind** and *signal* escalation to tier-4
31
+ vision rather than silently return an incomplete surface. ``blind`` on the
32
+ result is that signal; the ``action-surface-blind`` detector turns it into an
33
+ ESCALATE. Graceful degradation, never silent incompleteness.
34
+
35
+ The deterministic reducer (:func:`reduce_surface`) is the whole value and is
36
+ pure — it runs on an accessibility-tree snapshot with no browser, which is how a
37
+ coding harness drives it offline and how it is tested at $0. The live arm asks a
38
+ browser session for the tree (:meth:`ActionSurface.__call__` with ``op=open``)
39
+ and runs the same reducer over it.
40
+ """
41
+
42
+ from __future__ import annotations
43
+
44
+ from typing import Any
45
+ from urllib.parse import urlsplit
46
+
47
+ from pydantic import BaseModel, Field
48
+
49
+ from zu_core.ports import CAP_NET, CAP_SANDBOX, EGRESS_OPEN, BrowserSessionHandle, SessionBackend
50
+
51
+ from .net import validate_and_pin
52
+
53
# Container image used for the live (``op=open``) arm when the caller does not
# pass ``image`` to :class:`ActionSurface`.
_DEFAULT_IMAGE = "ghcr.io/k3-mt/zu-render-chromium:latest"

# Roles that represent something the agent can *do*. The list is generous on
# purpose — enumerating the possible is the job; choosing among it is the
# policy's. Anything actionable a real accessibility tree exposes belongs here.
# Matching in the reducer is an exact, case-sensitive string comparison.
INTERACTIVE_ROLES: frozenset[str] = frozenset({
    "button", "link", "textbox", "searchbox", "combobox", "checkbox", "radio",
    "switch", "slider", "spinbutton", "menuitem", "menuitemcheckbox",
    "menuitemradio", "tab", "option", "textarea", "listbox", "menubutton",
    "togglebutton", "datepicker", "colorwell",
})

# Roles whose *text* is meaningful context for choosing an action — headings
# orient, alerts/status carry the error and validation text an action needs —
# but which are not themselves actionable. We keep their names as context, never
# as affordances. Checked before INTERACTIVE_ROLES in the reducer, so a role
# must not be listed in both sets.
CONTEXT_ROLES: frozenset[str] = frozenset({
    "heading", "alert", "status", "alertdialog", "log", "marquee",
})
72
+
73
+
74
class AxNode(BaseModel):
    """One normalised accessibility-tree node — the reducer's input currency.

    A small, serialisable shape so the reducer is pure and a harness can feed it
    a captured tree directly. :func:`normalize_axtree` produces these from the
    raw CDP ``Accessibility.getFullAXTree`` format.
    """

    # Accessibility role; the reducer compares it against INTERACTIVE_ROLES and
    # CONTEXT_ROLES to decide whether the node is surfaced at all.
    role: str
    # Accessible name — the first label candidate the reducer tries.
    name: str = ""
    # Current value of the control, copied onto the emitted affordance as-is.
    value: str | None = None
    # State tokens; normalize_axtree emits e.g. "disabled" or "checked:mixed".
    states: list[str] = Field(default_factory=list)
    # Fallback label candidates, tried in this order when ``name`` is blank.
    placeholder: str | None = None
    description: str | None = None
    # Pruning inputs. ``visible`` folds in aria-hidden/display:none/off-screen;
    # ``ignored`` is the tree's own "not exposed" flag; ``bounds`` is [x,y,w,h].
    # A node is pruned when ignored, not visible, or when a 4-element ``bounds``
    # has a non-positive width or height; ``bounds=None`` never prunes.
    visible: bool = True
    ignored: bool = False
    bounds: list[float] | None = None
93
+
94
+
95
class Affordance(BaseModel):
    """One thing the policy can do, addressed by an opaque handle."""

    # Opaque identifier ("a1", "a2", …) assigned in input order by the reducer.
    handle: str
    # Accessibility role of the underlying node (one of INTERACTIVE_ROLES).
    role: str
    # Resolved human-meaningful label; never empty for an emitted affordance.
    label: str
    # Current value and state tokens, copied from the source node.
    value: str | None = None
    states: list[str] = Field(default_factory=list)
103
+
104
+
105
class Surface(BaseModel):
    """The compact, typed reduction of a page — a few hundred tokens.

    ``handle_map`` is the harness-side indirection (§11.3): handle → role+name
    locator. The model only ever sees and emits handles; the durable locator
    stays here and is re-resolved at action time.
    """

    # Page identity, passed through from the caller / session response.
    title: str = ""
    url: str = ""
    # Actionable elements, in the order they appeared in the input tree.
    affordances: list[Affordance] = Field(default_factory=list)
    # Labels of context-role nodes (headings, alerts, status …), in input order.
    context: list[str] = Field(default_factory=list)
    # handle → {"role": ..., "name": ...}; one entry per emitted affordance.
    handle_map: dict[str, dict] = Field(default_factory=dict)
    # True when the surface cannot be trusted to be complete; ``blind_reason``
    # carries the human-readable explanation and is None otherwise.
    blind: bool = False
    blind_reason: str | None = None
120
+
121
+
122
+ def _label_of(node: AxNode) -> str:
123
+ """The stable, human-meaningful label (§11.2 step 4): accessible name first
124
+ (which already folds in aria-label and an associated <label>), then
125
+ placeholder, then description. Class soup never reaches here — if none of
126
+ these is set, the element is unlabeled and counts toward blindness."""
127
+ for candidate in (node.name, node.placeholder, node.description):
128
+ if candidate and candidate.strip():
129
+ return candidate.strip()
130
+ return ""
131
+
132
+
133
+ def _is_pruned(node: AxNode) -> bool:
134
+ """Step 3 — prune the invisible. ignored / not-visible / zero-area go."""
135
+ if node.ignored or not node.visible:
136
+ return True
137
+ if node.bounds is not None and len(node.bounds) == 4:
138
+ w, h = node.bounds[2], node.bounds[3]
139
+ if w <= 0 or h <= 0:
140
+ return True
141
+ return False
142
+
143
+
144
def reduce_surface(
    nodes: list[AxNode],
    *,
    title: str = "",
    url: str = "",
    unlabeled_ratio: float = 0.5,
) -> Surface:
    """Turn an accessibility tree into its action surface — pure, deterministic.

    Pruned nodes are discarded first. Of the survivors, context-role nodes
    contribute their label to ``context``; interactive-role nodes with a label
    become affordances with handles ``a1, a2 …`` numbered in input order, so
    an identical tree always produces identical handles. Interactive nodes
    without a label are counted but not emitted.

    The blind signal (§11.4) is raised when the result cannot be trusted:
    something survived pruning yet no affordance was emitted, or the share of
    unlabeled interactive nodes is strictly greater than ``unlabeled_ratio``.
    """
    surviving = [candidate for candidate in nodes if not _is_pruned(candidate)]

    context_lines: list[str] = []
    actions: list[Affordance] = []
    locators: dict[str, dict] = {}
    actionable_total = 0
    missing_label = 0

    for candidate in surviving:
        kind = candidate.role
        is_context = kind in CONTEXT_ROLES
        if not is_context and kind not in INTERACTIVE_ROLES:
            # Neither orienting text nor something to act on — not surfaced.
            continue

        text = _label_of(candidate)
        if is_context:
            if text:
                context_lines.append(text)
            continue

        actionable_total += 1
        if not text:
            # Possible but unaddressable: it feeds the blindness ratio rather
            # than handing the model a handle with no meaning.
            missing_label += 1
            continue

        key = f"a{len(actions) + 1}"
        actions.append(
            Affordance(
                handle=key,
                role=kind,
                label=text,
                value=candidate.value,
                states=list(candidate.states),
            )
        )
        # Harness-side durable locator (role + accessible name); the model
        # only ever sees the handle.
        locators[key] = {"role": kind, "name": text}

    reason: str | None
    if surviving and not actions:
        reason = "page had content but the accessibility tree yielded no addressable actions"
    elif actionable_total and (missing_label / actionable_total) > unlabeled_ratio:
        reason = (
            f"{missing_label}/{actionable_total} interactive elements are unlabeled "
            "in the accessibility tree — too thin to trust"
        )
    else:
        reason = None

    return Surface(
        title=title,
        url=url,
        affordances=actions,
        context=context_lines,
        handle_map=locators,
        blind=reason is not None,
        blind_reason=reason,
    )
221
+
222
+
223
+ def _ax_string(field: Any) -> str:
224
+ """Read a CDP AX value object ``{"type":...,"value":...}`` as a string."""
225
+ if isinstance(field, dict):
226
+ v = field.get("value")
227
+ return str(v) if v is not None else ""
228
+ return ""
229
+
230
+
231
def normalize_axtree(cdp_nodes: list[dict]) -> list[AxNode]:
    """Normalise the raw CDP ``Accessibility.getFullAXTree`` node list into
    :class:`AxNode` records, in document (pre-order) order as CDP returns them.

    CDP shape per node: ``role``/``name`` are ``{type,value}`` objects;
    ``properties`` is a list of ``{name, value:{value}}``; ``ignored`` is a bool.
    States we surface: disabled, checked, expanded, required, focused, selected,
    invalid. A state whose value is ``True`` or ``"true"`` is emitted bare
    (``"checked"``); any other non-empty, non-``"false"`` string is emitted as
    ``"<state>:<value>"`` (``"checked:mixed"``). Placeholder is read from the
    node's properties; description and value from the node itself.

    Entries that are not dicts, and nodes with no role, are skipped. A missing
    or null ``properties`` is treated as an empty list. ``bounds`` is never
    populated here, so pruning of normalised nodes rests on ``ignored`` and
    the ``hidden`` property alone.
    """
    # Sorted once, up front, so every node lists its states in the same order.
    state_names = sorted(
        ("disabled", "checked", "expanded", "required", "focused", "selected", "invalid")
    )
    out: list[AxNode] = []
    for n in cdp_nodes:
        if not isinstance(n, dict):
            continue
        role = _ax_string(n.get("role"))
        if not role:
            continue

        # ``or []`` covers an explicit ``"properties": null`` as well as a
        # missing key; the default argument alone only covers the latter.
        props = {
            p.get("name"): p.get("value", {})
            for p in (n.get("properties") or [])
            if isinstance(p, dict)
        }

        states: list[str] = []
        for sp in state_names:
            val = props.get(sp)
            v = val.get("value") if isinstance(val, dict) else None
            if v is True:
                states.append(sp)
            elif isinstance(v, str) and v not in ("false", ""):
                states.append(sp if v == "true" else f"{sp}:{v}")

        # Read ``hidden`` with the same rule as the states above: a string
        # "false"/"" means not hidden. A plain bool() would treat the string
        # "false" as truthy and prune a visible node.
        hidden = props.get("hidden")
        hidden_value = hidden.get("value", False) if isinstance(hidden, dict) else False
        if isinstance(hidden_value, str):
            is_hidden = hidden_value not in ("false", "")
        else:
            is_hidden = bool(hidden_value)

        out.append(
            AxNode(
                role=role,
                name=_ax_string(n.get("name")),
                value=_ax_string(n.get("value")) or None,
                states=states,
                placeholder=_ax_string(props.get("placeholder")) or None,
                description=_ax_string(n.get("description")) or None,
                # CDP marks unexposed nodes via ``ignored``.
                ignored=bool(n.get("ignored", False)),
                visible=not is_hidden,
            )
        )
    return out
269
+
270
+
271
class ActionSurface:
    """Tier-3 tool: reduce a page to its action surface (and keep the handle map).

    Two ways in, one reducer:

    * ``op=reduce`` (default) — reduce a tree the caller already has. Pass
      ``nodes`` (AxNode dicts) or raw ``axtree`` (CDP nodes), plus ``title`` /
      ``url``. No browser, fully offline — the harness-driven and tested path.
    * ``op=open`` — open ``url`` in a headless browser session, ask it for the
      accessibility tree, and reduce that. The live arm.

    After a reduction the handle→locator map is held on the instance for the run;
    ``op=resolve`` returns the durable locator for a handle (a stale handle is an
    escalation, not a crash — the caller re-resolves at action time, §11.3).

    Bad input (unknown op, missing argument, malformed nodes, unusable session
    response) is reported as an ``{"error": ...}`` observation, not raised.
    """

    name = "action_surface"
    tier = 3  # the accessibility-tree tier; unlocked by a detector ESCALATE
    schema = {
        "name": "action_surface",
        "description": (
            "Reduce a web page to the compact SET OF THINGS YOU CAN DO on it — a "
            "flat list of affordances (button/link/textbox/…) each with an opaque "
            "handle (a1, a2 …) and a human label. You choose a handle and act on "
            "it; you never see or emit a CSS selector. op=open a url to capture and "
            "reduce its accessibility tree; op=resolve a handle to its locator. If "
            "'blind' is true the tree is too thin to trust — escalate to vision."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "op": {"type": "string", "enum": ["reduce", "open", "resolve"]},
                "url": {"type": "string", "description": "for op=open: the page to reduce"},
                "handle": {"type": "string", "description": "for op=resolve: the handle to resolve"},
                "axtree": {"type": "array", "items": {"type": "object"},
                           "description": "for op=reduce: raw CDP getFullAXTree nodes"},
                "nodes": {"type": "array", "items": {"type": "object"},
                          "description": "for op=reduce: pre-normalised AxNode dicts"},
                "title": {"type": "string"},
            },
            "required": ["op"],
        },
    }
    prompt_fragment = (
        "action_surface(op=open, url): reduce a page to a short list of affordances "
        "(handles a1,a2,… with labels) instead of reading the whole DOM. Pick a handle "
        "to act on; resolve(handle) gives its locator. 'blind' means escalate to vision."
    )
    capabilities = frozenset({CAP_NET, CAP_SANDBOX})
    egress = frozenset({EGRESS_OPEN})

    def __init__(
        self,
        backend: SessionBackend | None = None,
        image: str = _DEFAULT_IMAGE,
        *,
        allow_private: bool | None = None,
        unlabeled_ratio: float = 0.5,
    ) -> None:
        """Configure the tool; no browser or container is started here.

        ``backend`` is resolved lazily on the first ``op=open``. ``image`` is
        the container image requested for the session. ``allow_private`` is
        forwarded to ``validate_and_pin``. ``unlabeled_ratio`` is the blindness
        threshold handed to :func:`reduce_surface`.
        """
        self._backend = backend
        self.image = image
        self.allow_private = allow_private
        self.unlabeled_ratio = unlabeled_ratio
        # Locators of the most recently emitted surface; replaced on each emit.
        self._handle_map: dict[str, dict] = {}
        self._session: BrowserSessionHandle | None = None

    def _resolve_backend(self) -> SessionBackend:
        """Return the session backend, creating the local Docker one on demand."""
        if self._backend is None:
            # Imported lazily so the offline reduce/resolve paths never need
            # the backends package.
            from zu_backends.local_docker import LocalDockerBackend

            self._backend = LocalDockerBackend()
        return self._backend

    async def __call__(
        self,
        ctx: Any,
        op: str = "reduce",
        url: str | None = None,
        handle: str | None = None,
        axtree: list | None = None,
        nodes: list | None = None,
        title: str | None = None,
    ) -> dict:
        """Dispatch on ``op`` and return an observation dict.

        ``reduce`` reduces a supplied tree, ``resolve`` looks a handle up in the
        map of the last emitted surface, ``open`` captures a live page. An
        unknown op or a missing required argument yields ``{"error": ...}``.
        ``ctx`` is accepted for the tool calling convention and not used here.
        """
        if op == "reduce":
            return self._reduce_op(nodes=nodes, axtree=axtree, title=title or "", url=url or "")

        if op == "resolve":
            if not handle:
                return {"error": "op=resolve requires a handle"}
            locator = self._handle_map.get(handle)
            if locator is None:
                # Stale/unknown handle: signal a re-resolve, never a crash (§11.3).
                return {"stale_handle": handle,
                        "error": f"handle {handle!r} is not on the current surface; re-capture"}
            return {"handle": handle, "locator": locator}

        if op == "open":
            if not url:
                return {"error": "op=open requires a url"}
            return await self._open_op(url, title or "")

        return {"error": f"unknown op {op!r}; use reduce/open/resolve"}

    def _reduce_op(self, *, nodes: list | None, axtree: list | None, title: str, url: str) -> dict:
        """Reduce a caller-supplied tree; ``nodes`` wins when both are given.

        Malformed ``nodes`` produce an ``{"error": ...}`` observation and leave
        the previously held handle map untouched.
        """
        if nodes is not None:
            try:
                ax = [n if isinstance(n, AxNode) else AxNode.model_validate(n) for n in nodes]
            except (TypeError, ValueError) as exc:
                # pydantic's ValidationError is a ValueError; TypeError covers a
                # non-iterable ``nodes``. Either way this is bad tool input, so
                # it is reported like every other bad-input path, not raised.
                return {"error": f"op=reduce got malformed 'nodes': {exc}"}
        elif axtree is not None:
            ax = normalize_axtree([n for n in axtree if isinstance(n, dict)])
        else:
            return {"error": "op=reduce requires 'nodes' or 'axtree'"}
        surface = reduce_surface(ax, title=title, url=url, unlabeled_ratio=self.unlabeled_ratio)
        return self._emit(surface)

    async def _open_op(self, url: str, title: str) -> dict:
        """Open ``url`` in a fresh session, capture its tree, and reduce it.

        Any previous session is closed first. The new session stays open after
        the capture and is released by the next ``op=open`` or by
        :meth:`aclose`.
        """
        await self._close_session()
        pinned_ip = validate_and_pin(url, allow_private=self.allow_private)
        spec: dict[str, Any] = {"image": self.image, "tier": self.tier, "network": True}
        host = urlsplit(url).hostname
        if pinned_ip is not None and host:
            spec["extra_hosts"] = {host: pinned_ip}
        self._session = await self._resolve_backend().open_session(spec)
        # Ask the session for the accessibility tree. The browser server returns
        # ``{axtree: [...CDP nodes...], title, url}``; an older server that lacks
        # the op returns an error, which we surface (not a crash).
        resp = await self._session.send({"op": "axtree", "url": url})
        raw_tree = resp.get("axtree") if isinstance(resp, dict) else None
        if not isinstance(raw_tree, (list, tuple)):
            if isinstance(resp, dict):
                # Fall back to a real message so the error never reads "None".
                err = resp.get("error") or "session response carried no axtree"
            else:
                err = "bad session response"
            return {"error": f"could not capture accessibility tree: {err}"}
        ax = normalize_axtree([n for n in raw_tree if isinstance(n, dict)])
        surface = reduce_surface(
            ax,
            # ``or`` rather than a .get() default: a null title/url in the
            # response must not be stringified to "None".
            title=title or str(resp.get("title") or ""),
            url=str(resp.get("url") or url),
            unlabeled_ratio=self.unlabeled_ratio,
        )
        return self._emit(surface)

    def _emit(self, surface: Surface) -> dict:
        """The surface as a loop-friendly observation. The handle map is held on
        the instance (harness-side) and echoed for the harness; ``surface_blind``
        is the top-level flag the blind detector reads."""
        self._handle_map = dict(surface.handle_map)
        return {
            "action_surface": surface.model_dump(exclude={"handle_map"}),
            "handle_map": surface.handle_map,
            "surface_blind": surface.blind,
        }

    async def _close_session(self) -> None:
        """Close the held session, if any, swallowing teardown errors."""
        if self._session is not None:
            # Detach before closing so a failing close cannot leave a
            # half-closed session on the instance.
            session, self._session = self._session, None
            try:
                await session.close()
            except Exception:  # noqa: BLE001 — teardown must not raise over a result
                pass

    async def aclose(self) -> None:
        """Close any lingering session — for run teardown so a container never leaks."""
        await self._close_session()
zu_tools/browser.py ADDED
@@ -0,0 +1,172 @@
1
+ """browser — a PERSISTENT, event-driven headless-browser session (tier 2).
2
+
3
+ Where ``render_dom`` is one-shot (a fresh browser per call), ``browser`` keeps ONE
4
+ headless browser ALIVE across calls so a model can drive a reactive, multi-step
5
+ widget the way a person does: ``open`` a url, then ``act`` / ``read`` repeatedly —
6
+ observing the real state (and the network responses it triggered) after each step,
7
+ reacting to what actually happened — then ``close``. That removes the
8
+ timing-fragility of replaying a fixed action sequence into a fresh browser, which a
9
+ reactive SPA defeats (a selection must register before the next step).
10
+
11
+ It surfaces content (rendered text, captured XHR/JSON, optional html); it does not
12
+ provide a transaction-submitting primitive. The session lives in the same hardened,
13
+ headless container as ``render_dom`` (caps dropped, DNS-pinned, --no-sandbox); the
14
+ state is held by the long-lived ``zu-browser`` server inside it.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from typing import Any
20
+ from urllib.parse import urlsplit
21
+
22
+ from zu_core.ports import CAP_NET, CAP_SANDBOX, EGRESS_OPEN, BrowserSessionHandle, SessionBackend
23
+
24
+ from .net import validate_and_pin
25
+
26
# Container image used for the session when the caller does not pass ``image``.
_DEFAULT_IMAGE = "ghcr.io/k3-mt/zu-render-chromium:latest"
# Keys copied from a session response into the observation, in this order;
# a key whose value is missing or None is left out.
_OBS_KEYS = ("status", "url", "text", "html", "content", "network",
             "action_error", "action_error_kind", "consent_dismissed")
29
+
30
+
31
class Browser:
    """Tier-2 tool holding ONE headless-browser session across calls.

    ``op=open`` leases a session and navigates, ``op=act`` / ``op=read`` talk to
    the held page, ``op=close`` releases it. Opening again replaces any session
    already held. Bad input is reported as an ``{"error": ...}`` observation.
    """

    name = "browser"
    tier = 2  # like render_dom — unlocked only after a detector escalates
    schema = {
        "name": "browser",
        "description": (
            "Drive a PERSISTENT headless browser across calls to work through a "
            "reactive, multi-step JS widget. op=open a url, then op=act / op=read "
            "repeatedly (the page state is held between calls), then op=close. "
            "Read the returned text after each step and decide the next action — "
            "if action_error comes back, the selector missed; try another."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "op": {"type": "string", "enum": ["open", "act", "read", "close"]},
                "url": {"type": "string", "description": "for op=open: the page to open"},
                "actions": {
                    "type": "array",
                    "description": "for op=act: actions run in order on the HELD page — "
                    "{click|fill|select|wait_for: <selector>, value?} | {wait_ms:<n>}. "
                    "A selector is CSS or a text= selector; target what you SEE. "
                    "For an AMBIGUOUS option (e.g. a '1'/'2'/'3' button that appears "
                    "many times), add \"near\": \"<label text>\" to a click — it picks "
                    "the matching control closest to that label, e.g. "
                    "{\"click\": \"1\", \"near\": \"Number of pets\"}.",
                    "items": {"type": "object"},
                },
                "wait_until": {
                    "type": "string",
                    "enum": ["load", "domcontentloaded", "networkidle", "commit"],
                    "description": "for op=open: when navigation is done (optional)",
                },
                "capture_network": {
                    "type": "boolean",
                    "description": "for op=open: capture XHR/JSON responses (the widget's data) "
                    "for the whole session (optional)",
                },
                "width": {"type": "integer"},
                "height": {"type": "integer"},
                "html": {"type": "boolean", "description": "also return raw html (optional)"},
            },
            "required": ["op"],
        },
    }
    prompt_fragment = (
        "browser(op=open|act|read|close, url?, actions?, capture_network?): a PERSISTENT "
        "headless browser. Open a url, then act/read step by step (state is kept) to drive "
        "a multi-step widget to the data you need; capture_network grabs the JSON it fetches."
    )
    capabilities = frozenset({CAP_NET, CAP_SANDBOX})
    egress = frozenset({EGRESS_OPEN})

    def __init__(
        self,
        backend: SessionBackend | None = None,
        image: str = _DEFAULT_IMAGE,
        *,
        allow_private: bool | None = None,
    ) -> None:
        """Store configuration only; the backend and session are created lazily."""
        self.image = image
        self.allow_private = allow_private
        self._backend = backend
        # The one live session, kept between calls for the length of a run.
        self._session: BrowserSessionHandle | None = None

    def _resolve_backend(self) -> SessionBackend:
        """Hand back the configured backend, defaulting to local Docker."""
        if self._backend is not None:
            return self._backend
        from zu_backends.local_docker import LocalDockerBackend

        self._backend = LocalDockerBackend()
        return self._backend

    async def __call__(
        self, ctx: Any, op: str, url: str | None = None, actions: list | None = None,
        wait_until: str | None = None, capture_network: bool = False,
        width: int | None = None, height: int | None = None, html: bool = False,
    ) -> dict:
        """Run one browser operation and return its observation.

        ``ctx`` is part of the tool calling convention and is not read here.
        """
        if op == "close":
            await self._close_session()
            return {"closed": True}

        if op in ("act", "read"):
            if self._session is None:
                return {"error": "no open session; call browser(op=open, url=...) first"}
            step: dict[str, Any] = {"op": op}
            if actions and op == "act":
                step["actions"] = actions
            if html:
                step["html"] = True
            return self._normalise(await self._session.send(step))

        if op != "open":
            return {"error": f"unknown op {op!r}; use open/act/read/close"}

        if not url:
            return {"error": "op=open requires a url"}

        # Only one session at a time: drop whatever was held before.
        await self._close_session()
        # Same SSRF backstop + DNS pin as render_dom, before leasing a browser.
        pinned_ip = validate_and_pin(url, allow_private=self.allow_private)
        lease: dict[str, Any] = {"image": self.image, "tier": self.tier, "network": True}
        hostname = urlsplit(url).hostname
        if hostname and pinned_ip is not None:
            lease["extra_hosts"] = {hostname: pinned_ip}
        self._session = await self._resolve_backend().open_session(lease)

        # Optional settings are forwarded only when the caller supplied a
        # truthy value; None marks "leave this key out".
        optional: dict[str, Any] = {
            "wait_until": wait_until if wait_until else None,
            "capture_network": True if capture_network else None,
            "width": int(width) if width else None,
            "height": int(height) if height else None,
            "html": True if html else None,
        }
        request: dict[str, Any] = {"op": "open", "url": url}
        request.update({key: val for key, val in optional.items() if val is not None})
        return self._normalise(await self._session.send(request))

    @staticmethod
    def _normalise(obs: Any) -> dict:
        """Shape a session response into a loop-friendly observation.

        A non-dict response becomes a generic error; a response carrying
        ``error`` but no ``text`` is passed through as just that error.
        Otherwise the result is ``{"rendered": True}`` plus every key in
        ``_OBS_KEYS`` whose value is not None.
        """
        if not isinstance(obs, dict):
            return {"error": "bad session response"}
        if "text" not in obs and "error" in obs:
            return {"error": obs["error"]}
        present = {key: obs[key] for key in _OBS_KEYS if obs.get(key) is not None}
        return {"rendered": True, **present}

    async def _close_session(self) -> None:
        """Release the held session, if there is one; close errors are ignored."""
        stale, self._session = self._session, None
        if stale is None:
            return
        try:
            await stale.close()
        except Exception:  # noqa: BLE001 - teardown must not raise over a result
            pass

    async def aclose(self) -> None:
        """Close a lingering session — for run teardown so a container never leaks."""
        await self._close_session()