zu-tools 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+
9
+ # uv / venv
10
+ .venv/
11
+ uv.lock.bak
12
+
13
+ # Test / type caches
14
+ .pytest_cache/
15
+ .mypy_cache/
16
+ .ruff_cache/
17
+ .coverage
18
+ htmlcov/
19
+
20
+ # Zu runtime artifacts
21
+ *.db
22
+ zu.db
23
+ zu.yaml.local
24
+ zu_review.jsonl
25
+ *.review.jsonl
26
+ # Per-agent cost telemetry ledger — machine-local run history, not source.
27
+ cost.jsonl
28
+ # A recorded replay path is learned per-run and machine-local — regenerated on
29
+ # every successful run, not source. The agent ships; its track does not.
30
+ track.json
31
+ # …except the flagship example ships its track on purpose, as a demo of the
32
+ # record/replay convergence (committed; re-runs show as ordinary modifications).
33
+ !examples/agents/vet-appointment/track.json
34
+
35
+ # Editor / OS
36
+ .idea/
37
+ .vscode/
38
+ .DS_Store
39
+
40
+ # Claude Code local session state
41
+ .claude/
42
+
43
+ # Secrets
44
+ .env
45
+ .env.*
46
+ !.env.example
47
+
48
+ # Microsoft Office temp/lock files
49
+ ~$*
50
+
51
+ # Internal design / strategy docs — kept local, never in the public repo
52
+ *.docx
53
+ *.pdf
54
+ # BUILD.md is the internal build-sequence / deferred-gaps ledger — kept local.
55
+ # (ARCHITECTURE.md is public: an onboarding agent needs the structural map.)
56
+ docs/BUILD.md
57
+
58
+ # Local secret — API key for live validation, never commit
59
+ zu_demo_key.md
60
+ *_key.md
61
+
62
+ # Local PyPI publish token — never commit
63
+ /pypi
64
+
65
+ # Local Discord credentials (bot token / app secrets) — never commit
66
+ /discord
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.4
2
+ Name: zu-tools
3
+ Version: 0.2.0
4
+ Summary: Zu built-in tools: web_search, http_fetch, html_parse, render_dom, browser
5
+ Project-URL: Homepage, https://github.com/k3-mt/zu
6
+ Project-URL: Repository, https://github.com/k3-mt/zu
7
+ License-Expression: Apache-2.0
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
15
+ Classifier: Typing :: Typed
16
+ Requires-Python: >=3.11
17
+ Requires-Dist: httpx
18
+ Requires-Dist: selectolax
19
+ Requires-Dist: zu-backends==0.2.0
20
+ Requires-Dist: zu-core==0.2.0
21
+ Description-Content-Type: text/markdown
22
+
23
+ # zu-tools
24
+
25
+ Tools — the **`Tool`** port: actions the model may take. A tool declares its
26
+ tier (the escalation ladder), its JSON `schema`, a `prompt_fragment`, and its
27
+ **capability envelope** (`capabilities` + `egress`) so its blast radius is
28
+ visible in its own code and the gate can bound it.
29
+
30
+ ## Registered plugins (`zu.tools`)
31
+
32
+ | Name | Class | Tier | Envelope |
33
+ |------|-------|------|----------|
34
+ | `http_fetch` | `HttpFetch` | 1 | `CAP_NET`, open egress — a general web fetcher with a host-level SSRF guard (`net.check_url`). |
35
+ | `html_parse` | `HtmlParse` | 1 | none — pure CPU on HTML it is handed (least privilege). |
36
+ | `render_dom` | `RenderDom` | 2 | `CAP_NET` + `CAP_SANDBOX`, open egress — renders a URL in a headless browser inside a `SandboxBackend` (unlocked only after a detector escalates off tier 1). |
37
+
38
+ ## The tier ladder
39
+
40
+ `http_fetch` and `html_parse` are tier 1 (cheap, offered from the start).
41
+ `render_dom` is tier 2 — the escalation target when a JavaScript page defeats
42
+ tier 1. The loop only offers tools at or below the current tier; a detector
43
+ `ESCALATE` climbs the ladder. The browser runs in a sandbox behind a seam tests
44
+ can freeze (a saved rendered page), so the escalation arc is proven offline.
45
+
46
+ ## Extend
47
+
48
+ Implement the `Tool` shape (see [`AGENTS.md`](../../AGENTS.md) → *Recipe: add a
49
+ tool*), declare a minimal `capabilities`/`egress`, register under `zu.tools`, and
50
+ add a deterministic test (use an `httpx.MockTransport` to fixture the network).
51
+
52
+ ## Tests
53
+
54
+ `uv run pytest packages/zu-tools` — offline; the network is fixtured.
@@ -0,0 +1,32 @@
1
+ # zu-tools
2
+
3
+ Tools — the **`Tool`** port: actions the model may take. A tool declares its
4
+ tier (the escalation ladder), its JSON `schema`, a `prompt_fragment`, and its
5
+ **capability envelope** (`capabilities` + `egress`) so its blast radius is
6
+ visible in its own code and the gate can bound it.
7
+
8
+ ## Registered plugins (`zu.tools`)
9
+
10
+ | Name | Class | Tier | Envelope |
11
+ |------|-------|------|----------|
12
+ | `http_fetch` | `HttpFetch` | 1 | `CAP_NET`, open egress — a general web fetcher with a host-level SSRF guard (`net.check_url`). |
13
+ | `html_parse` | `HtmlParse` | 1 | none — pure CPU on HTML it is handed (least privilege). |
14
+ | `render_dom` | `RenderDom` | 2 | `CAP_NET` + `CAP_SANDBOX`, open egress — renders a URL in a headless browser inside a `SandboxBackend` (unlocked only after a detector escalates off tier 1). |
15
+
16
+ ## The tier ladder
17
+
18
+ `http_fetch` and `html_parse` are tier 1 (cheap, offered from the start).
19
+ `render_dom` is tier 2 — the escalation target when a JavaScript page defeats
20
+ tier 1. The loop only offers tools at or below the current tier; a detector
21
+ `ESCALATE` climbs the ladder. The browser runs in a sandbox behind a seam tests
22
+ can freeze (a saved rendered page), so the escalation arc is proven offline.
23
+
24
+ ## Extend
25
+
26
+ Implement the `Tool` shape (see [`AGENTS.md`](../../AGENTS.md) → *Recipe: add a
27
+ tool*), declare a minimal `capabilities`/`egress`, register under `zu.tools`, and
28
+ add a deterministic test (use an `httpx.MockTransport` to fixture the network).
29
+
30
+ ## Tests
31
+
32
+ `uv run pytest packages/zu-tools` — offline; the network is fixtured.
@@ -0,0 +1,43 @@
1
+ [project]
2
+ name = "zu-tools"
3
+ version = "0.2.0"
4
+ description = "Zu built-in tools: web_search, http_fetch, html_parse, render_dom, browser"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ license = "Apache-2.0"
8
+ classifiers = [
9
+ "Development Status :: 4 - Beta",
10
+ "Intended Audience :: Developers",
11
+ "License :: OSI Approved :: Apache Software License",
12
+ "Programming Language :: Python :: 3",
13
+ "Programming Language :: Python :: 3.11",
14
+ "Programming Language :: Python :: 3.12",
15
+ "Topic :: Software Development :: Libraries :: Application Frameworks",
16
+ "Typing :: Typed",
17
+ ]
18
+ # zu-backends supplies render_dom's default sandbox (local-docker); it is
19
+ # imported lazily, so a tier-1-only run never touches it, but the default
20
+ # tier-2 render works out of the box once installed.
21
+ dependencies = ["zu-core==0.2.0", "httpx", "selectolax", "zu-backends==0.2.0"]
22
+
23
+ [project.entry-points."zu.tools"] # <- how a tool is registered
24
+ web_search = "zu_tools.search:WebSearch"
25
+ http_fetch = "zu_tools.fetch:HttpFetch"
26
+ browser = "zu_tools.browser:Browser"
27
+ recall = "zu_tools.recall:Recall"
28
+ html_parse = "zu_tools.parse:HtmlParse"
29
+ render_dom = "zu_tools.render:RenderDom"
30
+ action_surface = "zu_tools.action_surface:ActionSurface"
31
+ pointer = "zu_tools.pointer:PointerControl"
32
+ simulate = "zu_tools.simulate:Simulate"
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/k3-mt/zu"
36
+ Repository = "https://github.com/k3-mt/zu"
37
+
38
+ [build-system]
39
+ requires = ["hatchling"]
40
+ build-backend = "hatchling.build"
41
+
42
+ [tool.hatch.build.targets.wheel]
43
+ packages = ["src/zu_tools"]
@@ -0,0 +1,5 @@
1
+ """Zu built-in tools.
2
+
3
+ The built-ins are written against the exact same Tool port users get — which
4
+ is what proves the plugin system is real, not a second-class add-on.
5
+ """
@@ -0,0 +1,429 @@
1
+ """action_surface — the perception-reduction tool (tier 3, Engineering Design §11).
2
+
3
+ A rendered web page is a DOM of 100k–1M+ tokens; the decision the agent needs
4
+ from it — "click Place order" — is a handful. Pushing the whole blob through the
5
+ model is slow, expensive, and *worse for accuracy* (the signal drowns in
6
+ markup). The way out is a reframe: the agent almost never needs the page — it
7
+ needs the **set of things it can do** on the page. That set is a few dozen
8
+ affordances, a few hundred tokens.
9
+
10
+ This tool produces that set, **deterministically**. The decision rule (§4.5)
11
+ settles why it is a tool and not a model job: a script may *enumerate what is
12
+ possible* (every actionable element), but it must not *decide what is reasonable*
13
+ (which one to pick) — that is the policy's judgment. So the reducer surfaces the
14
+ possible and never ranks or prunes by guessed task-relevance.
15
+
16
+ The pipeline (§11.2), run over an accessibility tree rather than the raw DOM:
17
+
18
+ 1. Walk the accessibility tree — roles, names, states — an order of magnitude
19
+ smaller than the DOM, built to answer "what can a user do here".
20
+ 2. Filter to interactive + meaningful (actions, plus the headings/labels/errors
21
+ an action needs); drop the rest.
22
+ 3. Prune the invisible — ignored, off-screen, zero-area, hidden.
23
+ 4. Resolve a stable, human-meaningful label per element.
24
+ 5. Assign a stable, opaque handle (a1, a2 …) that maps back, harness-side, to a
25
+ role+name locator. The model emits the handle, never a selector (§11.3).
26
+ 6. Emit a compact, typed representation.
27
+
28
+ And the competence boundary (§11.4): the honest risk is a false negative —
29
+ pruning the one element the task needed (a canvas button, an unlabeled icon). So
30
+ the reducer must know when it is **blind** and *signal* escalation to tier-4
31
+ vision rather than silently return an incomplete surface. ``blind`` on the
32
+ result is that signal; the ``action-surface-blind`` detector turns it into an
33
+ ESCALATE. Graceful degradation, never silent incompleteness.
34
+
35
+ The deterministic reducer (:func:`reduce_surface`) is the whole value and is
36
+ pure — it runs on an accessibility-tree snapshot with no browser, which is how a
37
+ coding harness drives it offline and how it is tested at $0. The live arm asks a
38
+ browser session for the tree (:meth:`ActionSurface.__call__` with ``op=open``)
39
+ and runs the same reducer over it.
40
+ """
41
+
42
+ from __future__ import annotations
43
+
44
+ from typing import Any
45
+ from urllib.parse import urlsplit
46
+
47
+ from pydantic import BaseModel, Field
48
+
49
+ from zu_core.ports import CAP_NET, CAP_SANDBOX, EGRESS_OPEN, BrowserSessionHandle, SessionBackend
50
+
51
+ from .net import validate_and_pin
52
+
53
# Container image handed to the session backend when the caller does not
# supply its own ``image``.
_DEFAULT_IMAGE = "ghcr.io/k3-mt/zu-render-chromium:latest"

# Every role that stands for something the agent can *do*. Deliberately broad:
# this module enumerates what is possible and leaves the choice to the policy,
# so any actionable role a real accessibility tree exposes should be listed.
# Membership is tested by exact string comparison.
INTERACTIVE_ROLES: frozenset[str] = frozenset(
    (
        "button",
        "link",
        "textbox",
        "searchbox",
        "combobox",
        "checkbox",
        "radio",
        "switch",
        "slider",
        "spinbutton",
        "menuitem",
        "menuitemcheckbox",
        "menuitemradio",
        "tab",
        "option",
        "textarea",
        "listbox",
        "menubutton",
        "togglebutton",
        "datepicker",
        "colorwell",
    )
)

# Roles that are not actionable but whose *text* helps pick an action:
# headings orient the reader, alerts/status carry error and validation text.
# Their labels are surfaced as context lines, never as affordances.
CONTEXT_ROLES: frozenset[str] = frozenset(
    (
        "heading",
        "alert",
        "status",
        "alertdialog",
        "log",
        "marquee",
    )
)
72
+
73
+
74
class AxNode(BaseModel):
    """One normalised accessibility-tree node — the reducer's input currency.

    A small, serialisable shape so the reducer is pure and a harness can feed it
    a captured tree directly. :func:`normalize_axtree` produces these from the
    raw CDP ``Accessibility.getFullAXTree`` format.

    Only ``role`` is required; every other field has a default, so a harness
    can supply just the attributes it actually captured.
    """

    # Role string; :func:`reduce_surface` compares it verbatim against
    # ``INTERACTIVE_ROLES`` and ``CONTEXT_ROLES``.
    role: str
    # Accessible name — the first label source :func:`_label_of` tries.
    name: str = ""
    # Current value; copied unchanged onto the emitted affordance.
    value: str | None = None
    # State tags; copied onto the emitted affordance.
    states: list[str] = Field(default_factory=list)
    # Fallback label sources, tried in this order when ``name`` is blank.
    placeholder: str | None = None
    description: str | None = None
    # Pruning inputs. ``visible`` folds in aria-hidden/display:none/off-screen;
    # ``ignored`` is the tree's own "not exposed" flag; ``bounds`` is [x,y,w,h].
    # A four-element ``bounds`` with non-positive width or height prunes the
    # node; ``None`` (or any other length) skips the area check.
    visible: bool = True
    ignored: bool = False
    bounds: list[float] | None = None
93
+
94
+
95
class Affordance(BaseModel):
    """One thing the policy can do, addressed by an opaque handle.

    Built by :func:`reduce_surface` for every interactive node that survives
    pruning and has a resolvable label.
    """

    # Opaque identifier of the form ``a<N>``, numbered from 1 in emission order.
    handle: str
    # The node's role, copied from the source :class:`AxNode`.
    role: str
    # Stripped human-readable label (name, else placeholder, else description).
    label: str
    # Current value of the element, if the tree reported one.
    value: str | None = None
    # Copy of the source node's state tags.
    states: list[str] = Field(default_factory=list)
103
+
104
+
105
class Surface(BaseModel):
    """The compact, typed reduction of a page — a few hundred tokens.

    ``handle_map`` is the harness-side indirection (§11.3): handle → role+name
    locator. The model only ever sees and emits handles; the durable locator
    stays here and is re-resolved at action time.
    """

    # Page title and URL as supplied by the caller of :func:`reduce_surface`.
    title: str = ""
    url: str = ""
    # Emitted actions, in input order.
    affordances: list[Affordance] = Field(default_factory=list)
    # Labels of context-role nodes (headings, alerts, …), in input order.
    context: list[str] = Field(default_factory=list)
    # handle -> {"role": ..., "name": ...}; one entry per affordance.
    handle_map: dict[str, dict] = Field(default_factory=dict)
    # True when the surface cannot be trusted to be complete; ``blind_reason``
    # then carries a human-readable explanation, otherwise it stays ``None``.
    blind: bool = False
    blind_reason: str | None = None
120
+
121
+
122
def _label_of(node: AxNode) -> str:
    """Resolve the human-meaningful label for *node* (§11.2 step 4).

    Sources are tried in priority order — accessible name, then placeholder,
    then description — and the first one that is non-blank after stripping
    wins. An empty string means the element is unlabeled, which the reducer
    counts toward blindness.
    """
    sources = (node.name, node.placeholder, node.description)
    trimmed = (text.strip() for text in sources if text)
    return next((text for text in trimmed if text), "")
131
+
132
+
133
def _is_pruned(node: AxNode) -> bool:
    """Decide whether *node* is invisible and must be dropped (step 3).

    A node goes when the tree marks it ignored, when it is flagged not
    visible, or when it carries a four-element ``bounds`` box whose width or
    height is not positive. Missing or oddly shaped bounds never prune.
    """
    if node.ignored or not node.visible:
        return True

    box = node.bounds
    if box is None or len(box) != 4:
        return False

    _x, _y, width, height = box
    return width <= 0 or height <= 0
142
+
143
+
144
def reduce_surface(
    nodes: list[AxNode],
    *,
    title: str = "",
    url: str = "",
    unlabeled_ratio: float = 0.5,
) -> Surface:
    """Turn an accessibility tree into its action surface — pure, deterministic.

    Nodes are first pruned (invisible ones dropped), then classified by role:
    context roles contribute their label to ``context``; interactive roles
    become affordances with handles ``a1, a2 …`` numbered in input order, so an
    identical tree always produces identical handles. Interactive nodes with
    no resolvable label get no handle and are only counted.

    The surface is flagged ``blind`` (§11.4) when it cannot be trusted to be
    complete: either some node survived pruning yet no affordance was emitted,
    or the share of unlabeled interactive nodes is strictly greater than
    ``unlabeled_ratio``.

    Args:
        nodes: Normalised tree nodes, in document order.
        title: Page title to record on the surface.
        url: Page URL to record on the surface.
        unlabeled_ratio: Unlabeled/interactive fraction above which the
            surface is declared blind.

    Returns:
        The reduced :class:`Surface`, including the handle → locator map.
    """
    surviving = [candidate for candidate in nodes if not _is_pruned(candidate)]

    actions: list[Affordance] = []
    locators: dict[str, dict] = {}
    context_lines: list[str] = []
    missing_label = 0

    for item in surviving:
        kind = item.role
        is_context = kind in CONTEXT_ROLES
        if not is_context and kind not in INTERACTIVE_ROLES:
            # Neither orienting text nor something to act on.
            continue

        text = _label_of(item)
        if is_context:
            if text:
                context_lines.append(text)
        elif text:
            key = f"a{len(actions) + 1}"
            actions.append(
                Affordance(
                    handle=key,
                    role=kind,
                    label=text,
                    value=item.value,
                    states=list(item.states),
                )
            )
            # Durable locator (role + label) that the model never sees.
            locators[key] = {"role": kind, "name": text}
        else:
            # Possible but unaddressable: counts toward blindness rather than
            # being handed to the model as a meaningless handle.
            missing_label += 1

    interactive_total = len(actions) + missing_label

    reason: str | None
    if surviving and not actions:
        reason = "page had content but the accessibility tree yielded no addressable actions"
    elif interactive_total and (missing_label / interactive_total) > unlabeled_ratio:
        reason = (
            f"{missing_label}/{interactive_total} interactive elements are unlabeled "
            "in the accessibility tree — too thin to trust"
        )
    else:
        reason = None

    return Surface(
        title=title,
        url=url,
        affordances=actions,
        context=context_lines,
        handle_map=locators,
        blind=reason is not None,
        blind_reason=reason,
    )
221
+
222
+
223
def _ax_string(field: Any) -> str:
    """Extract the ``value`` of a CDP AX value object as text.

    *field* is expected to look like ``{"type": ..., "value": ...}``. Anything
    that is not a dict, or a dict whose ``value`` is missing or ``None``,
    yields the empty string; any other value is passed through ``str``.
    """
    if not isinstance(field, dict):
        return ""
    raw = field.get("value")
    return "" if raw is None else str(raw)
229
+
230
+
231
def normalize_axtree(cdp_nodes: list[dict]) -> list[AxNode]:
    """Normalise the raw CDP ``Accessibility.getFullAXTree`` node list into
    :class:`AxNode` records, preserving the order of the input list.

    CDP shape per node: ``role``/``name``/``value``/``description`` are
    ``{type, value}`` objects; ``properties`` is a list of
    ``{name, value: {value}}``; ``ignored`` is a bool.

    States surfaced: checked, disabled, expanded, focused, invalid, required,
    selected — always emitted in that (alphabetical) order. A boolean ``True``
    or the string ``"true"`` yields the bare state name; any other non-empty
    string except ``"false"`` yields ``"<state>:<value>"`` (for example
    ``"checked:mixed"``).

    Malformed input is tolerated rather than raised on: entries that are not
    dicts are skipped, nodes without a role are skipped, and a missing or null
    ``properties`` is treated as an empty list.

    Args:
        cdp_nodes: Raw node dicts as returned by the CDP call.

    Returns:
        One :class:`AxNode` per input node that has a non-empty role.
    """
    # Pre-sorted once so state order is stable without re-sorting per node.
    state_props = (
        "checked", "disabled", "expanded", "focused", "invalid", "required", "selected",
    )
    out: list[AxNode] = []
    for n in cdp_nodes:
        if not isinstance(n, dict):
            # A stray non-object entry must not abort the whole capture.
            continue
        role = _ax_string(n.get("role"))
        if not role:
            continue

        # ``properties`` may be absent *or* explicitly null in the payload.
        raw_props = n.get("properties") or ()
        props = {p.get("name"): p.get("value", {}) for p in raw_props if isinstance(p, dict)}

        states: list[str] = []
        for sp in state_props:
            val = props.get(sp, {})
            v = val.get("value") if isinstance(val, dict) else None
            if v is True or (isinstance(v, str) and v not in ("false", "")):
                states.append(sp if not isinstance(v, str) or v == "true" else f"{sp}:{v}")

        # Visibility comes solely from the ``hidden`` property when it is
        # present as a value object; ``bounds`` is not populated here, so the
        # zero-area prune only applies to trees a caller supplies directly.
        hidden = props.get("hidden")
        visible = not bool(hidden.get("value", False)) if isinstance(hidden, dict) else True

        out.append(
            AxNode(
                role=role,
                name=_ax_string(n.get("name")),
                value=_ax_string(n.get("value")) or None,
                states=states,
                placeholder=_ax_string(props.get("placeholder")) or None,
                description=_ax_string(n.get("description")) or None,
                ignored=bool(n.get("ignored", False)),
                visible=visible,
            )
        )
    return out
269
+
270
+
271
class ActionSurface:
    """Tier-3 tool: reduce a page to its action surface (and keep the handle map).

    Two ways in, one reducer:

    * ``op=reduce`` (default) — reduce a tree the caller already has. Pass
      ``nodes`` (AxNode dicts) or raw ``axtree`` (CDP nodes), plus ``title`` /
      ``url``. No browser, fully offline — the harness-driven and tested path.
    * ``op=open`` — open ``url`` in a headless browser session, ask it for the
      accessibility tree, and reduce that. The live arm.

    After a reduction the handle→locator map is held on the instance for the run;
    ``op=resolve`` returns the durable locator for a handle (a stale handle is an
    escalation, not a crash — the caller re-resolves at action time, §11.3).

    Bad arguments and capture failures are reported as an ``{"error": ...}``
    observation rather than raised.
    """

    name = "action_surface"
    tier = 3  # the accessibility-tree tier; unlocked by a detector ESCALATE
    schema = {
        "name": "action_surface",
        "description": (
            "Reduce a web page to the compact SET OF THINGS YOU CAN DO on it — a "
            "flat list of affordances (button/link/textbox/…) each with an opaque "
            "handle (a1, a2 …) and a human label. You choose a handle and act on "
            "it; you never see or emit a CSS selector. op=open a url to capture and "
            "reduce its accessibility tree; op=resolve a handle to its locator. If "
            "'blind' is true the tree is too thin to trust — escalate to vision."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "op": {"type": "string", "enum": ["reduce", "open", "resolve"]},
                "url": {"type": "string", "description": "for op=open: the page to reduce"},
                "handle": {"type": "string", "description": "for op=resolve: the handle to resolve"},
                "axtree": {"type": "array", "items": {"type": "object"},
                           "description": "for op=reduce: raw CDP getFullAXTree nodes"},
                "nodes": {"type": "array", "items": {"type": "object"},
                          "description": "for op=reduce: pre-normalised AxNode dicts"},
                "title": {"type": "string"},
            },
            "required": ["op"],
        },
    }
    prompt_fragment = (
        "action_surface(op=open, url): reduce a page to a short list of affordances "
        "(handles a1,a2,… with labels) instead of reading the whole DOM. Pick a handle "
        "to act on; resolve(handle) gives its locator. 'blind' means escalate to vision."
    )
    capabilities = frozenset({CAP_NET, CAP_SANDBOX})
    egress = frozenset({EGRESS_OPEN})

    def __init__(
        self,
        backend: SessionBackend | None = None,
        image: str = _DEFAULT_IMAGE,
        *,
        allow_private: bool | None = None,
        unlabeled_ratio: float = 0.5,
    ) -> None:
        """Configure the tool; no session is opened until ``op=open`` is used.

        Args:
            backend: Session backend used by ``op=open``. When ``None`` a
                ``LocalDockerBackend`` is created lazily on first use.
            image: Container image passed to the backend in the session spec.
            allow_private: Forwarded unchanged to ``validate_and_pin``.
            unlabeled_ratio: Blindness threshold forwarded to
                :func:`reduce_surface`.
        """
        self._backend = backend
        self.image = image
        self.allow_private = allow_private
        self.unlabeled_ratio = unlabeled_ratio
        # Handle -> locator map of the most recently emitted surface.
        self._handle_map: dict[str, dict] = {}
        # The session opened by the last ``op=open``, kept until replaced or
        # closed via :meth:`aclose`.
        self._session: BrowserSessionHandle | None = None

    def _resolve_backend(self) -> SessionBackend:
        """Return the configured backend, creating the default one on demand."""
        if self._backend is None:
            # Imported lazily so the offline reduce/resolve paths never need
            # the backends package.
            from zu_backends.local_docker import LocalDockerBackend

            self._backend = LocalDockerBackend()
        return self._backend

    async def __call__(
        self,
        ctx: Any,
        op: str = "reduce",
        url: str | None = None,
        handle: str | None = None,
        axtree: list | None = None,
        nodes: list | None = None,
        title: str | None = None,
    ) -> dict:
        """Dispatch one tool call.

        Args:
            ctx: Call context supplied by the caller; not used here.
            op: ``"reduce"``, ``"resolve"`` or ``"open"``.
            url: Page URL — required for ``open``, recorded for ``reduce``.
            handle: Handle to look up — required for ``resolve``.
            axtree: Raw CDP nodes for ``reduce``.
            nodes: Pre-normalised nodes for ``reduce`` (takes precedence over
                ``axtree``).
            title: Page title to record on the surface.

        Returns:
            The emitted surface observation, a ``{"handle", "locator"}`` dict
            for ``resolve``, or an ``{"error": ...}`` dict.
        """
        if op == "reduce":
            return self._reduce_op(nodes=nodes, axtree=axtree, title=title or "", url=url or "")

        if op == "resolve":
            if not handle:
                return {"error": "op=resolve requires a handle"}
            locator = self._handle_map.get(handle)
            if locator is None:
                # Stale/unknown handle: signal a re-resolve, never a crash (§11.3).
                return {"stale_handle": handle,
                        "error": f"handle {handle!r} is not on the current surface; re-capture"}
            return {"handle": handle, "locator": locator}

        if op == "open":
            if not url:
                return {"error": "op=open requires a url"}
            return await self._open_op(url, title or "")

        return {"error": f"unknown op {op!r}; use reduce/open/resolve"}

    def _reduce_op(self, *, nodes: list | None, axtree: list | None, title: str, url: str) -> dict:
        """Reduce a caller-supplied tree; ``nodes`` wins over ``axtree``."""
        if nodes is not None:
            try:
                ax = [n if isinstance(n, AxNode) else AxNode.model_validate(n) for n in nodes]
            except ValueError as exc:
                # pydantic's ValidationError is a ValueError. Malformed
                # caller-supplied nodes are a bad-argument condition, reported
                # like the other bad-argument paths instead of raised.
                return {"error": f"op=reduce got a malformed node: {exc}"}
        elif axtree is not None:
            ax = normalize_axtree([n for n in axtree if isinstance(n, dict)])
        else:
            return {"error": "op=reduce requires 'nodes' or 'axtree'"}
        surface = reduce_surface(ax, title=title, url=url, unlabeled_ratio=self.unlabeled_ratio)
        return self._emit(surface)

    async def _open_op(self, url: str, title: str) -> dict:
        """Open *url* in a fresh session, capture its tree and reduce it."""
        # Only one session is held at a time; drop the previous one first.
        await self._close_session()
        pinned_ip = validate_and_pin(url, allow_private=self.allow_private)
        spec: dict[str, Any] = {"image": self.image, "tier": self.tier, "network": True}
        host = urlsplit(url).hostname
        if pinned_ip is not None and host:
            # Map the hostname to the address validate_and_pin returned.
            spec["extra_hosts"] = {host: pinned_ip}
        self._session = await self._resolve_backend().open_session(spec)
        # Ask the session for the accessibility tree. The browser server returns
        # ``{axtree: [...CDP nodes...], title, url}``; an older server that lacks
        # the op returns an error, which we surface (not a crash).
        resp = await self._session.send({"op": "axtree", "url": url})
        if not isinstance(resp, dict) or resp.get("axtree") is None:
            err = resp.get("error") if isinstance(resp, dict) else "bad session response"
            return {"error": f"could not capture accessibility tree: {err}"}
        ax = normalize_axtree([n for n in resp["axtree"] if isinstance(n, dict)])
        surface = reduce_surface(
            ax,
            # ``or`` rather than a ``.get`` default: a key that is present but
            # null must fall back too, not be rendered as the text "None".
            title=title or str(resp.get("title") or ""),
            url=str(resp.get("url") or url),
            unlabeled_ratio=self.unlabeled_ratio,
        )
        return self._emit(surface)

    def _emit(self, surface: Surface) -> dict:
        """The surface as a loop-friendly observation. The handle map is held on
        the instance (harness-side) and echoed for the harness; ``surface_blind``
        is the top-level flag the blind detector reads."""
        # Replace (not merge) so handles from an earlier surface become stale.
        self._handle_map = dict(surface.handle_map)
        return {
            "action_surface": surface.model_dump(exclude={"handle_map"}),
            "handle_map": surface.handle_map,
            "surface_blind": surface.blind,
        }

    async def _close_session(self) -> None:
        """Close the held session, if any, ignoring errors from the close."""
        if self._session is not None:
            # Detach first so a failing close cannot leave a half-closed
            # session referenced on the instance.
            session, self._session = self._session, None
            try:
                await session.close()
            except Exception:  # noqa: BLE001 — teardown must not raise over a result
                pass

    async def aclose(self) -> None:
        """Close any lingering session — for run teardown so a container never leaks."""
        await self._close_session()