zu-shadow 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zu_shadow/__init__.py ADDED
@@ -0,0 +1,46 @@
1
+ """zu-shadow — author a production agent by DEMONSTRATION (§2.8).
2
+
3
+ A Shadow recording *is* the event bus run over a HUMAN session: the human is the
4
+ policy for that one run, so recording costs almost nothing architecturally — the
5
+ recorder folds an abstract input/CDP stream into ``data.shadow.*`` events on the
6
+ same append-only log everything else uses. Four disciplines are load-bearing:
7
+
8
+ * **Redaction is DEFAULT-ON and runs BEFORE append** (``redaction``): secrets —
9
+ passwords, ``Authorization``/``Cookie`` headers, tokens/API keys, configured PII
10
+ — never reach :meth:`EventSink.append`. The "why" intent text is redacted too.
11
+ * **Capture is SEMANTIC** (``capture``): a user action is named by its target's
12
+ ``{role, name, label}`` (the core ``surface`` currency, shared with §4 handles /
13
+ §5 SurfaceView) — never a CSS selector or pixel coordinate.
14
+ * **The synthesizer is itself a Zu agent** (``synthesizer``): driven by a
15
+ ``ModelProvider`` (offline-tested with ``ScriptedProvider``), it PROPOSES an
16
+ agent spec + an induced ``Fsm`` + ``Invariant``s; the egress allowlist writes
17
+ itself from the recorded ``network.response`` hosts.
18
+ * **Promotion is GATED by reproduced outcome** (``replay_gate``): a synthesized
19
+ agent does not run on real data until it reproduces the recorded outcome, reusing
20
+ zu-cli's ``offline.py``/``build.py``. The "why" resolutions are reviewed, never
21
+ auto-promoted.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from .capture import SemanticTarget, capture_click, capture_navigate, capture_type
27
+ from .recorder import RecordedSession, Recorder
28
+ from .redaction import RedactionPolicy, redact_event, redact_text
29
+ from .replay_gate import PromotionVerdict, verify_and_gate
30
+ from .synthesizer import SynthesisResult, Synthesizer
31
+
32
+ __all__ = [
33
+ "PromotionVerdict",
34
+ "RecordedSession",
35
+ "Recorder",
36
+ "RedactionPolicy",
37
+ "SemanticTarget",
38
+ "SynthesisResult",
39
+ "Synthesizer",
40
+ "capture_click",
41
+ "capture_navigate",
42
+ "capture_type",
43
+ "redact_event",
44
+ "redact_text",
45
+ "verify_and_gate",
46
+ ]
zu_shadow/capture.py ADDED
@@ -0,0 +1,119 @@
1
+ """SEMANTIC-TARGET capture — name an action by WHAT it acts on, not WHERE.
2
+
3
+ Every captured user action identifies its target by ``{role, name, label}`` — the
4
+ same accessibility-grounded currency the core ``surface`` types speak (§4 handles /
5
+ §5 ``SurfaceView``). NEVER a CSS selector, an XPath, or a pixel coordinate: those
6
+ are brittle (a redesign breaks them) and untransferable (they cannot feed the §4
7
+ locator / §5 recognizer). A semantic target re-resolves on a changed page, which is
8
+ the whole reason a synthesized agent can be *resilient* rather than pixel-frozen.
9
+
10
+ ``SemanticTarget`` is a thin, frozen value object that reuses ``role``/``label``
11
+ exactly as :class:`zu_core.surface.SurfaceAffordance` does, plus the accessible
12
+ ``name`` (the click target's accessible name). The capture helpers turn a raw
13
+ abstract-stream event into a redaction-ready ``data.shadow.*`` payload.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from pydantic import BaseModel
19
+
20
+ from zu_core import events as ev
21
+ from zu_core.surface import SurfaceAffordance
22
+
23
+ # Target role/name/label tokens that mark an input as a CREDENTIAL field, so the
24
+ # recorder records its typed value under a credential-named key the redaction stage
25
+ # blanks wholesale — a password is never recorded verbatim, even pre-redaction-sweep.
26
+ _CREDENTIAL_TARGET_HINTS: tuple[str, ...] = ("password", "passwd", "secret", "token",
27
+ "api key", "api_key", "apikey", "otp",
28
+ "cvv", "cvc", "pin", "security code",
29
+ # payment-card secrets — the agent must NEVER hold
30
+ # these; a real payment goes through the §8 broker.
31
+ "card number", "cardnumber", "card no",
32
+ "credit card", "debit card", "expiration", "expiry",
33
+ "iban", "sort code", "account number")
34
+
35
+
36
class SemanticTarget(BaseModel):
    """The thing a user action acted on, named semantically rather than positionally.

    Three strings identify it: ``role`` (free-form, e.g. ``button``/``link``/
    ``textbox``), the accessible ``name``, and a human-readable ``label``. There is
    deliberately no selector and no coordinate field, so the target can be
    re-resolved on a changed page. The model is frozen so a logged value is stable.
    """

    model_config = {"frozen": True}

    role: str
    name: str = ""
    label: str = ""

    @classmethod
    def from_affordance(cls, a: SurfaceAffordance, *, name: str = "") -> SemanticTarget:
        """Build a target from a core ``SurfaceAffordance``.

        The affordance's ``role`` and ``label`` carry through unchanged. ``name`` is
        the separately resolved accessible name; when it is empty the affordance's
        ``label`` is used for ``name`` as well.
        """
        accessible_name = name if name else a.label
        return cls(role=a.role, name=accessible_name, label=a.label)

    def to_payload(self) -> dict:
        """Return the ``{"role", "name", "label"}`` dict embedded in event payloads."""
        return dict(role=self.role, name=self.name, label=self.label)
58
+
59
+
60
def capture_click(target: SemanticTarget, *, intent: str | None = None) -> tuple[str, dict]:
    """Build the ``(event type, payload)`` pair for a ``data.shadow.user.click``.

    The payload always carries ``target`` (the semantic target as a dict). ``intent``
    — the optional, human-reviewed "why" narration — is added only when it is not
    ``None``; it is carried on the event but NEVER auto-promoted into the agent.
    """
    narration = {} if intent is None else {"intent": intent}
    return ev.SHADOW_USER_CLICK, {"target": target.to_payload(), **narration}
67
+
68
+
69
def _is_credential_target(target: SemanticTarget) -> bool:
    """Return True when the target's role/name/label marks it as a credential input.

    A credential target's typed value is recorded under a credential-named key that
    the redaction stage blanks, so a false positive silently destroys an ordinary
    value. Plain substring matching produced exactly that for the short hints:
    ``"pin"`` occurs inside the ARIA role ``spinbutton`` and inside labels such as
    "Shipping address" or "Shopping".

    Matching rule:

    * hints of four characters or fewer (``otp``, ``cvv``, ``cvc``, ``pin``,
      ``iban``) must START a word — "PIN", "pin code", "CVV2" and "cardPin" match,
      "spinbutton" and "shipping" do not;
    * longer hints keep substring matching, so run-together names such as
      "userpassword" are still caught.
    """
    short_hint_max = 4
    raw = f"{target.role} {target.name} {target.label}"
    blob = raw.lower()

    # Split into lower-cased words on non-alphanumerics AND on camelCase boundaries
    # (lower → upper), so "cardPin" still exposes "pin" at a word start.
    pieces: list[str] = []
    previous = ""
    for ch in raw:
        if not ch.isalnum():
            pieces.append(" ")
        else:
            if ch.isupper() and previous.islower():
                pieces.append(" ")
            pieces.append(ch.lower())
        previous = ch
    words = "".join(pieces).split()

    for hint in _CREDENTIAL_TARGET_HINTS:
        if len(hint) <= short_hint_max:
            if any(word.startswith(hint) for word in words):
                return True
        elif hint in blob:
            return True
    return False
74
+
75
+
76
def capture_type(target: SemanticTarget, value: str, *,
                 intent: str | None = None) -> tuple[str, dict]:
    """Build the ``(event type, payload)`` pair for a ``data.shadow.user.type``.

    Capture only MARKS credentials; redaction (run before append) enforces the floor.
    When the target looks like a credential field the typed text is stored under the
    ``password`` key, which the redaction stage blanks wholesale. Any other text is
    stored under ``value`` and is still swept for token shapes by redaction.
    ``intent`` is attached only when it is not ``None``.
    """
    target_payload = target.to_payload()
    # A credential-named key ⇒ redaction blanks the whole value.
    value_key = "password" if _is_credential_target(target) else "value"
    payload: dict = {"target": target_payload, value_key: value}
    if intent is None:
        return ev.SHADOW_USER_TYPE, payload
    payload["intent"] = intent
    return ev.SHADOW_USER_TYPE, payload
91
+
92
+
93
def capture_navigate(url: str, *, intent: str | None = None) -> tuple[str, dict]:
    """Build the ``(event type, payload)`` pair for a ``data.shadow.user.navigate``.

    The payload holds the raw ``url`` plus ``intent`` when one is given. Nothing is
    stripped here: sweeping credentials/tokens out of the query string is the job of
    the redaction stage that runs before the event reaches the log.
    """
    narration = {} if intent is None else {"intent": intent}
    return ev.SHADOW_USER_NAVIGATE, {"url": url, **narration}
100
+
101
+
102
def capture_page_loaded(url: str, title: str) -> tuple[str, dict]:
    """Build the ``(event type, payload)`` pair for a ``data.shadow.page.loaded``.

    Marks a settled page — the locus a following action's semantic target is
    re-resolved against. The payload is just ``url`` and ``title``.
    """
    page = dict(url=url, title=title)
    return ev.SHADOW_PAGE_LOADED, page
106
+
107
+
108
def capture_network_response(url: str, status: int, host: str) -> tuple[str, dict]:
    """Build the ``(event type, payload)`` pair for a ``data.shadow.network.response``.

    METADATA only: the payload is ``url``, ``status`` and ``host`` — no body and no
    headers. The ``host`` values across these events are what the synthesized
    agent's egress allowlist is induced from.
    """
    metadata = dict(url=url, status=status, host=host)
    return ev.SHADOW_NETWORK_RESPONSE, metadata
113
+
114
+
115
def capture_scroll(direction: str, y: int = 0) -> tuple[str, dict]:
    """Build the ``(event type, payload)`` pair for a ``data.shadow.user.scroll``.

    A scroll is context, not an action step: it records that the human had to scroll
    to reach the next thing. Any ``direction`` other than ``"up"`` is stored as
    ``"down"``; ``y`` is coerced with ``int``.
    """
    vertical = "up" if direction == "up" else "down"
    return ev.SHADOW_USER_SCROLL, dict(direction=vertical, y=int(y))
zu_shadow/executor.py ADDED
@@ -0,0 +1,273 @@
1
+ """The live executor — the agent USES a Shadow recording to do the task itself, and
2
+ GENERALISES it.
3
+
4
+ Record the task once (buy a muzzle); Shadow synthesises the path; this executor then
5
+ RE-RUNS it on the live site — and the next run can vary it (search "collars" instead of
6
+ "muzzles"). It is the §1.5 division made concrete: the recording bounds the action space
7
+ (the demonstrated procedure + semantic anchors), and where the live page diverges from
8
+ the demonstration the MODEL proposes within the bounded affordance set while the harness
9
+ disposes. Three resolution modes per step:
10
+
11
+ * EXACT — the demonstrated target still exists (a fixed-flow control like "Add to
12
+ cart" / "Check out") → re-resolve it by role+name and act.
13
+ * PARAM — a typed value is overridden ("muzzles" → "collars"; the customer's own
14
+ name/address) → type the override into the field.
15
+ * MODEL — the demonstrated specific target is GONE (you searched collars, so the
16
+ muzzle product link isn't there) → the model picks the best handle from the
17
+ CURRENT affordances (it emits a handle, never a selector), generalising.
18
+
19
+ The COMMIT BOUNDARY (a payment / place-order step) is never auto-crossed: the executor
20
+ escalates before it (a real payment is a §8 brokered capability, never the captured card).
21
+ The browser is an injected ``BrowserSession`` — a fake drives it at $0 in tests; the live
22
+ Playwright binding drives real Chrome.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import re
28
+ from dataclasses import dataclass, field
29
+ from typing import Any, Protocol, runtime_checkable
30
+
31
+ from zu_core import events as ev
32
+ from zu_core.ports import ModelProvider, ModelRequest
33
+ from zu_core.surface import SurfaceView
34
+
35
+ from .redaction import REDACTED
36
+
37
+ _CLICKABLE = frozenset({"button", "link", "checkbox", "radio", "switch", "tab",
38
+ "menuitem", "option", "row", "gridcell"})
39
+ _FIELDS = frozenset({"textbox", "searchbox", "combobox"})
40
+ # Steps whose name names an irreversible money/commit action — never auto-crossed.
41
+ _COMMIT = re.compile(r"(?i)\b(place order|pay now|pay$|buy now|complete (order|purchase|"
42
+ r"payment)|confirm (and )?pay|submit order|checkout & pay)\b")
43
+ # A payment-card field — the agent must NEVER type a card; a real payment is a §8 brokered
44
+ # capability. A redacted secret value means the same: the agent doesn't hold the secret.
45
+ _PAYMENT_FIELD = re.compile(r"(?i)\b(card number|cardnumber|card no|credit card|debit card|"
46
+ r"expiration|expiry|cvv|cvc|security code|iban|sort code|"
47
+ r"account number)\b")
48
+
49
+
50
@dataclass(frozen=True)
class Step:
    """One step of the demonstrated path: what to do, on what (by role+name), the value
    typed, the human's why, and whether it crosses the commit boundary.

    Immutable (``frozen=True``). ``steps_from_recording`` builds these from recorded
    events; ``execute`` consumes them."""
    # Action kind: "click" | "type" | "navigate".
    kind: str
    # Semantic role of the target; empty for a navigate step.
    role: str = ""
    # Target's accessible name (falls back to its label when recorded without a name).
    name: str = ""
    # Typed text for a "type" step, the URL for a "navigate" step, otherwise None.
    value: str | None = None
    # The human's optional "why" narration carried over from the event.
    intent: str | None = None
    # True when the step crosses the commit boundary (order/pay click, redacted
    # secret, or payment-card field) and must not be auto-executed.
    committing: bool = False
60
+
61
+
62
@dataclass
class StepOutcome:
    """What happened when the executor handled one step (or dismissed an interstitial)."""
    # The step this outcome belongs to.
    step: Step
    # How it was resolved: "exact" | "param" | "model" | "navigate" | "interstitial"
    # | "escalated" | "unresolved".
    via: str
    # Handle of the affordance acted on; None when nothing was acted on.
    handle: str | None = None
    # Value passed to the browser for a type action, if any.
    value: str | None = None
    # False for an escalated or unresolved step.
    ok: bool = True
    # Human-readable explanation, filled for escalated/unresolved outcomes.
    detail: str = ""
70
+
71
+
72
@dataclass
class RunReport:
    """The result of one executor run.

    ``outcomes`` lists every step outcome in order, ``completed`` is set once every
    step has been handled, and ``escalated_at`` holds the index of the step at which
    the run stopped early (``None`` if it did not).
    """
    outcomes: list[StepOutcome] = field(default_factory=list)
    completed: bool = False
    escalated_at: int | None = None

    @property
    def acted(self) -> list[StepOutcome]:
        """The outcomes that actually operated an affordance (``handle`` is set)."""
        performed: list[StepOutcome] = []
        for outcome in self.outcomes:
            if outcome.handle is None:
                continue
            performed.append(outcome)
        return performed
81
+
82
+
83
@runtime_checkable
class BrowserSession(Protocol):
    """The live browser the executor drives. The fake test double and the live Playwright
    binding both satisfy this. ``perceive`` returns the CURRENT page's affordances (the
    Action Surface); ``act`` operates one by its opaque handle (never a selector)."""

    # Snapshot of the current page; the executor reads its ``affordances``.
    def perceive(self) -> SurfaceView: ...
    # Operate the affordance behind ``handle``. The executor passes ``kind`` as
    # "click" or "type", with ``value`` carrying the text to type (None for a click).
    def act(self, handle: str, kind: str, value: str | None = None) -> None: ...
    # URL of the current page. Not called by the executor code in this module.
    def current_url(self) -> str: ...
92
+
93
+
94
+ def _norm(s: str | None) -> str:
95
+ return re.sub(r"\s+", " ", (s or "")).strip().lower()
96
+
97
+
98
def steps_from_recording(events: list[Any]) -> list[Step]:
    """Turn a recording's events into the executable path.

    Pass 1 keeps only navigate/click/type events and converts each to a ``Step``,
    marking the commit boundary. Pass 2 applies the same cleanup the synthesizer
    does: a focus-click immediately followed by a type on the same target is
    dropped, and a consecutive duplicate (same kind, name and value) is collapsed.
    """
    parsed: list[Step] = []
    for event in events:
        etype = getattr(event, "type", "")
        payload = getattr(event, "payload", {}) or {}
        if etype == ev.SHADOW_USER_NAVIGATE:
            parsed.append(Step(kind="navigate", value=payload.get("url", "")))
            continue
        is_click = etype == ev.SHADOW_USER_CLICK
        if not (is_click or etype == ev.SHADOW_USER_TYPE):
            continue  # page loads, network responses, scrolls… are not steps

        target = payload.get("target", {}) or {}
        kind = "click" if is_click else "type"
        name = target.get("name") or target.get("label") or ""
        value = payload.get("value")
        if value is None:
            # a credential field's (redacted) value lives under this key
            value = payload.get("password")
        committing = (
            # an irreversible order/pay click
            (kind == "click" and bool(_COMMIT.search(name)))
            # a step needing a secret the agent lacks
            or value == REDACTED
            # a payment-card field — brokered (§8)
            or bool(_PAYMENT_FIELD.search(name))
        )
        parsed.append(Step(kind=kind, role=target.get("role", ""), name=name, value=value,
                           intent=payload.get("intent"), committing=committing))

    # The whys live on the events and are reviewed separately; only the path is cleaned.
    cleaned: list[Step] = []
    last_index = len(parsed) - 1
    for index, step in enumerate(parsed):
        # R2: a click that merely focuses the field the very next step types into.
        follower = parsed[index + 1] if index < last_index else None
        if (step.kind == "click" and follower is not None and follower.kind == "type"
                and _norm(follower.name) == _norm(step.name)):
            continue
        # R1: an immediate repeat of the step that was kept last.
        previous = cleaned[-1] if cleaned else None
        if (previous is not None and previous.kind == step.kind
                and _norm(previous.name) == _norm(step.name)
                and previous.value == step.value):
            continue
        cleaned.append(step)
    return cleaned
137
+
138
+
139
def _match(surface: SurfaceView, role: str, name: str) -> str | None:
    """Re-resolve the demonstrated target on the CURRENT page by role+name.

    Resolution order (first hit wins, in page order within each tier):

    1. exact normalised-label match whose role is compatible with ``role``;
    2. exact normalised-label match of any role;
    3. contained match (either label contains the other — robust to small label
       drift) whose role is compatible.

    Tier 1 exists because a page often has several controls sharing one label — a
    "Search" textbox next to a "Search" button — and a role-blind exact match would
    hand a ``type`` step the button whenever it happens to come first. Tier 2 keeps
    the role-agnostic fallback for pages where the role itself has changed.

    A role is compatible when ``role`` is empty, equal to the affordance's role, or
    both are text-field roles. Returns the affordance handle, or ``None`` when
    ``name`` is empty or nothing matches.
    """
    wanted = _norm(name)
    if not wanted:
        return None

    def role_compatible(candidate_role: str) -> bool:
        return (not role or candidate_role == role
                or (role in _FIELDS and candidate_role in _FIELDS))

    # Normalise every label once; all three tiers reuse the result.
    labelled = [(a, _norm(a.label)) for a in surface.affordances]

    exact = [a for a, label in labelled if label == wanted]
    for a in exact:
        if role_compatible(a.role):
            return a.handle
    if exact:
        return exact[0].handle

    for a, label in labelled:
        if label and (wanted in label or label in wanted) and role_compatible(a.role):
            return a.handle
    return None
154
+
155
+
156
def _first_field(surface: SurfaceView) -> str | None:
    """Return the handle of the first text-entry affordance on the page, or ``None``."""
    field_handles = (a.handle for a in surface.affordances if a.role in _FIELDS)
    return next(field_handles, None)
161
+
162
+
163
+ # A control that dismisses a blocking overlay (cookie/consent banner, popup) that the
164
+ # demonstration didn't include — generic verbs only, anchored so it matches a dismiss button,
165
+ # not "Accept terms" text. Accepting/closing a banner is reversible; it just unblocks the step.
166
+ _DISMISS = re.compile(r"(?i)^(accept( all)?( cookies)?|agree|i agree|allow( all)?|got it|"
167
+ r"ok(ay)?|continue|close|dismiss|no thanks|reject( all)?|"
168
+ r"accept all cookies)$")
169
+
170
+
171
def _interstitial(surface: SurfaceView) -> str | None:
    """Find a dismiss control for a cookie/consent/popup overlay that is blocking a step.

    Returns the handle of the first button or link whose normalised label is a
    dismiss phrase, or ``None`` when the page shows no such control — so an
    interstitial that wasn't in the recording doesn't derail the run.
    """
    dismissable_roles = ("button", "link")
    for candidate in surface.affordances:
        if candidate.role not in dismissable_roles:
            continue
        if _DISMISS.match(_norm(candidate.label)):
            return candidate.handle
    return None
178
+
179
+
180
def _resolve_exact(step: Step, surface: SurfaceView,
                   ov: dict[str, str]) -> tuple[str | None, str, str | None]:
    """Resolve a step WITHOUT the model; returns ``(handle, via, value)``.

    * EXACT — the demonstrated target re-resolves by role+name → ``via="exact"``.
    * PARAM — a ``type`` step whose target did not re-resolve falls back to the
      first text field on the page → ``via="param"``.
    * otherwise ``(None, "", value)``.

    ``value`` is only meaningful for a ``type`` step: the override keyed by the
    step's normalised name if ``ov`` has one, else the recorded value. The model's
    generalisation is the LAST resort, tried by the caller only after these retries
    fail — so a lazy-loading or banner-blocked page is retried for the real control
    instead of the model grabbing a wrong one.
    """
    typing_step = step.kind == "type"
    value = ov.get(_norm(step.name), step.value) if typing_step else None

    matched = _match(surface, step.role, step.name)
    if matched is not None:
        return matched, "exact", value

    fallback = _first_field(surface) if typing_step else None
    if fallback is not None:
        return fallback, "param", value
    return None, "", value
195
+
196
+
197
async def _model_choose(step: Step, surface: SurfaceView, model: ModelProvider) -> str | None:
    """GENERALISE: let the model pick a handle when the demonstrated control is gone.

    The choice is bounded to the CURRENT page's clickable affordances — the model is
    shown ``handle: role "label"`` lines and must answer with a handle, never a
    selector. The reply is scanned for letter-led word tokens and the first one that
    is a listed handle wins. No clickable affordances, or a reply naming no real
    handle, yields ``None`` so the caller escalates instead of guessing.
    """
    candidates = [a for a in surface.affordances if a.role in _CLICKABLE]
    if not candidates:
        return None

    known_handles = {a.handle for a in candidates}
    listing = "\n".join(f'{a.handle}: {a.role} "{a.label}"' for a in candidates)
    # Prefer the human's recorded why; otherwise describe the step itself.
    goal = step.intent or f"{step.kind} {step.name}".strip()

    system_prompt = (
        "You drive a web agent following a known task on a live "
        "site. The demonstrated control is not on this page. Pick the SINGLE affordance handle "
        "that best continues the task. Reply with ONLY the handle (e.g. a3)."
    )
    user_prompt = f"Step to continue: {goal}\nAffordances:\n{listing}\n\nHandle:"
    request = ModelRequest(messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ])
    reply = await model.complete(request)

    tokens = re.findall(r"[A-Za-z]+\w*", reply.text or "")
    return next((token for token in tokens if token in known_handles), None)
218
+
219
+
220
async def execute(
    steps: list[Step],
    session: BrowserSession,
    model: ModelProvider,
    *,
    overrides: dict[str, str] | None = None,
    on_commit: str = "escalate",
    max_retries: int = 2,
) -> RunReport:
    """Drive the demonstrated path on the live ``session`` and report what happened.

    ``overrides`` generalises the run: a typed value keyed by the step's name (e.g.
    ``{"search": "collars"}``; keys are normalised). For each step:

    * a ``navigate`` step is only noted — it is a consequence of the prior act;
    * a committing step stops the run when ``on_commit == "escalate"``;
    * otherwise the target is resolved without the model; while it is unresolved, up
      to ``max_retries`` times, a blocking interstitial (cookie / consent / popup) is
      dismissed if one is present and the page is RE-PERCEIVED;
    * a still-unresolved ``click`` is handed to the model; a step that remains
      unresolved stops the run.

    The report has ``completed=True`` only when every step was handled; on an early
    stop ``escalated_at`` is the index of the step that stopped it.
    """
    normalized_overrides = {_norm(key): val for key, val in (overrides or {}).items()}
    report = RunReport()

    for index, step in enumerate(steps):
        if step.kind == "navigate":
            report.outcomes.append(StepOutcome(step, "navigate"))
            continue

        if on_commit == "escalate" and step.committing:
            report.outcomes.append(StepOutcome(
                step, "escalated", ok=False,
                detail="commit boundary — route to a human / the broker"))
            report.escalated_at = index
            return report

        surface = session.perceive()
        handle, via, value = _resolve_exact(step, surface, normalized_overrides)

        attempts_left = max_retries
        while handle is None and attempts_left > 0:
            blocker = _interstitial(surface)
            if blocker is not None:
                # A cookie/consent/popup that wasn't demonstrated: dismiss and log it.
                session.act(blocker, "click", None)
                dismiss_step = Step(kind="click", role="button",
                                    name="(dismiss interstitial)")
                report.outcomes.append(
                    StepOutcome(dismiss_step, "interstitial", handle=blocker))
            # Re-perceive either way: the banner is gone, or content has settled.
            surface = session.perceive()
            handle, via, value = _resolve_exact(step, surface, normalized_overrides)
            attempts_left -= 1

        # GENERALISE only after the exact retries have failed, and only for clicks.
        if handle is None and step.kind == "click":
            handle = await _model_choose(step, surface, model)
            via = "model"

        if handle is None:
            report.outcomes.append(StepOutcome(
                step, "unresolved", ok=False, detail="no resolvable target — escalate"))
            report.escalated_at = index
            return report

        session.act(handle, step.kind, value)
        report.outcomes.append(StepOutcome(step, via, handle=handle, value=value))

    report.completed = True
    return report
zu_shadow/live.py ADDED
@@ -0,0 +1,106 @@
1
+ """The LIVE recorder binding — real Chromium + a real human, over CDP.
2
+
3
+ This is the demo/manual half of Shadow: it drives a real browser and watches a real
4
+ human do the task, translating CDP events into the SAME abstract ``RawInput`` items
5
+ the offline recorder consumes. Because the live binding produces the identical
6
+ stream the synthetic tests do, the offline core (recorder → redaction → synthesizer
7
+ → replay gate) is exercised exactly as it is live — nothing about the live path is
8
+ special-cased downstream.
9
+
10
+ It is NOT unit-tested offline (it needs a real Chromium + a human), so it sits
11
+ behind the ``live`` extra and this manual entrypoint, guarded so importing it
12
+ without the browser tools fails with an actionable message rather than at runtime.
13
+ The accessibility tree (CDP ``Accessibility.getFullAXTree`` / the §4 locate op) is
14
+ what makes capture SEMANTIC: each interacted node is resolved to its
15
+ ``{role, name, label}`` — never a selector or coordinate — before it becomes a
16
+ ``RawInput``.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from collections.abc import AsyncIterator
22
+
23
+ from zu_core.bus import EventBus
24
+
25
+ from .capture import SemanticTarget
26
+ from .recorder import RawInput, RecordedSession, Recorder
27
+ from .redaction import RedactionPolicy
28
+
29
+
30
+ def _require_browser() -> None:
31
+ try:
32
+ import zu_tools.browser # noqa: F401
33
+ except ModuleNotFoundError as exc: # pragma: no cover - live-only path
34
+ raise RuntimeError(
35
+ "the live Shadow recorder needs the browser tools: pip install 'zu-shadow[live]'. "
36
+ "The offline core (synthetic stream → recorder → synthesizer → gate) needs none."
37
+ ) from exc
38
+
39
+
40
def ax_node_to_target(node: dict) -> SemanticTarget:
    """Resolve one accessibility node to a SEMANTIC target: role, name and label.

    Only the ``role``, ``name`` and ``label`` keys are read, so any selector or pixel
    position on the node is deliberately DROPPED and never becomes part of the
    record. A missing/empty role becomes ``"generic"``; a missing/empty label falls
    back to the name.
    """
    # NOTE(review): each field is passed through ``str``; this presumably expects
    # flat strings (raw CDP AXNode fields are AXValue objects) — confirm upstream.
    accessible_name = str(node.get("name", "") or "")
    role = str(node.get("role", "") or "generic")
    label = str(node.get("label", "") or accessible_name)
    return SemanticTarget(role=role, name=accessible_name, label=label)
50
+
51
+
52
async def record_live(  # pragma: no cover - live-only, manual entrypoint
    cdp_events: AsyncIterator[dict],
    *,
    site: str,
    bus: EventBus | None = None,
    policy: RedactionPolicy | None = None,
) -> RecordedSession:
    """Drive a live recording from a stream of CDP events.

    Each CDP event is translated to a ``RawInput`` (events with no mapping are
    skipped) and folded through the SAME :class:`Recorder` the offline path uses, so
    redaction-before-append holds identically on a live session. When the stream is
    exhausted the recorder is ended and everything on the bus is returned as a
    ``RecordedSession``.

    Manual entrypoint: requires the browser tools and a real CDP source. Wire a real
    Chromium target's CDP feed as ``cdp_events`` (e.g. via ``zu_tools.browser``).
    A fresh ``EventBus`` is created when ``bus`` is not supplied.
    """
    _require_browser()
    active_bus = bus if bus else EventBus()
    recorder = Recorder(active_bus, site=site, policy=policy)

    await recorder.start()
    async for cdp_event in cdp_events:
        raw_item = _cdp_to_raw(cdp_event)
        if raw_item is None:
            continue
        await recorder.record(raw_item)
    await recorder.end()

    logged = await active_bus.query()
    return RecordedSession(site=site, events=list(logged))
78
+
79
+
80
def _cdp_to_raw(cdp: dict) -> RawInput | None:
    """Map one CDP message to a ``RawInput``, or return ``None`` to skip it.

    The live CDP method names are translated here so the rest of Shadow never sees
    CDP. Mapping:

    * ``Input.dispatchMouseEvent`` with ``type == "mousePressed"`` → ``click``
    * ``Input.insertText`` → ``type``
    * ``Page.navigate`` → ``navigate``
    * ``Page.frameStoppedLoading`` / ``Page.loadEventFired`` → ``page``
    * ``Network.responseReceived`` → ``network`` (URL, status and host only)

    ``params``, ``ax_node``, ``response`` and ``status`` are all tolerated when
    present but ``None`` (JSON ``null``), so one sparse message cannot abort a live
    recording with an ``AttributeError``/``TypeError``.
    """
    from urllib.parse import urlsplit

    method = cdp.get("method", "")
    params = cdp.get("params", {}) or {}

    # NOTE(review): ``ax_node``, ``intent`` and ``title`` are not standard CDP
    # fields — presumably added by the feed that wraps CDP; confirm with the producer.
    if method == "Input.dispatchMouseEvent" and params.get("type") == "mousePressed":
        node = params.get("ax_node") or {}
        return RawInput(kind="click", target=ax_node_to_target(node),
                        intent=params.get("intent"))
    if method == "Input.insertText":
        node = params.get("ax_node") or {}
        return RawInput(kind="type", target=ax_node_to_target(node),
                        value=params.get("text", ""), intent=params.get("intent"))
    if method == "Page.navigate":
        return RawInput(kind="navigate", url=params.get("url", ""),
                        intent=params.get("intent"))
    if method in ("Page.frameStoppedLoading", "Page.loadEventFired"):
        return RawInput(kind="page", url=params.get("url", ""),
                        title=params.get("title", ""))
    if method == "Network.responseReceived":
        response = params.get("response", {}) or {}
        url = response.get("url", "")
        # A missing or null status is recorded as 0 rather than raising.
        status = int(response.get("status") or 0)
        return RawInput(kind="network", url=url, status=status,
                        host=urlsplit(url).hostname or "")
    return None