zu-shadow 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_shadow/__init__.py +46 -0
- zu_shadow/capture.py +119 -0
- zu_shadow/executor.py +273 -0
- zu_shadow/live.py +106 -0
- zu_shadow/live_capture.py +340 -0
- zu_shadow/live_executor.py +242 -0
- zu_shadow/recorder.py +190 -0
- zu_shadow/redaction.py +213 -0
- zu_shadow/replay_gate.py +133 -0
- zu_shadow/scale.py +87 -0
- zu_shadow/synthesizer.py +346 -0
- zu_shadow-0.1.13.dist-info/METADATA +81 -0
- zu_shadow-0.1.13.dist-info/RECORD +14 -0
- zu_shadow-0.1.13.dist-info/WHEEL +4 -0
zu_shadow/__init__.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""zu-shadow — author a production agent by DEMONSTRATION (§2.8).
|
|
2
|
+
|
|
3
|
+
A Shadow recording *is* the event bus run over a HUMAN session: the human is the
|
|
4
|
+
policy for that one run, so recording costs almost nothing architecturally — the
|
|
5
|
+
recorder folds an abstract input/CDP stream into ``data.shadow.*`` events on the
|
|
6
|
+
same append-only log everything else uses. Four disciplines are load-bearing:
|
|
7
|
+
|
|
8
|
+
* **Redaction is DEFAULT-ON and runs BEFORE append** (``redaction``): secrets —
|
|
9
|
+
passwords, ``Authorization``/``Cookie`` headers, tokens/API keys, configured PII
|
|
10
|
+
— never reach :meth:`EventSink.append`. The "why" intent text is redacted too.
|
|
11
|
+
* **Capture is SEMANTIC** (``capture``): a user action is named by its target's
|
|
12
|
+
``{role, name, label}`` (the core ``surface`` currency, shared with §4 handles /
|
|
13
|
+
§5 SurfaceView) — never a CSS selector or pixel coordinate.
|
|
14
|
+
* **The synthesizer is itself a Zu agent** (``synthesizer``): driven by a
|
|
15
|
+
``ModelProvider`` (offline-tested with ``ScriptedProvider``), it PROPOSES an
|
|
16
|
+
agent spec + an induced ``Fsm`` + ``Invariant``s; the egress allowlist writes
|
|
17
|
+
itself from the recorded ``network.response`` hosts.
|
|
18
|
+
* **Promotion is GATED by reproduced outcome** (``replay_gate``): a synthesized
|
|
19
|
+
agent does not run on real data until it reproduces the recorded outcome, reusing
|
|
20
|
+
zu-cli's ``offline.py``/``build.py``. The "why" resolutions are reviewed, never
|
|
21
|
+
auto-promoted.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
from .capture import SemanticTarget, capture_click, capture_navigate, capture_type
|
|
27
|
+
from .recorder import RecordedSession, Recorder
|
|
28
|
+
from .redaction import RedactionPolicy, redact_event, redact_text
|
|
29
|
+
from .replay_gate import PromotionVerdict, verify_and_gate
|
|
30
|
+
from .synthesizer import SynthesisResult, Synthesizer
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"PromotionVerdict",
|
|
34
|
+
"RecordedSession",
|
|
35
|
+
"Recorder",
|
|
36
|
+
"RedactionPolicy",
|
|
37
|
+
"SemanticTarget",
|
|
38
|
+
"SynthesisResult",
|
|
39
|
+
"Synthesizer",
|
|
40
|
+
"capture_click",
|
|
41
|
+
"capture_navigate",
|
|
42
|
+
"capture_type",
|
|
43
|
+
"redact_event",
|
|
44
|
+
"redact_text",
|
|
45
|
+
"verify_and_gate",
|
|
46
|
+
]
|
zu_shadow/capture.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""SEMANTIC-TARGET capture — name an action by WHAT it acts on, not WHERE.
|
|
2
|
+
|
|
3
|
+
Every captured user action identifies its target by ``{role, name, label}`` — the
|
|
4
|
+
same accessibility-grounded currency the core ``surface`` types speak (§4 handles /
|
|
5
|
+
§5 ``SurfaceView``). NEVER a CSS selector, an XPath, or a pixel coordinate: those
|
|
6
|
+
are brittle (a redesign breaks them) and untransferable (they cannot feed the §4
|
|
7
|
+
locator / §5 recognizer). A semantic target re-resolves on a changed page, which is
|
|
8
|
+
the whole reason a synthesized agent can be *resilient* rather than pixel-frozen.
|
|
9
|
+
|
|
10
|
+
``SemanticTarget`` is a thin, frozen value object that reuses ``role``/``label``
|
|
11
|
+
exactly as :class:`zu_core.surface.SurfaceAffordance` does, plus the accessible
|
|
12
|
+
``name`` (the click target's accessible name). The capture helpers turn a raw
|
|
13
|
+
abstract-stream event into a redaction-ready ``data.shadow.*`` payload.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel
|
|
19
|
+
|
|
20
|
+
from zu_core import events as ev
|
|
21
|
+
from zu_core.surface import SurfaceAffordance
|
|
22
|
+
|
|
23
|
+
# Lower-case fragments looked for in a type target's role/name/label. A target that
# carries one is treated as a CREDENTIAL input: its typed value is stored under a
# credential-named key, which the redaction stage blanks wholesale, so the secret is
# never recorded verbatim — even before the redaction sweep runs.
_CREDENTIAL_TARGET_HINTS: tuple[str, ...] = (
    # generic secrets
    "password", "passwd", "secret", "token",
    "api key", "api_key", "apikey", "otp",
    "cvv", "cvc", "pin", "security code",
    # payment-card secrets — the agent must NEVER hold
    # these; a real payment goes through the §8 broker.
    "card number", "cardnumber", "card no",
    "credit card", "debit card", "expiration", "expiry",
    "iban", "sort code", "account number",
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class SemanticTarget(BaseModel):
    """The thing a user action was performed on, described semantically.

    A target is identified by its ``role`` (a free-form string such as ``button``,
    ``link`` or ``textbox``), its accessible ``name`` and a human-readable ``label``.
    It deliberately carries no selector and no coordinates, so it can be re-resolved
    on a page that has changed. The model is frozen: once built it is a stable value.
    """

    model_config = {"frozen": True}

    role: str
    name: str = ""
    label: str = ""

    @classmethod
    def from_affordance(cls, a: SurfaceAffordance, *, name: str = "") -> SemanticTarget:
        """Build a target from a core ``SurfaceAffordance``.

        The affordance's ``role`` and ``label`` are copied across. ``name`` is the
        accessible name supplied by the caller; when it is empty the affordance's
        label is used as the name instead.
        """
        resolved_name = name if name else a.label
        return cls(role=a.role, name=resolved_name, label=a.label)

    def to_payload(self) -> dict:
        """Return the target as a plain ``{"role", "name", "label"}`` dict."""
        return dict(role=self.role, name=self.name, label=self.label)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def capture_click(target: SemanticTarget, *, intent: str | None = None) -> tuple[str, dict]:
    """Build the ``(type, payload)`` pair for a ``data.shadow.user.click`` event.

    The payload always holds the target's semantic description under ``"target"``.
    ``intent`` — the optional, reviewed "why" narration — is added under ``"intent"``
    only when one was given; it is carried along, never auto-promoted into the agent.
    """
    click_payload: dict = {"target": target.to_payload()}
    if intent is None:
        return ev.SHADOW_USER_CLICK, click_payload
    click_payload["intent"] = intent
    return ev.SHADOW_USER_CLICK, click_payload
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _is_credential_target(target: SemanticTarget) -> bool:
    """Return True when a type target's role/name/label marks it as a credential input.

    A credential target's value is recorded under a credential-named key that the
    redaction stage blanks.

    Most hints in ``_CREDENTIAL_TARGET_HINTS`` are matched as case-insensitive
    substrings. The short hints ``"pin"`` and ``"otp"`` also occur inside ordinary
    words ("shipping", "opinion", "footprint"), so they must match a WHOLE word.
    Otherwise a "Shipping address" field would be treated as a secret and its value
    redacted. Words are split on non-letters and on camelCase boundaries, so ``PIN``,
    ``pin_code`` and ``cardPin`` still match.
    """
    import re  # local import: this module has no file-level ``re`` import

    raw = f"{target.role} {target.name} {target.label}"
    blob = raw.lower()
    whole_word_only = ("pin", "otp")
    words: set[str] | None = None  # built lazily, only if a whole-word hint is checked
    for hint in _CREDENTIAL_TARGET_HINTS:
        if hint in whole_word_only:
            if words is None:
                words = {
                    w.lower()
                    for w in re.findall(r"[A-Z]?[a-z]+|[A-Z]+(?![a-z])", raw)
                }
            if hint in words:
                return True
        elif hint in blob:
            return True
    return False
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def capture_type(target: SemanticTarget, value: str, *,
                 intent: str | None = None) -> tuple[str, dict]:
    """Build the ``(type, payload)`` pair for a ``data.shadow.user.type`` event.

    Capture only MARKS a credential: when the target looks like a credential input
    the typed value is stored under the ``"password"`` key, which the redaction stage
    (run before append) blanks wholesale. Any other value is stored under ``"value"``
    and is still swept for token shapes by redaction. ``intent`` is added under
    ``"intent"`` only when one was given.
    """
    typed_payload: dict = {"target": target.to_payload()}
    # credential-named key ⇒ redaction blanks it wholesale
    value_key = "password" if _is_credential_target(target) else "value"
    typed_payload[value_key] = value
    if intent is not None:
        typed_payload["intent"] = intent
    return ev.SHADOW_USER_TYPE, typed_payload
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def capture_navigate(url: str, *, intent: str | None = None) -> tuple[str, dict]:
    """Build the ``(type, payload)`` pair for a ``data.shadow.user.navigate`` event.

    The payload carries the URL as given under ``"url"`` (this helper does not alter
    it; per the module design the redaction sweep runs before the log append) and the
    optional ``intent`` under ``"intent"`` when one was supplied.
    """
    nav_payload: dict = {"url": url}
    if intent is None:
        return ev.SHADOW_USER_NAVIGATE, nav_payload
    nav_payload["intent"] = intent
    return ev.SHADOW_USER_NAVIGATE, nav_payload
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def capture_page_loaded(url: str, title: str) -> tuple[str, dict]:
    """Build the ``(type, payload)`` pair for a ``data.shadow.page.loaded`` event.

    Records a settled page by its ``url`` and ``title`` — the locus a subsequent
    action's semantic target re-resolves against.
    """
    page_payload = {"url": url, "title": title}
    return ev.SHADOW_PAGE_LOADED, page_payload
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def capture_network_response(url: str, status: int, host: str) -> tuple[str, dict]:
    """Build the ``(type, payload)`` pair for a ``data.shadow.network.response`` event.

    Only metadata is recorded — the ``url``, the ``status`` and the ``host``; no body
    and no headers. The synthesized agent's egress allowlist is induced from the
    ``host`` values across these events.
    """
    response_payload = {"url": url, "status": status, "host": host}
    return ev.SHADOW_NETWORK_RESPONSE, response_payload
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def capture_scroll(direction: str, y: int = 0) -> tuple[str, dict]:
    """Build the ``(type, payload)`` pair for a ``data.shadow.user.scroll`` event.

    A scroll is context rather than an action step: it records that the human had to
    scroll to reach the next thing. ``direction`` is kept when it is ``"up"`` or
    ``"down"``; anything else is recorded as ``"down"``. ``y`` is coerced with ``int``.
    """
    if direction in ("up", "down"):
        settled = direction
    else:
        settled = "down"  # unrecognised directions fall back to a downward scroll
    return ev.SHADOW_USER_SCROLL, {"direction": settled, "y": int(y)}
|
zu_shadow/executor.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""The live executor — the agent USES a Shadow recording to do the task itself, and
|
|
2
|
+
GENERALISES it.
|
|
3
|
+
|
|
4
|
+
Record the task once (buy a muzzle); Shadow synthesises the path; this executor then
|
|
5
|
+
RE-RUNS it on the live site — and the next run can vary it (search "collars" instead of
|
|
6
|
+
"muzzles"). It is the §1.5 division made concrete: the recording bounds the action space
|
|
7
|
+
(the demonstrated procedure + semantic anchors), and where the live page diverges from
|
|
8
|
+
the demonstration the MODEL proposes within the bounded affordance set while the harness
|
|
9
|
+
disposes. Three resolution modes per step:
|
|
10
|
+
|
|
11
|
+
* EXACT — the demonstrated target still exists (a fixed-flow control like "Add to
|
|
12
|
+
cart" / "Check out") → re-resolve it by role+name and act.
|
|
13
|
+
* PARAM — a typed value is overridden ("muzzles" → "collars"; the customer's own
|
|
14
|
+
name/address) → type the override into the field.
|
|
15
|
+
* MODEL — the demonstrated specific target is GONE (you searched collars, so the
|
|
16
|
+
muzzle product link isn't there) → the model picks the best handle from the
|
|
17
|
+
CURRENT affordances (it emits a handle, never a selector), generalising.
|
|
18
|
+
|
|
19
|
+
The COMMIT BOUNDARY (a payment / place-order step) is never auto-crossed: the executor
|
|
20
|
+
escalates before it (a real payment is a §8 brokered capability, never the captured card).
|
|
21
|
+
The browser is an injected ``BrowserSession`` — a fake drives it at $0 in tests; the live
|
|
22
|
+
Playwright binding drives real Chrome.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import re
|
|
28
|
+
from dataclasses import dataclass, field
|
|
29
|
+
from typing import Any, Protocol, runtime_checkable
|
|
30
|
+
|
|
31
|
+
from zu_core import events as ev
|
|
32
|
+
from zu_core.ports import ModelProvider, ModelRequest
|
|
33
|
+
from zu_core.surface import SurfaceView
|
|
34
|
+
|
|
35
|
+
from .redaction import REDACTED
|
|
36
|
+
|
|
37
|
+
# Affordance roles offered to the model as click candidates.
_CLICKABLE = frozenset({
    "button", "link", "checkbox", "radio", "switch", "tab",
    "menuitem", "option", "row", "gridcell",
})
# Text-entry roles; treated as interchangeable when re-resolving a type target.
_FIELDS = frozenset({"textbox", "searchbox", "combobox"})
# Steps whose name names an irreversible money/commit action — never auto-crossed.
_COMMIT = re.compile(
    r"(?i)\b(place order|pay now|pay$|buy now|complete (order|purchase|"
    r"payment)|confirm (and )?pay|submit order|checkout & pay)\b"
)
# A payment-card field — the agent must NEVER type a card; a real payment is a §8 brokered
# capability. A redacted secret value means the same: the agent doesn't hold the secret.
_PAYMENT_FIELD = re.compile(
    r"(?i)\b(card number|cardnumber|card no|credit card|debit card|"
    r"expiration|expiry|cvv|cvc|security code|iban|sort code|"
    r"account number)\b"
)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
class Step:
    """A single step of the demonstrated path.

    Captures what to do, the target it acts on (by role + name), the value typed,
    the human's stated why, and whether the step crosses the commit boundary.
    Instances are immutable.
    """

    # "click" | "type" | "navigate"
    kind: str
    # Target role and name; both empty for a navigate step.
    role: str = ""
    name: str = ""
    # Typed text for a type step, the URL for a navigate step, otherwise None.
    value: str | None = None
    # The human's optional "why" narration.
    intent: str | None = None
    # True when the step must not be crossed automatically.
    committing: bool = False
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class StepOutcome:
    """What happened when the executor handled one step."""

    # The step this outcome belongs to.
    step: Step
    # "exact" | "param" | "model" | "navigate" | "escalated" | "unresolved"
    via: str
    # The affordance handle acted on, or None when nothing was acted on.
    handle: str | None = None
    # The value passed to the action, if any.
    value: str | None = None
    # False for an escalated or unresolved step.
    ok: bool = True
    # Free-text explanation, used when ``ok`` is False.
    detail: str = ""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class RunReport:
    """The result of one executor run: per-step outcomes plus how the run ended."""

    # One entry per handled step, in order.
    outcomes: list[StepOutcome] = field(default_factory=list)
    # True only when every step was worked through without stopping early.
    completed: bool = False
    # Index of the step the run stopped at, or None if it did not stop early.
    escalated_at: int | None = None

    @property
    def acted(self) -> list[StepOutcome]:
        """The outcomes that actually operated an affordance (``handle`` is set)."""
        performed: list[StepOutcome] = []
        for outcome in self.outcomes:
            if outcome.handle is not None:
                performed.append(outcome)
        return performed
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@runtime_checkable
class BrowserSession(Protocol):
    """The live browser the executor drives.

    Both the fake test double and the live Playwright binding satisfy this protocol.
    It is runtime-checkable, so ``isinstance`` can be used to verify that an object
    provides these three methods.
    """

    def perceive(self) -> SurfaceView:
        """Return the CURRENT page's affordances (the Action Surface)."""
        ...

    def act(self, handle: str, kind: str, value: str | None = None) -> None:
        """Operate one affordance by its opaque handle — never by a selector."""
        ...

    def current_url(self) -> str:
        """Return the URL of the current page."""
        ...
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _norm(s: str | None) -> str:
    """Normalise a label for comparison.

    ``None`` becomes the empty string; every run of whitespace collapses to a single
    space; surrounding whitespace is trimmed; the result is lower-cased.
    """
    text = s or ""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip().lower()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def steps_from_recording(events: list[Any]) -> list[Step]:
    """Turn a recording's events into the executable path.

    Only click, type and navigate events become steps; every other event is skipped.
    A click/type step is named by its target's ``name`` (falling back to ``label``),
    and its value is read from the payload's ``value`` key, falling back to
    ``password`` (where a credential field's redacted value lives).

    A step is marked ``committing`` when it is a click whose name matches the commit
    pattern, when its value equals the redaction marker, or when its name matches the
    payment-field pattern.

    Two cleanups then run over the parsed steps:
      * a click immediately followed by a type on the same (normalised) name is
        dropped — it was only the focus click;
      * a step identical in kind, normalised name and value to the step kept just
        before it is dropped — a consecutive duplicate.
    """
    parsed: list[Step] = []
    for event in events:
        etype = getattr(event, "type", "")
        payload = getattr(event, "payload", {}) or {}
        if etype == ev.SHADOW_USER_NAVIGATE:
            parsed.append(Step(kind="navigate", value=payload.get("url", "")))
            continue
        if etype not in (ev.SHADOW_USER_CLICK, ev.SHADOW_USER_TYPE):
            continue
        target = payload.get("target", {}) or {}
        is_click = etype == ev.SHADOW_USER_CLICK
        label = target.get("name") or target.get("label") or ""
        typed = payload.get("value")
        if typed is None:
            # a credential field's (redacted) value lives under this key
            typed = payload.get("password")
        crosses_boundary = (
            (is_click and bool(_COMMIT.search(label)))  # an irreversible order/pay click
            or typed == REDACTED                        # a step needing a secret the agent lacks
            or bool(_PAYMENT_FIELD.search(label))       # a payment-card field — brokered (§8)
        )
        parsed.append(Step(
            kind="click" if is_click else "type",
            role=target.get("role", ""),
            name=label,
            value=typed,
            intent=payload.get("intent"),
            committing=crosses_boundary,
        ))

    # The whys live on the events and are reviewed separately; only structure is cleaned.
    cleaned: list[Step] = []
    total = len(parsed)
    for idx, current in enumerate(parsed):
        following = parsed[idx + 1] if idx + 1 < total else None
        is_focus_click = (
            current.kind == "click"
            and following is not None
            and following.kind == "type"
            and _norm(following.name) == _norm(current.name)
        )
        if is_focus_click:
            continue
        previous = cleaned[-1] if cleaned else None
        is_repeat = (
            previous is not None
            and previous.kind == current.kind
            and _norm(previous.name) == _norm(current.name)
            and previous.value == current.value
        )
        if not is_repeat:
            cleaned.append(current)
    return cleaned
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _match(surface: SurfaceView, role: str, name: str) -> str | None:
    """Re-resolve the demonstrated target on the CURRENT page by role + name.

    Resolution order (first hit wins):
      1. an exact (normalised) label match whose role is compatible with ``role``;
      2. an exact label match of any role — the fallback that keeps a step resolvable
         when the recorded role vocabulary differs from the live one;
      3. a contained match (either label contains the other), role-compatible —
         robust to small label drift.

    A role is compatible when ``role`` is empty, when the roles are equal, or when
    both are text-entry roles. Preferring the role-compatible exact match matters
    when a page has, for example, both a "Search" textbox and a "Search" button:
    a type step must land on the field, not on whichever comes first.

    Returns the matching affordance's handle, or ``None`` when ``name`` is empty or
    nothing matches.
    """
    wanted = _norm(name)
    if not wanted:
        return None

    def role_ok(candidate_role: str) -> bool:
        return (not role or candidate_role == role
                or (role in _FIELDS and candidate_role in _FIELDS))

    exact_any_role: str | None = None
    for a in surface.affordances:
        if _norm(a.label) != wanted:
            continue
        if role_ok(a.role):
            return a.handle
        if exact_any_role is None:
            exact_any_role = a.handle  # remember the first, keep looking for a better one
    if exact_any_role is not None:
        return exact_any_role

    for a in surface.affordances:
        label = _norm(a.label)
        if label and (wanted in label or label in wanted) and role_ok(a.role):
            return a.handle
    return None
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _first_field(surface: SurfaceView) -> str | None:
    """Return the handle of the first text-entry affordance on the page, or ``None``
    when the page has none."""
    field_handles = (a.handle for a in surface.affordances if a.role in _FIELDS)
    return next(field_handles, None)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# Labels of controls that dismiss a blocking overlay (cookie/consent banner, popup)
# the demonstration did not include. Generic verbs only, and anchored at both ends so
# the WHOLE label must be the verb — a dismiss button matches, "Accept terms" does not.
# Accepting/closing a banner is reversible; it just unblocks the step.
_DISMISS = re.compile(
    r"(?i)^(accept( all)?( cookies)?|agree|i agree|allow( all)?|got it|"
    r"ok(ay)?|continue|close|dismiss|no thanks|reject( all)?|"
    r"accept all cookies)$"
)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _interstitial(surface: SurfaceView) -> str | None:
    """Find a dismiss control for a cookie/consent/popup overlay blocking the step.

    Returns the handle of the first button or link whose normalised label matches the
    dismiss pattern, or ``None`` when there is none — so a run is not derailed by an
    interstitial that was not in the recording.
    """
    dismissers = (
        a.handle
        for a in surface.affordances
        if a.role in ("button", "link") and _DISMISS.match(_norm(a.label))
    )
    return next(dismissers, None)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _resolve_exact(step: Step, surface: SurfaceView,
                   ov: dict[str, str]) -> tuple[str | None, str, str | None]:
    """Resolve a step WITHOUT the model.

    Returns ``(handle, via, value)``:
      * ``via == "exact"`` — the demonstrated target was re-resolved by role + name;
      * ``via == "param"`` — a type step whose target was not found falls back to the
        first text-entry field on the page;
      * ``handle is None`` and ``via == ""`` — nothing resolved.

    ``value`` is only set for type steps: the override keyed by the step's normalised
    name when ``ov`` has one, otherwise the recorded value. Model-choice
    generalisation is the caller's LAST resort, tried only after exact retries fail.
    """
    is_type = step.kind == "type"
    if is_type:
        value = ov.get(_norm(step.name), step.value)
    else:
        value = None

    matched = _match(surface, step.role, step.name)
    if matched is not None:
        return matched, "exact", value
    if not is_type:
        return None, "", value

    fallback = _first_field(surface)
    if fallback is None:
        return None, "", value
    return fallback, "param", value
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
async def _model_choose(step: Step, surface: SurfaceView, model: ModelProvider) -> str | None:
    """GENERALISE: let the model pick the handle that best continues the task.

    Used when the demonstrated control is gone. The model is shown only the CURRENT
    clickable affordances and must answer with a handle, never a selector.

    Returns ``None`` without calling the model when the page has no clickable
    affordance. Otherwise the reply is accepted only if it names a real handle:
      1. the whole reply (trimmed, surrounding quotes/backticks/full stop removed) is
         a handle — this accepts handles of any shape, including numeric or
         hyphenated ones;
      2. failing that, the first letter-led word token in the reply that is a handle.
    A reply that names no real handle resolves to ``None`` → escalate, never guess.
    """
    clickable = [a for a in surface.affordances if a.role in _CLICKABLE]
    if not clickable:
        return None
    listing = "\n".join(f'{a.handle}: {a.role} "{a.label}"' for a in clickable)
    goal = step.intent or f"{step.kind} {step.name}".strip()
    req = ModelRequest(messages=[
        {"role": "system", "content": "You drive a web agent following a known task on a live "
         "site. The demonstrated control is not on this page. Pick the SINGLE affordance handle "
         "that best continues the task. Reply with ONLY the handle (e.g. a3)."},
        {"role": "user", "content": f"Step to continue: {goal}\nAffordances:\n{listing}\n\nHandle:"},
    ])
    resp = await model.complete(req)
    handles = {a.handle for a in clickable}

    reply = (resp.text or "").strip()
    # The prompt asks for ONLY the handle, so try the reply verbatim first.
    for candidate in (reply, reply.strip("\"'`.").strip()):
        if candidate in handles:
            return candidate
    for tok in re.findall(r"[A-Za-z]+\w*", reply):
        if tok in handles:
            return tok
    return None
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
async def execute(
    steps: list[Step],
    session: BrowserSession,
    model: ModelProvider,
    *,
    overrides: dict[str, str] | None = None,
    on_commit: str = "escalate",
    max_retries: int = 2,
) -> RunReport:
    """Drive the demonstrated path on the live ``session``.

    Args:
        steps: the executable path, e.g. from ``steps_from_recording``.
        session: the browser to perceive and act on.
        model: consulted only for a click step whose target could not be re-resolved.
        overrides: typed values keyed by a step's name (e.g. ``{"search": "collars"}``);
            keys are normalised before lookup.
        on_commit: ``"escalate"`` (the default) stops the run at the commit boundary.
        max_retries: how many times to dismiss an interstitial / re-perceive before
            giving up on an exact resolution.

    Returns:
        A ``RunReport``. ``completed`` is True only if every step was worked through;
        otherwise ``escalated_at`` holds the index of the step the run stopped at.

    Navigate steps are recorded but not performed (they are a consequence of the
    prior act). When a target is not found, a blocking interstitial (cookie / consent
    / popup) is dismissed and the page RE-PERCEIVED before escalating.

    The commit boundary is enforced twice while ``on_commit == "escalate"``: on the
    recorded step (``step.committing``), and again on the affordance the step actually
    RESOLVED to. The second check is needed because a contained-label match, the
    first-field fallback or the model's choice can land on a control other than the
    demonstrated one — e.g. a live "Place order" button or a card-number field — and
    that must never be operated automatically.
    """
    ov = {_norm(k): v for k, v in (overrides or {}).items()}
    report = RunReport()
    for i, step in enumerate(steps):
        if step.kind == "navigate":
            report.outcomes.append(StepOutcome(step, "navigate"))  # a consequence of the prior act
            continue
        if step.committing and on_commit == "escalate":
            report.outcomes.append(StepOutcome(step, "escalated", ok=False,
                                               detail="commit boundary — route to a human / the broker"))
            report.escalated_at = i
            return report

        surface = session.perceive()
        handle, via, value = _resolve_exact(step, surface, ov)
        tries = 0
        while handle is None and tries < max_retries:
            inter = _interstitial(surface)
            if inter is not None:  # dismiss a cookie/consent/popup that wasn't demonstrated
                session.act(inter, "click", None)
                report.outcomes.append(StepOutcome(
                    Step(kind="click", role="button", name="(dismiss interstitial)"),
                    "interstitial", handle=inter))
            surface = session.perceive()  # re-perceive: the banner is gone / content settled
            handle, via, value = _resolve_exact(step, surface, ov)
            tries += 1

        if handle is None and step.kind == "click":  # GENERALISE only after exact retries fail
            handle, via = await _model_choose(step, surface, model), "model"

        if handle is None:
            report.outcomes.append(StepOutcome(step, "unresolved", ok=False,
                                               detail="no resolvable target — escalate"))
            report.escalated_at = i
            return report

        # Re-check the boundary against what was actually resolved, not just the recording.
        if on_commit == "escalate" and _resolved_crosses_commit(surface, handle, step.kind):
            report.outcomes.append(StepOutcome(
                step, "escalated", ok=False,
                detail="commit boundary — the resolved control is a commit/payment control; "
                       "route to a human / the broker"))
            report.escalated_at = i
            return report

        session.act(handle, step.kind, value)
        report.outcomes.append(StepOutcome(step, via, handle=handle, value=value))

    report.completed = True
    return report


def _resolved_crosses_commit(surface: SurfaceView, handle: str, kind: str) -> bool:
    """Return True when the affordance ``handle`` names on ``surface`` is itself a
    commit control (for a click) or a payment-card field — the same two label tests
    ``steps_from_recording`` applies to a recorded step's name. An unknown handle
    is reported as not crossing."""
    for a in surface.affordances:
        if a.handle == handle:
            label = a.label or ""
            return bool(
                (kind == "click" and _COMMIT.search(label))
                or _PAYMENT_FIELD.search(label)
            )
    return False
|
zu_shadow/live.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""The LIVE recorder binding — real Chromium + a real human, over CDP.
|
|
2
|
+
|
|
3
|
+
This is the demo/manual half of Shadow: it drives a real browser and watches a real
|
|
4
|
+
human do the task, translating CDP events into the SAME abstract ``RawInput`` items
|
|
5
|
+
the offline recorder consumes. Because the live binding produces the identical
|
|
6
|
+
stream the synthetic tests do, the offline core (recorder → redaction → synthesizer
|
|
7
|
+
→ replay gate) is exercised exactly as it is live — nothing about the live path is
|
|
8
|
+
special-cased downstream.
|
|
9
|
+
|
|
10
|
+
It is NOT unit-tested offline (it needs a real Chromium + a human), so it sits
|
|
11
|
+
behind the ``live`` extra and this manual entrypoint, guarded so calling it
without the browser tools fails with an actionable message rather than a bare import error.
|
|
13
|
+
The accessibility tree (CDP ``Accessibility.getFullAXTree`` / the §4 locate op) is
|
|
14
|
+
what makes capture SEMANTIC: each interacted node is resolved to its
|
|
15
|
+
``{role, name, label}`` — never a selector or coordinate — before it becomes a
|
|
16
|
+
``RawInput``.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from collections.abc import AsyncIterator
|
|
22
|
+
|
|
23
|
+
from zu_core.bus import EventBus
|
|
24
|
+
|
|
25
|
+
from .capture import SemanticTarget
|
|
26
|
+
from .recorder import RawInput, RecordedSession, Recorder
|
|
27
|
+
from .redaction import RedactionPolicy
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _require_browser() -> None:
    """Check that the browser tools are importable.

    Raises:
        RuntimeError: when ``zu_tools.browser`` cannot be found; the message names
            the extra to install and the original import error is chained as cause.
    """
    try:
        import zu_tools.browser  # noqa: F401
    except ModuleNotFoundError as exc:  # pragma: no cover - live-only path
        message = (
            "the live Shadow recorder needs the browser tools: pip install 'zu-shadow[live]'. "
            "The offline core (synthetic stream → recorder → synthesizer → gate) needs none."
        )
        raise RuntimeError(message) from exc
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def ax_node_to_target(node: dict) -> SemanticTarget:
    """Resolve one CDP accessibility node to a SEMANTIC target — its role, accessible
    name and label.

    This is the single place selectors/coordinates are deliberately DROPPED: only
    ``role``, ``name`` and ``label`` are read from ``node``, so a live click's pixel
    position never becomes part of the record.

    Each property may be a plain value or a raw CDP ``AXValue`` object
    (``{"type": ..., "value": ...}``); an ``AXValue`` is unwrapped to its ``value``
    so the target holds the text rather than the dict's repr. A missing or empty
    role becomes ``"generic"``; a missing or empty label falls back to the name.
    A ``None`` node is treated as an empty one.
    """
    source = node or {}

    def text(key: str) -> str:
        raw = source.get(key, "")
        if isinstance(raw, dict):  # raw CDP AXValue — keep only its value
            raw = raw.get("value", "")
        return str(raw or "")

    name = text("name")
    return SemanticTarget(
        role=text("role") or "generic",
        name=name,
        label=text("label") or name,
    )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
async def record_live(  # pragma: no cover - live-only, manual entrypoint
    cdp_events: AsyncIterator[dict],
    *,
    site: str,
    bus: EventBus | None = None,
    policy: RedactionPolicy | None = None,
) -> RecordedSession:
    """Drive a live recording from a stream of CDP events.

    Each CDP event is translated into a ``RawInput`` (events with no mapping are
    skipped) and folded through the SAME :class:`Recorder` the offline path uses, so
    redaction-before-append holds identically on a live session.

    Args:
        cdp_events: the CDP feed of a real Chromium target (e.g. via ``zu_tools.browser``).
        site: the site the session is recorded against.
        bus: the event bus to record onto; a fresh ``EventBus`` is created only when
            this is ``None``. The check is on identity, not truthiness, so a supplied
            bus that happens to be falsy (e.g. empty) is never silently swapped out.
        policy: the redaction policy handed to the recorder.

    Returns:
        A ``RecordedSession`` holding ``site`` and everything queried from the bus.

    Raises:
        RuntimeError: when the browser tools are not installed.
    """
    _require_browser()
    if bus is None:
        bus = EventBus()
    recorder = Recorder(bus, site=site, policy=policy)
    await recorder.start()
    async for cdp in cdp_events:
        item = _cdp_to_raw(cdp)
        if item is not None:
            await recorder.record(item)
    await recorder.end()
    events = await bus.query()
    return RecordedSession(site=site, events=list(events))
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _cdp_to_raw(cdp: dict) -> RawInput | None:
    """Map one CDP event to a ``RawInput``, or ``None`` to skip it.

    The live CDP method names are translated here into the abstract kinds the offline
    stream uses, so the rest of Shadow never sees CDP:

      * ``Input.dispatchMouseEvent`` with ``type == "mousePressed"`` → ``click``
      * ``Input.insertText`` → ``type``
      * ``Page.navigate`` → ``navigate``
      * ``Page.frameStoppedLoading`` / ``Page.loadEventFired`` → ``page``
      * ``Network.responseReceived`` → ``network`` (url, status and host only)

    Malformed input must not abort a live recording, so a missing/``None``
    ``ax_node``, ``status`` or ``url`` is treated as empty, and a response URL that
    cannot be parsed yields an empty host instead of raising.
    """
    from urllib.parse import urlsplit  # function-local, as this module has no top-level import

    method = cdp.get("method", "")
    params = cdp.get("params", {}) or {}
    intent = params.get("intent")

    if method == "Input.dispatchMouseEvent" and params.get("type") == "mousePressed":
        return RawInput(kind="click",
                        target=ax_node_to_target(params.get("ax_node") or {}),
                        intent=intent)
    if method == "Input.insertText":
        return RawInput(kind="type",
                        target=ax_node_to_target(params.get("ax_node") or {}),
                        value=params.get("text", ""), intent=intent)
    if method == "Page.navigate":
        return RawInput(kind="navigate", url=params.get("url", ""), intent=intent)
    if method in ("Page.frameStoppedLoading", "Page.loadEventFired"):
        return RawInput(kind="page", url=params.get("url", ""), title=params.get("title", ""))
    if method == "Network.responseReceived":
        resp = params.get("response", {}) or {}
        url = resp.get("url") or ""
        try:
            host = urlsplit(url).hostname or ""
        except ValueError:  # e.g. an unbalanced IPv6 bracket — keep recording
            host = ""
        return RawInput(kind="network", url=url, status=int(resp.get("status") or 0),
                        host=host)
    return None
|