zu-patterns 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_patterns/__init__.py +53 -0
- zu_patterns/_match.py +80 -0
- zu_patterns/autocomplete.py +62 -0
- zu_patterns/cart_checkout.py +102 -0
- zu_patterns/cookie_banner.py +71 -0
- zu_patterns/login_form.py +87 -0
- zu_patterns/modal_dialog.py +64 -0
- zu_patterns/paginated_list.py +52 -0
- zu_patterns/rail.py +86 -0
- zu_patterns/recognizer.py +80 -0
- zu_patterns/reversibility.py +178 -0
- zu_patterns/search.py +778 -0
- zu_patterns/search_box.py +59 -0
- zu_patterns/sortable_table.py +59 -0
- zu_patterns-0.2.2.dist-info/METADATA +69 -0
- zu_patterns-0.2.2.dist-info/RECORD +18 -0
- zu_patterns-0.2.2.dist-info/WHEEL +4 -0
- zu_patterns-0.2.2.dist-info/entry_points.txt +9 -0
zu_patterns/search.py
ADDED
|
@@ -0,0 +1,778 @@
|
|
|
1
|
+
"""Offline guided search — best-first planning OVER the Phase-1 induced FSM.
|
|
2
|
+
|
|
3
|
+
This is the planner half of the §5 stack (the policy prior is the recognizer).
|
|
4
|
+
It runs over ``zu_core.reachability.Fsm`` (REUSE — there is no second FSM type)
|
|
5
|
+
with the pattern recognizer as the move-ordering prior, the same AlphaZero shape
|
|
6
|
+
the doc describes: explore the residual the pattern does not resolve, guided by
|
|
7
|
+
``co_reachable`` (a cheap value estimate) and pruned of ``trap_states``.
|
|
8
|
+
|
|
9
|
+
Two pieces:
|
|
10
|
+
* ``fsm_from_events`` — an EMPIRICAL transition-model builder: fold the event
|
|
11
|
+
log's surface→action→surface triples into FSM edges (the documented future
|
|
12
|
+
``fsm_from_track`` helper, sourced from the event log NOW; Shadow recordings
|
|
13
|
+
EXTEND this — see ``fsm_from_shadow`` below).
|
|
14
|
+
* ``plan`` — best-first search over the FSM, ordered by
|
|
15
|
+
``f = co-reachability + prior``, pruning edges into traps, and FLAGGING which
|
|
16
|
+
edges cross a committing boundary (so the deferred live executor knows where
|
|
17
|
+
lookahead must stop). Offline the whole learned graph is explorable; the plan
|
|
18
|
+
never auto-crosses a COMMITTING edge in the live seam.
|
|
19
|
+
|
|
20
|
+
Now also:
|
|
21
|
+
* ``live_mpc_step`` / ``mpc_run`` — the LIVE guided-MPC loop (§5.2, the
|
|
22
|
+
AlphaZero shape): the model PROPOSES ≤K candidates (the recognizer is the
|
|
23
|
+
move-ordering prior), a shallow lookahead over the learned FSM DISPOSES via
|
|
24
|
+
the rail (``co_reachable``/traps), one REVERSIBLE step executes via an injected
|
|
25
|
+
executor, then re-plan — STOPPING at the commit boundary (default-to-committing).
|
|
26
|
+
* ``fsm_from_shadow`` / ``merge_transition_models`` — the Shadow-sourced
|
|
27
|
+
transition model: fold a recording's induced FSM / shadow events into the SAME
|
|
28
|
+
search model; accumulating recordings GROWS the graph.
|
|
29
|
+
|
|
30
|
+
Pure, offline, $0 — the executor is the only I/O and it is injected (a fake in
|
|
31
|
+
tests, a real browser in production).
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import hashlib
|
|
37
|
+
from collections.abc import Awaitable, Callable, Sequence
|
|
38
|
+
from dataclasses import dataclass, field
|
|
39
|
+
from typing import Any
|
|
40
|
+
|
|
41
|
+
from zu_core import events as ev
|
|
42
|
+
from zu_core.ports import ModelProvider, ModelRequest, RecognitionResult
|
|
43
|
+
from zu_core.reachability import Fsm, FsmEdge, co_reachable, trap_states
|
|
44
|
+
from zu_core.surface import SurfaceAffordance, SurfaceView
|
|
45
|
+
|
|
46
|
+
from .recognizer import recognize
|
|
47
|
+
from .reversibility import Commitment, classify_action
|
|
48
|
+
|
|
49
|
+
# --- (A) the empirical transition-model builder ---------------------------
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _payload(e: Any) -> dict:
    """Return the event's ``payload`` when it is a ``dict``; otherwise ``{}``.

    A missing attribute and a non-dict payload are both treated as "no payload".
    """
    candidate = getattr(e, "payload", None)
    if not isinstance(candidate, dict):
        return {}
    return candidate
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def surface_state_id(payload: dict) -> str:
    """Return a stable digest of a surface payload — the FSM state id.

    Heuristic (documented): when ``url`` and/or ``title`` is present the id is
    derived from that pair (a web locus); otherwise it is derived from the sorted
    affordance ``handles`` (the structural fingerprint of the surface). Two visits
    to the same page collapse to the same state; structurally different surfaces
    stay distinct.

    Args:
        payload: a mapping that may carry ``url``, ``title`` and ``handles``.

    Returns:
        ``"s_"`` followed by the first 12 hex digits of a SHA-256 digest.
    """
    # An explicit ``None`` must count as "absent". ``str(None)`` is the truthy
    # string "None", which would send every url-less, title-less surface down the
    # url/title branch and collapse them all into one state id.
    raw_url = payload.get("url")
    raw_title = payload.get("title")
    url = "" if raw_url is None else str(raw_url)
    title = "" if raw_title is None else str(raw_title)
    if url or title:
        # \x1f (unit separator) keeps the two fields from running together.
        basis = f"url={url}\x1ftitle={title}"
    else:
        handles = payload.get("handles") or []
        # sorted ⇒ the id does not depend on affordance order.
        basis = "h=" + ",".join(sorted(str(h) for h in handles))
    return "s_" + hashlib.sha256(basis.encode()).hexdigest()[:12]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def fsm_from_events(
    events: Sequence[Any],
    *,
    goal_states: frozenset[str] | None = None,
    initial: str | None = None,
) -> Fsm:
    """Fold an event log into an induced ``Fsm`` (pure).

    The log is read as ``ev.SURFACE_CAPTURED`` snapshots interleaved with
    ``ev.TOOL_INVOKED`` actions: each surface→tool→next-surface triple becomes
    ``FsmEdge(src=state_before, dst=state_after, label=action)``. The label is the
    tool name, plus ``:handle`` when a handle is present (top-level ``handle`` or
    ``args["handle"]``). When several tools are invoked between two snapshots,
    only the last one labels the edge.

    Args:
        events: objects exposing ``type`` and (optionally) a dict ``payload``.
        goal_states: accepting states supplied by the caller; they are added to
            the state set even if never observed.
        initial: explicit initial state; defaults to the first observed surface,
            or ``""`` when the log contains no surface.

    Returns:
        The induced ``Fsm``.
    """
    # dict keys give O(1) membership while remembering first-seen order.
    observed: dict[str, None] = {}
    edges: list[FsmEdge] = []
    last_state: str | None = None
    pending_action: str | None = None
    for e in events:
        etype = getattr(e, "type", None)
        if etype == ev.SURFACE_CAPTURED:
            sid = surface_state_id(_payload(e))
            observed.setdefault(sid, None)
            # An edge needs a previous surface AND an action since that surface.
            if last_state is not None and pending_action is not None:
                edges.append(FsmEdge(src=last_state, dst=sid, label=pending_action))
            last_state = sid
            pending_action = None
        elif etype == ev.TOOL_INVOKED:
            p = _payload(e)
            tool = str(p.get("tool", "action"))
            args = p.get("args")
            # Only consult ``args`` when it is a mapping; a malformed payload
            # (list / string) must not crash the fold.
            nested_handle = args.get("handle") if isinstance(args, dict) else None
            handle = p.get("handle") or nested_handle
            pending_action = f"{tool}:{handle}" if handle else tool
    init = initial if initial is not None else next(iter(observed), "")
    accepting = goal_states if goal_states is not None else frozenset()
    # Ensure declared goal states are part of the state set (a goal may be named
    # before it is observed).
    state_set = frozenset(observed) | accepting | ({init} if init else frozenset())
    return Fsm(states=state_set, initial=init, accepting=accepting & state_set, edges=tuple(edges))
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# --- (B) the best-first planner over the induced FSM ----------------------
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass(frozen=True)
class PlanStep:
    """A single planned move along one FSM edge.

    ``committing`` marks an edge that crosses a commit boundary: a live executor
    must STOP at such an edge and never auto-cross it.
    """

    src: str  # state the move starts from
    dst: str  # state the move lands on
    label: str  # action label of the edge taken
    committing: bool  # True when the edge was classified COMMITTING
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass(frozen=True)
class Plan:
    """The outcome of a search from the FSM's initial state toward a goal.

    ``steps`` holds the ordered moves and ``reached_goal`` tells whether that path
    ends in an accepting state. ``crosses_commit`` is True when any step is a
    committing boundary (a live executor must halt before it). ``expansions``
    records the search effort spent; ``detail`` optionally explains a failure.
    """

    steps: tuple[PlanStep, ...]  # ordered moves of the path
    reached_goal: bool  # path ends in an accepting state
    crosses_commit: bool  # at least one step is committing
    expansions: int = 0  # number of nodes popped by the search
    detail: str | None = None  # human-readable note when the goal was not reached
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# Move-ordering prior: maps an edge to a non-negative bonus; a larger bonus means
# the edge is explored earlier.
EdgePrior = Callable[[FsmEdge], float]
# Commitment classifier: maps an edge to REVERSIBLE or COMMITTING.
EdgeClassifier = Callable[[FsmEdge], Commitment]
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _default_classifier(edge: FsmEdge) -> Commitment:
    """OFFLINE-EXPLORATION-ONLY classifier: every edge is REVERSIBLE.

    This is the inverse of the project-wide default-to-committing rail
    discipline, and that is DELIBERATE AND SAFE because it never gates a live
    side-effecting action:

    * ``plan()`` runs purely offline over the learned/remembered FSM. Treating an
      unknown edge as REVERSIBLE only lets the planner LOOK PAST it; nothing is
      executed. The commit boundary is FLAGGED on each ``PlanStep``
      (``committing``) and aggregated in ``Plan.crosses_commit`` — surfaced, not
      crossed.
    * The LIVE seam (``live_mpc_step``) does NOT use this default: it classifies
      the chosen candidate with ``reversibility.classify_action``, which is
      documented to default to COMMITTING on uncertainty (see
      ``test_live_classifier_defaults_to_committing``), and escalates at a
      committing boundary.

    If that separation could ever be violated, flip this to COMMITTING.
    """
    return Commitment.REVERSIBLE
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _default_prior(edge: FsmEdge) -> float:
    """Neutral prior: no edge receives any exploration bonus."""
    return 0.0
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def plan(
    fsm: Fsm,
    *,
    prior: EdgePrior = _default_prior,
    classifier: EdgeClassifier = _default_classifier,
    max_expansions: int = 1000,
) -> Plan:
    """Best-first search from ``fsm.initial`` toward an accepting state (pure).

    The frontier is ordered by ``f = co_reachability(dst) + prior(edge)``: an edge
    whose destination can still reach the goal, preferred by the move-ordering
    prior, is expanded first. Edges into trap states are PRUNED (``trap_states``),
    and a path never revisits a state it already contains. Each pushed edge is
    classified; a committing edge is FLAGGED on its ``PlanStep`` (offline the path
    is still recorded, but ``crosses_commit`` tells a live executor where
    lookahead must stop).

    Args:
        fsm: the learned transition model.
        prior: per-edge exploration bonus (higher ⇒ explored first).
        classifier: per-edge REVERSIBLE/COMMITTING classifier.
        max_expansions: upper bound on the number of nodes popped.

    Returns:
        A ``Plan``. With ``reached_goal=False`` the steps are the longest partial
        path popped so far and ``detail`` says whether the budget ran out or the
        frontier was exhausted.
    """
    import heapq

    co = co_reachable(fsm)
    traps = trap_states(fsm)
    start = fsm.initial
    if start not in fsm.states:
        return Plan(steps=(), reached_goal=False, crosses_commit=False, detail="no initial state")

    # Index outgoing edges by source once instead of rescanning ``fsm.edges`` on
    # every expansion. Per-source order follows ``fsm.edges`` order, so the stable
    # sort below breaks ties exactly as a full scan would.
    outgoing: dict[str, list[FsmEdge]] = {}
    for edge in fsm.edges:
        outgoing.setdefault(edge.src, []).append(edge)

    # Heap entries are (score, seq, steps, state, visited). ``seq`` is unique, so
    # comparison never reaches the non-orderable tail; it also makes ties
    # deterministic (earlier-pushed first).
    counter = 0
    heap: list[tuple[float, int, tuple[PlanStep, ...], str, frozenset[str]]] = [
        (0.0, 0, (), start, frozenset({start}))
    ]
    best_partial: tuple[PlanStep, ...] = ()
    expansions = 0
    while heap and expansions < max_expansions:
        _, _, steps, state, visited = heapq.heappop(heap)
        expansions += 1
        if state in fsm.accepting:
            return Plan(
                steps=steps,
                reached_goal=True,
                crosses_commit=any(s.committing for s in steps),
                expansions=expansions,
            )
        if len(steps) > len(best_partial):
            best_partial = steps
        # Score each outgoing edge ONCE; the negated value serves both as the
        # push-order sort key and as the min-heap priority (highest value first).
        ranked = sorted(
            ((-(float(e.dst in co) + prior(e)), e) for e in outgoing.get(state, ())),
            key=lambda pair: (pair[0], pair[1].label),
        )
        for f, e in ranked:
            # skip traps and states already on this path (no cycles).
            if e.dst in traps or e.dst in visited:
                continue
            committing = classifier(e) is Commitment.COMMITTING
            step = PlanStep(src=e.src, dst=e.dst, label=e.label, committing=committing)
            counter += 1
            heapq.heappush(heap, (f, counter, steps + (step,), e.dst, visited | {e.dst}))
    return Plan(
        steps=best_partial,
        reached_goal=False,
        crosses_commit=any(s.committing for s in best_partial),
        expansions=expansions,
        # a non-empty heap means the budget, not the graph, stopped the search.
        detail="goal not reached within max_expansions" if heap else "frontier exhausted",
    )
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# --- (C) the LIVE guided-MPC loop (§5.2, the AlphaZero shape) --------------
|
|
261
|
+
#
|
|
262
|
+
# MODEL PROPOSES, HARNESS DISPOSES. The model proposes ≤K candidate next actions
|
|
263
|
+
# (policy-pruned branching) — the pattern recognizer supplies the move-ordering
|
|
264
|
+
# PRIOR; a shallow lookahead over the LEARNED ``Fsm`` (the remembered transition
|
|
265
|
+
# model) estimates where each candidate leads; the rail/reachability DISPOSES
|
|
266
|
+
# (co_reachable to the goal? not a trap?). A pattern's prediction is a PRIOR
|
|
267
|
+
# confirmed by the deterministic lookahead/rail, NEVER ground truth.
|
|
268
|
+
#
|
|
269
|
+
# ``live_mpc_step`` is PURE decision logic — no real I/O. The executor is injected
|
|
270
|
+
# into the driver loop (``mpc_run``), so the whole thing is offline-testable with a
|
|
271
|
+
# ScriptedProvider + a hand-built Fsm + a fake executor.
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
# A proposed candidate: the action label (matching an FSM edge ``label``), the
|
|
275
|
+
# affordance handle it acts on (for the executor), and a generic interaction verb
|
|
276
|
+
# (``op``)/``role`` the commit-boundary classifier reads.
|
|
277
|
+
@dataclass(frozen=True)
class Candidate:
    """One proposed next action.

    Only ``label`` is required; every other field defaults to ``None``.
    """

    label: str  # action label, matched against FSM edge labels
    handle: str | None = None  # affordance handle the action targets
    op: str | None = None  # generic interaction verb read by the commit classifier
    role: str | None = None  # affordance role read by the commit classifier
    http_method: str | None = None  # HTTP method, when the proposal states one
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
@dataclass(frozen=True)
class MpcDecision:
    """What one ``live_mpc_step`` decided, and WHY.

    ``action`` is the picked candidate; ``None`` means there was no on-rail/safe
    move and the loop escalates. ``escalate`` is set when the best candidate would
    cross the COMMIT BOUNDARY (a step the live loop must NOT auto-cross) or when
    nothing usable was proposed/reachable. ``committing`` records that the
    chosen/blocking candidate was classified COMMITTING. ``scored`` keeps the full
    ranked ``(candidate, lookahead-score)`` list for audit.
    """

    action: Candidate | None  # chosen candidate, or None when nothing is safe
    escalate: bool  # True ⇒ do not execute; hand off
    rationale: str  # human-readable reason for the decision
    committing: bool = False  # chosen/blocking candidate was COMMITTING
    scored: tuple[tuple[Candidate, float], ...] = ()  # ranked audit trail
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
# Maps a live ``SurfaceView`` to the id of the matching state in the learned FSM.
# Callers may inject their own mapping (offline tests can return an id directly).
SurfaceToState = Callable[[SurfaceView], str]
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _surface_state(surface: SurfaceView) -> str:
    """Default surface→FSM-state mapping.

    Feeds the surface's url, title and affordance handles through
    ``surface_state_id`` — the digest ``fsm_from_events`` also uses — so a live
    surface lands on the learned state when the model remembers it.
    """
    handles = [aff.handle for aff in surface.affordances]
    return surface_state_id(
        {"url": surface.url, "title": surface.title, "handles": handles}
    )
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _prior_for_candidate(
    cand: Candidate, recognized: RecognitionResult | None
) -> float:
    """Move-ordering PRIOR for one candidate.

    Without a recognition the prior is ``0.0``. Otherwise it is the recognizer's
    confidence, plus ``1.0`` when the candidate's handle is one of the handles the
    recognized archetype matched.
    """
    if recognized is None:
        return 0.0
    confidence = recognized.confidence
    targets_matched_handle = (
        cand.handle is not None and cand.handle in recognized.matched_handles
    )
    if targets_matched_handle:
        return confidence + 1.0
    return confidence
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _lookahead_score(fsm: Fsm, co: frozenset[str], traps: frozenset[str],
                     dst: str, depth: int) -> float:
    """SHALLOW lookahead over the LEARNED fsm: how good is landing on ``dst``?

    Pure graph query — no model, no I/O.

    Args:
        fsm: the learned transition model (``accepting`` and ``edges`` are read).
        co: states from which the goal is still reachable.
        traps: states to avoid.
        dst: the state the candidate move would land on.
        depth: lookahead horizon; at least one level is always searched.

    Returns:
        ``-1.0`` for a trap or a state that is neither accepting nor in ``co``;
        ``100.0`` when ``dst`` is accepting; ``100.0 - d`` when the nearest
        accepting state is ``d`` edges away within the horizon; ``1.0`` when
        ``dst`` is co-reachable but the goal lies beyond the horizon.
    """
    if dst in traps:
        return -1.0
    if dst in fsm.accepting:
        return 100.0
    if dst not in co:
        # not co-reachable and not accepting: a dead end for the goal.
        return -1.0

    # Index successors once so the BFS below does not rescan every edge for
    # every frontier state.
    successors: dict[str, list[str]] = {}
    for e in fsm.edges:
        successors.setdefault(e.src, []).append(e.dst)

    # Bounded BFS to the nearest accepting state — closer is better.
    frontier = {dst}
    seen = {dst}
    for d in range(1, max(depth, 1) + 1):
        nxt: set[str] = set()
        for s in frontier:
            for target in successors.get(s, ()):
                if target in seen:
                    continue
                if target in fsm.accepting:
                    return 100.0 - d
                # only keep walking through states that can still reach the goal.
                if target in co:
                    nxt.add(target)
                    seen.add(target)
        if not nxt:
            break
        frontier = nxt
    # co-reachable but goal is beyond the horizon: still on-rail, mild positive.
    return 1.0
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
async def live_mpc_step(
    surface: SurfaceView,
    model: ModelProvider,
    fsm: Fsm,
    patterns: Sequence[Any] = (),
    *,
    k: int = 3,
    depth: int = 2,
    surface_to_state: SurfaceToState | None = None,
    priors: Sequence[Any] = (),
    min_confidence: float = 0.6,
) -> MpcDecision:
    """One guided-MPC step — MODEL PROPOSES, deterministic lookahead+rail DISPOSES.

    PROPOSE: the recognizer runs over ``surface`` and the ``ModelProvider`` is
    asked for ≤K candidate next actions.

    LOOK AHEAD: each candidate label is matched to an FSM edge leaving the current
    state; the edge's destination is scored by ``_lookahead_score``. A candidate
    with no matching edge scores ``-2.0``. The recognizer prior is added scaled by
    ``0.001`` so it only breaks near-ties.

    DISPOSE: candidates are ranked by score (desc) then label. If the best score
    is ``<= 0.0`` the step escalates with ``action=None``. Otherwise the best
    candidate is classified by ``classify_action``; a COMMITTING result escalates
    (the candidate is reported but must not be executed), and anything else is
    returned for execution with ``escalate=False``.

    The only awaited call is ``_propose_candidates`` (which awaits the model).
    """
    locate = surface_to_state if surface_to_state else _surface_state
    here = locate(surface)
    co = co_reachable(fsm)
    traps = trap_states(fsm)

    # PROPOSE — recognizer first, then the model's ≤K candidates.
    recognition = recognize(surface, patterns, min_confidence=min_confidence)
    proposals = await _propose_candidates(surface, model, recognition.result, k=k)
    if not proposals:
        return MpcDecision(
            action=None,
            escalate=True,
            rationale="model proposed no candidates — escalate",
        )

    # LOOK AHEAD — known moves out of the current state, keyed by label.
    # NOTE(review): when two edges from ``here`` share a label, the later one in
    # ``fsm.edges`` wins — confirm that is the intended tie rule.
    known_moves: dict[str, FsmEdge] = {}
    for edge in fsm.edges:
        if edge.src == here:
            known_moves[edge.label] = edge

    def _score(cand: Candidate) -> float:
        move = known_moves.get(cand.label)
        if move is None:
            # no memory of this move from here ⇒ blind; rank below any known move.
            return -2.0
        base = _lookahead_score(fsm, co, traps, move.dst, depth)
        return base + 0.001 * _prior_for_candidate(cand, recognition.result)

    # deterministic ordering: score desc, then label for stable ties.
    ranked = sorted(
        [(cand, _score(cand)) for cand in proposals],
        key=lambda pair: (-pair[1], pair[0].label),
    )
    scored_t = tuple(ranked)
    best, best_score = ranked[0]

    if best_score <= 0.0:
        # trap / unknown / unreachable: the rail disposed against every proposal.
        return MpcDecision(
            action=None,
            escalate=True,
            rationale=(
                f"no on-rail candidate from {here!r} "
                f"(best {best.label!r} scored {best_score:.3f}) — escalate"
            ),
            scored=scored_t,
        )

    # DISPOSE — SAFETY: classify the chosen candidate at the COMMIT BOUNDARY.
    commitment = classify_action(
        http_method=best.http_method, role=best.role, op=best.op, priors=priors
    )
    if commitment is not Commitment.COMMITTING:
        return MpcDecision(
            action=best,
            escalate=False,
            committing=False,
            rationale=(
                f"chosen {best.label!r} → on-rail (score {best_score:.3f}); "
                "REVERSIBLE — execute one step then re-plan"
            ),
            scored=scored_t,
        )
    return MpcDecision(
        action=best,
        escalate=True,
        committing=True,
        rationale=(
            f"chosen on-rail candidate {best.label!r} is COMMITTING "
            "(default-to-committing) — STOP at the commit boundary, escalate"
        ),
        scored=scored_t,
    )
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _aff(surface: SurfaceView, handle: str | None) -> SurfaceAffordance | None:
    """Return the first affordance on ``surface`` whose handle equals ``handle``.

    ``None`` is returned when ``handle`` is ``None`` or nothing matches.
    """
    if handle is None:
        return None
    matches = (aff for aff in surface.affordances if aff.handle == handle)
    return next(matches, None)
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
async def _propose_candidates(
    surface: SurfaceView, model: ModelProvider,
    recognized: RecognitionResult | None, *, k: int,
) -> list[Candidate]:
    """Ask the ModelProvider to PROPOSE ≤K candidate next actions over the surface.

    The model answers with tool calls; each call's ``args`` may carry
    ``{label, handle?, op?, role?, http_method?}``. Missing fields are filled in:
    ``label`` falls back to the tool name, ``op`` to the tool name when it is one
    of ``_OP_NAMES``, and ``role`` to the role of the affordance named by
    ``handle``. Non-string ``handle``/``op``/``role`` values become ``None``.

    Args:
        surface: the current surface.
        model: provider whose ``complete`` is awaited with the proposal request.
        recognized: recognizer result passed through to the request, or ``None``.
        k: maximum number of tool calls kept; ``k <= 0`` keeps none.

    Returns:
        At most ``k`` ``Candidate`` objects, in the model's order.
    """
    req = _proposal_request(surface, recognized, k=k)
    resp = await model.complete(req)
    # Clamp the cap: a negative ``k`` used as a slice bound would drop items from
    # the END of the list instead of keeping none.
    limit = max(k, 0)
    # NOTE(review): ``or ()`` treats a provider returning ``tool_calls=None`` as
    # "no proposals" — confirm against the ModelProvider contract.
    calls = (resp.tool_calls or ())[:limit]
    out: list[Candidate] = []
    for call in calls:
        args = call.args or {}
        label = str(args.get("label") or call.name)
        raw_handle = args.get("handle")
        handle = raw_handle if isinstance(raw_handle, str) else None
        aff = _aff(surface, handle)
        # A bare verb-named tool call (e.g. ``submit``) still carries an op signal.
        op = args.get("op") or (call.name if call.name in _OP_NAMES else None)
        role = args.get("role") or (aff.role if aff is not None else None)
        out.append(
            Candidate(
                label=label,
                handle=handle,
                op=op if isinstance(op, str) else None,
                role=role if isinstance(role, str) else None,
                http_method=(
                    str(args["http_method"]) if args.get("http_method") else None
                ),
            )
        )
    return out
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
# Tool names that are themselves generic interaction verbs: a bare tool call such
# as ``fill`` or ``submit`` then hands the classifier an op signal even when its
# args name none.
_OP_NAMES = frozenset((
    "checkout", "click", "confirm", "delete", "expand", "fill", "focus",
    "open", "pay", "purchase", "read", "select", "submit",
))
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _proposal_request(
    surface: SurfaceView, recognized: RecognitionResult | None, *, k: int
) -> ModelRequest:
    """Build the ``ModelRequest`` handed to the proposing policy.

    The system message asks for up to K candidate moves as tool calls. The user
    message is a JSON object carrying ``k``, the surface ``url``/``title``, its
    affordances (``handle``/``role``/``label``) and the recognizer's PRIOR
    (archetype, confidence, suggested handles — empty when nothing was
    recognized). The prior is only a hint inside the prompt.
    """
    import json

    affordance_rows = []
    for aff in surface.affordances:
        affordance_rows.append(
            {"handle": aff.handle, "role": aff.role, "label": aff.label}
        )

    prior_hint: dict[str, Any] = {}
    if recognized is not None:
        prior_hint = {
            "archetype": recognized.archetype,
            "confidence": recognized.confidence,
            "suggested_handles": list(recognized.matched_handles),
        }

    system_prompt = (
        "Propose up to K candidate next actions over the current surface as tool "
        "calls. Each call's args carry {label, handle, op?, role?}. You PROPOSE; a "
        "deterministic lookahead over the learned model disposes — do not commit."
    )
    user_payload = {
        "k": k,
        "url": surface.url,
        "title": surface.title,
        "affordances": affordance_rows,
        "prior": prior_hint,
    }
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": json.dumps(user_payload)}
    return ModelRequest(messages=[system_message, user_message])
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
# Injected executor: performs ONE step for the given candidate on the given
# surface and returns the ``SurfaceView`` that results. It is awaitable; offline
# tests can inject a fake that returns scripted next-surfaces.
ActionExecutor = Callable[[Candidate, SurfaceView], Awaitable[SurfaceView]]
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
@dataclass(frozen=True)
class MpcOutcome:
    """What an ``mpc_run`` driver loop ended with."""

    reached_goal: bool  # final surface maps to an accepting FSM state
    escalated: bool  # the loop stopped because a step escalated
    steps: tuple[Candidate, ...]  # candidates handed to the executor, in order
    rationale: str  # why the loop stopped
    surface: SurfaceView  # the surface the loop ended on
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
async def mpc_run(
    surface: SurfaceView,
    model: ModelProvider,
    fsm: Fsm,
    executor: ActionExecutor,
    patterns: Sequence[Any] = (),
    *,
    k: int = 3,
    depth: int = 2,
    max_steps: int = 25,
    surface_to_state: SurfaceToState | None = None,
    priors: Sequence[Any] = (),
    min_confidence: float = 0.6,
) -> MpcOutcome:
    """The driver loop: ``live_mpc_step`` → execute ONE step via the injected
    ``executor`` → re-plan from the REAL resulting surface → repeat.

    Stops when the current surface maps to an accepting FSM state (success), when
    a step escalates (no on-rail candidate, or a COMMITTING candidate — the commit
    boundary is never auto-crossed), or after ``max_steps`` executed steps.

    Args:
        surface: the starting surface.
        model: proposing policy, forwarded to ``live_mpc_step``.
        fsm: the learned transition model.
        executor: awaited once per non-escalating step with the chosen candidate
            and the current surface; returns the next surface.
        patterns, k, depth, priors, min_confidence: forwarded to ``live_mpc_step``.
        max_steps: upper bound on executed steps.
        surface_to_state: surface→state mapping; defaults to ``_surface_state``.

    Returns:
        An ``MpcOutcome`` describing how the loop ended.
    """
    to_state = surface_to_state or _surface_state
    taken: list[Candidate] = []
    cur = surface
    for _ in range(max_steps):
        if to_state(cur) in fsm.accepting:
            return MpcOutcome(
                reached_goal=True, escalated=False, steps=tuple(taken),
                rationale="reached goal state", surface=cur,
            )
        decision = await live_mpc_step(
            cur, model, fsm, patterns, k=k, depth=depth,
            surface_to_state=to_state, priors=priors, min_confidence=min_confidence,
        )
        if decision.escalate or decision.action is None:
            return MpcOutcome(
                reached_goal=False, escalated=True, steps=tuple(taken),
                rationale=decision.rationale, surface=cur,
            )
        # execute exactly ONE step via the injected executor, then re-plan from
        # the REAL resulting surface.
        taken.append(decision.action)
        cur = await executor(decision.action, cur)
    # The budget is spent, but the last executed step may have landed on the
    # goal; report that as success rather than as an exhausted budget.
    at_goal = to_state(cur) in fsm.accepting
    return MpcOutcome(
        reached_goal=at_goal, escalated=False, steps=tuple(taken),
        rationale="reached goal state" if at_goal else "max_steps reached",
        surface=cur,
    )
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
# --- (D) the transition model FROM SHADOW recordings (Part B) -------------
|
|
631
|
+
#
|
|
632
|
+
# ``fsm_from_events`` folds the EVENT LOG into an ``Fsm``. ``fsm_from_shadow`` does
|
|
633
|
+
# the same from a Shadow recording, so a recording and the event log feed the SAME
|
|
634
|
+
# search transition model. The shapes are aligned (both produce a ``reachability.
|
|
635
|
+
# Fsm``), so the two sources MERGE — accumulating recordings GROWS the learned
|
|
636
|
+
# graph (the apprenticeship premise).
|
|
637
|
+
#
|
|
638
|
+
# DEPENDENCY DIRECTION: zu-shadow depends on zu-core AND zu-cli. Importing zu-shadow
|
|
639
|
+
# from zu-patterns risks a package cycle and violates the "dependency-light" rule,
|
|
640
|
+
# so ``fsm_from_shadow`` takes PLAIN inputs — either the already-emitted induced
|
|
641
|
+
# ``Fsm`` (the synthesizer's ``SynthesisResult.fsm``) OR the list of shadow events
|
|
642
|
+
# (``data.shadow.user.*``) — and NEVER imports zu-shadow. zu-patterns still depends
|
|
643
|
+
# only on zu-core.
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
def _shadow_state_id(seq: int) -> str:
|
|
647
|
+
return f"shadow_s{seq}"
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def _shadow_action_label(e: Any) -> str:
    """Render one ``data.shadow.user.*`` event as an edge label, ``verb[:target]``.

    A navigate event is labelled ``navigate``. A click event gets the verb
    ``click``; any other event type falls through to the verb ``type``. When the
    payload's ``target`` is a dict, the first truthy of its ``name`` / ``label`` /
    ``role`` entries is appended after a colon; otherwise the bare verb is
    returned. The shape is intended to match the synthesizer's ``_action_label``.
    """
    kind = getattr(e, "type", "")
    # The payload is read before the navigate short-circuit, so ``_payload`` is
    # always invoked exactly once per event (it is defined elsewhere in this
    # module -- presumably it yields a dict-like payload; confirm there).
    payload = _payload(e)
    if kind == ev.SHADOW_USER_NAVIGATE:
        return "navigate"

    if kind == ev.SHADOW_USER_CLICK:
        verb = "click"
    else:
        verb = "type"

    target = payload.get("target") or {}
    if not isinstance(target, dict):
        return verb

    # First truthy identifier wins, in priority order name > label > role.
    for key in ("name", "label", "role"):
        candidate = target.get(key)
        if candidate:
            return f"{verb}:{candidate}"
    return verb
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def fsm_from_shadow_events(
    events: Sequence[Any],
    *,
    initial: str = "shadow_start",
    goal: str = "shadow_goal",
) -> Fsm:
    """Fold a Shadow recording's user actions into a linear induced ``Fsm`` (pure).

    Only events whose ``type`` is a shadow click, type or navigate are kept;
    everything else in ``events`` is ignored. Each kept action contributes one
    new state and one edge labelled by ``_shadow_action_label``, giving the chain
    ``initial`` -> ``shadow_s1`` -> ... -> ``shadow_sN``, closed by a ``done``
    edge into ``goal``, which is the only accepting state.

    State ids depend only on an action's position in the recording, not on its
    content. With no matching events the result is just ``initial`` --done-->
    ``goal``. Takes plain events (no zu-shadow import).
    """
    action_types = (
        ev.SHADOW_USER_CLICK, ev.SHADOW_USER_TYPE, ev.SHADOW_USER_NAVIGATE
    )
    recorded = [item for item in events if getattr(item, "type", "") in action_types]

    # The full state chain: initial, one positional state per action, then goal.
    chain = [initial]
    chain.extend(_shadow_state_id(n) for n in range(1, len(recorded) + 1))
    chain.append(goal)

    # One label per hop along the chain; the final hop into the goal is "done".
    labels = [_shadow_action_label(item) for item in recorded]
    labels.append("done")

    transitions = tuple(
        FsmEdge(src=src, dst=dst, label=label)
        for src, dst, label in zip(chain, chain[1:], labels)
    )
    return Fsm(
        states=frozenset(chain),
        initial=initial,
        accepting=frozenset({goal}),
        edges=transitions,
    )
|
|
700
|
+
|
|
701
|
+
|
|
702
|
+
def merge_transition_models(*fsms: Fsm) -> Fsm:
    """Union several induced ``Fsm``s into ONE learned transition model.

    States and accepting sets are unioned across all inputs (any source's goal
    is a goal). Edges are concatenated in argument order and de-duplicated on
    ``(src, dst, label)``, keeping the first occurrence. The merged ``initial``
    is the first argument's ``initial``.

    Called with no arguments, returns an empty ``Fsm`` whose ``initial`` is the
    empty string.
    """
    if not fsms:
        return Fsm(states=frozenset(), initial="", accepting=frozenset(), edges=())

    all_states: set[str] = set()
    goals: set[str] = set()
    # Insertion-ordered dict doubles as the "seen" set and the ordered edge list.
    unique: dict[tuple[str, str, str], FsmEdge] = {}
    for model in fsms:
        all_states |= model.states
        goals |= model.accepting
        for edge in model.edges:
            unique.setdefault((edge.src, edge.dst, edge.label), edge)

    return Fsm(
        states=frozenset(all_states),
        initial=fsms[0].initial,
        accepting=frozenset(goals),
        edges=tuple(unique.values()),
    )
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
def fsm_from_shadow(
    source: Any,
    *,
    base: Fsm | None = None,
    initial: str = "shadow_start",
    goal: str = "shadow_goal",
) -> Fsm:
    """Build, or extend, the empirical transition model from a Shadow recording.

    ``source`` is a PLAIN input -- nothing here imports zu-shadow:

    * an ``Fsm`` instance -- used as-is (``initial`` / ``goal`` are ignored);
    * a sequence of shadow events (``data.shadow.user.*``) -- folded with
      ``fsm_from_shadow_events`` using ``initial`` and ``goal``; or
    * an object exposing ``.shadow_events()`` or ``.events`` -- its events are
      extracted by ``_shadow_events_of`` and folded the same way.

    If ``base`` is given, the result is ``merge_transition_models(base, induced)``
    (so ``base``'s ``initial`` is the merged initial); otherwise the induced
    model is returned on its own.
    """
    induced = (
        source
        if isinstance(source, Fsm)
        else fsm_from_shadow_events(
            _shadow_events_of(source), initial=initial, goal=goal
        )
    )
    if base is None:
        return induced
    return merge_transition_models(base, induced)
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
def _shadow_events_of(source: Any) -> Sequence[Any]:
|
|
765
|
+
"""Extract the shadow events from a plain input: a bare sequence, or a
|
|
766
|
+
RecordedSession-shaped object exposing ``shadow_events()`` / ``events``."""
|
|
767
|
+
shadow_events = getattr(source, "shadow_events", None)
|
|
768
|
+
if callable(shadow_events):
|
|
769
|
+
return list(shadow_events())
|
|
770
|
+
events = getattr(source, "events", None)
|
|
771
|
+
if events is not None:
|
|
772
|
+
return list(events)
|
|
773
|
+
if isinstance(source, Sequence):
|
|
774
|
+
return source
|
|
775
|
+
raise TypeError(
|
|
776
|
+
"fsm_from_shadow source must be an Fsm, a sequence of shadow events, or a "
|
|
777
|
+
"RecordedSession-shaped object (with .events / .shadow_events())"
|
|
778
|
+
)
|