zu-patterns 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zu_patterns/search.py ADDED
@@ -0,0 +1,778 @@
1
+ """Offline guided search — best-first planning OVER the Phase-1 induced FSM.
2
+
3
+ This is the planner half of the §5 stack (the policy prior is the recognizer).
4
+ It runs over ``zu_core.reachability.Fsm`` (REUSE — there is no second FSM type)
5
+ with the pattern recognizer as the move-ordering prior, the same AlphaZero shape
6
+ the doc describes: explore the residual the pattern does not resolve, guided by
7
+ ``co_reachable`` (a cheap value estimate) and pruned of ``trap_states``.
8
+
9
+ Two pieces:
10
+ * ``fsm_from_events`` — an EMPIRICAL transition-model builder: fold the event
11
+ log's surface→action→surface triples into FSM edges (the documented future
12
+ ``fsm_from_track`` helper, sourced from the event log NOW; Shadow recordings
13
+ EXTEND this later — DEFERRED, see below).
14
+ * ``plan`` — best-first search over the FSM, ordered by
15
+ ``f = co-reachability + prior``, pruning edges into traps, and FLAGGING which
16
+ edges cross a committing boundary (so the deferred live executor knows where
17
+ lookahead must stop). Offline the whole learned graph is explorable; the plan
18
+ never auto-crosses a COMMITTING edge in the live seam.
19
+
20
+ Now also:
21
+ * ``live_mpc_step`` / ``mpc_run`` — the LIVE guided-MPC loop (§5.2, the
22
+ AlphaZero shape): the model PROPOSES ≤K candidates (the recognizer is the
23
+ move-ordering prior), a shallow lookahead over the learned FSM DISPOSES via
24
+ the rail (``co_reachable``/traps), one REVERSIBLE step executes via an injected
25
+ executor, then re-plan — STOPPING at the commit boundary (default-to-committing).
26
+ * ``fsm_from_shadow`` / ``merge_transition_models`` — the Shadow-sourced
27
+ transition model: fold a recording's induced FSM / shadow events into the SAME
28
+ search model; accumulating recordings GROWS the graph.
29
+
30
+ Pure, offline, $0 — the executor is the only I/O and it is injected (a fake in
31
+ tests, a real browser in production).
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import hashlib
37
+ from collections.abc import Awaitable, Callable, Sequence
38
+ from dataclasses import dataclass, field
39
+ from typing import Any
40
+
41
+ from zu_core import events as ev
42
+ from zu_core.ports import ModelProvider, ModelRequest, RecognitionResult
43
+ from zu_core.reachability import Fsm, FsmEdge, co_reachable, trap_states
44
+ from zu_core.surface import SurfaceAffordance, SurfaceView
45
+
46
+ from .recognizer import recognize
47
+ from .reversibility import Commitment, classify_action
48
+
49
+ # --- (A) the empirical transition-model builder ---------------------------
50
+
51
+
52
+ def _payload(e: Any) -> dict:
53
+ p = getattr(e, "payload", None)
54
+ return p if isinstance(p, dict) else {}
55
+
56
+
57
def surface_state_id(payload: dict) -> str:
    """Return a stable digest of the surface the agent was on — the FSM state id.

    Heuristic: when ``url`` or ``title`` is present and non-empty, the id is
    derived from those two values (a web locus); otherwise it is derived from
    the sorted affordance ``handles`` (a structural fingerprint). Two visits to
    the same page therefore collapse to one state, while structurally different
    surfaces without a locus stay distinct.

    A ``url``/``title`` that is present but ``None`` (or otherwise falsy) is
    treated as absent. Stringifying ``None`` would yield the truthy text
    ``"None"``, which would suppress the handle fallback and merge every
    URL-less surface into a single state.

    Returns ``"s_"`` followed by the first 12 hex characters of a SHA-256 digest.
    """
    url = str(payload.get("url") or "")
    title = str(payload.get("title") or "")
    if url or title:
        # \x1f (unit separator) keeps the url/title boundary unambiguous.
        basis = f"url={url}\x1ftitle={title}"
    else:
        handles = payload.get("handles") or []
        # Sorting makes the fingerprint independent of affordance order.
        basis = "h=" + ",".join(sorted(str(h) for h in handles))
    return "s_" + hashlib.sha256(basis.encode()).hexdigest()[:12]
74
+
75
+
76
def fsm_from_events(
    events: Sequence[Any],
    *,
    goal_states: frozenset[str] | None = None,
    initial: str | None = None,
) -> Fsm:
    """Fold an event log into an induced ``Fsm`` (pure).

    The log is read as ``ev.SURFACE_CAPTURED`` snapshots interleaved with
    ``ev.TOOL_INVOKED`` actions. Each surface → tool → next-surface triple
    becomes ``FsmEdge(src=state_before, dst=state_after, label=action)``.

    * The state id of a snapshot is ``surface_state_id`` of its payload.
    * The action label is the payload's ``tool`` (default ``"action"``), with
      ``":<handle>"`` appended when a handle is found either at the top level
      of the payload or inside a dict-valued ``args``.
    * When several tool invocations occur between two snapshots, only the last
      one labels the edge (each invocation overwrites the pending action).
    * A snapshot that is not preceded by a tool invocation produces no edge.

    Args:
        events: the event log; items are read via ``type`` / ``payload``.
        goal_states: accepting states supplied by the caller. They are added to
            the state set even if never observed in the log.
        initial: explicit initial state; defaults to the first observed state,
            or ``""`` when the log holds no snapshot.
    """
    # An insertion-ordered dict doubles as an ordered set: O(1) membership
    # while still remembering which state was observed first.
    states: dict[str, None] = {}
    edges: list[FsmEdge] = []
    last_state: str | None = None
    pending_action: str | None = None
    for e in events:
        etype = getattr(e, "type", None)
        if etype == ev.SURFACE_CAPTURED:
            sid = surface_state_id(_payload(e))
            states.setdefault(sid, None)
            if last_state is not None and pending_action is not None:
                edges.append(FsmEdge(src=last_state, dst=sid, label=pending_action))
            last_state = sid
            pending_action = None
        elif etype == ev.TOOL_INVOKED:
            p = _payload(e)
            tool = str(p.get("tool", "action"))
            args = p.get("args")
            # ``args`` is only consulted when it is a dict; any other shape is
            # ignored instead of raising on ``.get``.
            nested_handle = args.get("handle") if isinstance(args, dict) else None
            handle = p.get("handle") or nested_handle
            pending_action = f"{tool}:{handle}" if handle else tool
    init = initial if initial is not None else next(iter(states), "")
    accepting = goal_states if goal_states is not None else frozenset()
    # Declared goal states join the state set (a goal may be named before it is
    # observed); an empty initial id is not a state.
    state_set = frozenset(states) | accepting | ({init} if init else frozenset())
    return Fsm(states=state_set, initial=init, accepting=accepting & state_set, edges=tuple(edges))
117
+
118
+
119
+ # --- (B) the best-first planner over the induced FSM ----------------------
120
+
121
+
122
@dataclass(frozen=True)
class PlanStep:
    """A single planned transition along one FSM edge.

    ``committing`` records whether the edge was classified as crossing a commit
    boundary. By this module's contract a live executor must STOP at such an
    edge and never cross it automatically.
    """

    src: str  # state the edge leaves
    dst: str  # state the edge enters
    label: str  # action label carried by the edge
    committing: bool  # True when the classifier returned COMMITTING
131
+
132
+
133
@dataclass(frozen=True)
class Plan:
    """The outcome of a planning run: a path starting at the FSM's initial state.

    * ``steps`` — the ordered moves of the path.
    * ``reached_goal`` — whether the path ends in an accepting state.
    * ``crosses_commit`` — whether any step is flagged ``committing`` (a live
      loop must halt before such a step).
    * ``expansions`` — how many frontier nodes the search popped (search effort).
    * ``detail`` — optional explanation when the goal was not reached.
    """

    steps: tuple[PlanStep, ...]
    reached_goal: bool
    crosses_commit: bool
    expansions: int = 0
    detail: str | None = None
148
+
149
+
150
# Move-ordering prior over an edge: returns a non-negative bonus, where a larger
# value means the edge is explored earlier.
EdgePrior = Callable[[FsmEdge], float]
# Commit-boundary classifier over an edge: returns REVERSIBLE or COMMITTING.
EdgeClassifier = Callable[[FsmEdge], Commitment]
154
+
155
+
156
def _default_classifier(edge: FsmEdge) -> Commitment:
    """Classify every edge as REVERSIBLE — intended for OFFLINE exploration only."""
    # This default is the inverse of the project's default-to-committing rail
    # discipline. That is deliberate, and safe only because the value never
    # gates a live side-effecting action:
    #   * ``plan()`` searches the learned FSM without executing anything. A
    #     REVERSIBLE verdict merely lets the planner look past an edge; the
    #     verdict is still recorded per step (``PlanStep.committing``) and
    #     aggregated in ``Plan.crosses_commit`` — surfaced, not crossed.
    #   * The live seam (``live_mpc_step``) does not use this function. It
    #     re-classifies its chosen candidate with
    #     ``reversibility.classify_action`` (described in this module as
    #     defaulting to COMMITTING on uncertainty) and escalates instead of
    #     executing when the verdict is COMMITTING.
    # If that separation could ever be violated, flip this to COMMITTING.
    return Commitment.REVERSIBLE
173
+
174
+
175
+ def _default_prior(edge: FsmEdge) -> float:
176
+ return 0.0
177
+
178
+
179
def plan(
    fsm: Fsm,
    *,
    prior: EdgePrior = _default_prior,
    classifier: EdgeClassifier = _default_classifier,
    max_expansions: int = 1000,
) -> Plan:
    """Best-first search from ``fsm.initial`` toward an accepting state (pure).

    The frontier is ordered by ``f = co_reachability(dst) + prior(edge)``, where
    co-reachability contributes 1.0 when the destination is in
    ``co_reachable(fsm)`` and 0.0 otherwise. Higher ``f`` is expanded first;
    ties fall back to insertion order, which follows the edge label.

    Edges into ``trap_states(fsm)`` are pruned, and a path never revisits a
    state it already contains. Each pushed edge is classified; a COMMITTING
    verdict is recorded on the ``PlanStep`` and aggregated in
    ``Plan.crosses_commit``. The path is still recorded, because nothing is
    executed here.

    Args:
        fsm: the transition model to search.
        prior: move-ordering bonus per edge. It is evaluated once per outgoing
            edge per expansion.
        classifier: commit-boundary classifier per edge.
        max_expansions: upper bound on the number of frontier pops.

    Returns:
        A ``Plan`` ending in an accepting state when one is found. Otherwise the
        longest partial path that was popped, with ``detail`` saying whether the
        budget ran out or the frontier was exhausted. If ``fsm.initial`` is not
        among ``fsm.states`` the plan is empty with detail ``"no initial state"``.
    """
    import heapq

    co = co_reachable(fsm)
    traps = trap_states(fsm)
    start = fsm.initial
    if start not in fsm.states:
        return Plan(steps=(), reached_goal=False, crosses_commit=False, detail="no initial state")

    # Index outgoing edges once instead of rescanning every edge per expansion.
    # Per-source lists keep the original edge order, so ties resolve as before.
    outgoing: dict[str, list[FsmEdge]] = {}
    for edge in fsm.edges:
        outgoing.setdefault(edge.src, []).append(edge)

    # Heap entries are (score, seq, steps, state, visited). ``seq`` is unique,
    # so tuple comparison never reaches the non-orderable trailing fields.
    counter = 0
    heap: list[tuple[float, int, tuple[PlanStep, ...], str, frozenset[str]]] = [
        (0.0, 0, (), start, frozenset({start}))
    ]
    best_partial: tuple[PlanStep, ...] = ()
    expansions = 0
    while heap and expansions < max_expansions:
        _, _, steps, state, visited = heapq.heappop(heap)
        expansions += 1
        if state in fsm.accepting:
            return Plan(
                steps=steps,
                reached_goal=True,
                crosses_commit=any(s.committing for s in steps),
                expansions=expansions,
            )
        if len(steps) > len(best_partial):
            best_partial = steps
        # Score each outgoing edge exactly once. The score is negated because
        # heapq is a min-heap and the highest-value edge must pop first.
        ranked = sorted(
            ((-(float(e.dst in co) + prior(e)), e) for e in outgoing.get(state, ())),
            key=lambda pair: (pair[0], pair[1].label),
        )
        for f, e in ranked:
            if e.dst in traps or e.dst in visited:
                continue
            committing = classifier(e) is Commitment.COMMITTING
            step = PlanStep(src=e.src, dst=e.dst, label=e.label, committing=committing)
            counter += 1
            heapq.heappush(heap, (f, counter, steps + (step,), e.dst, visited | {e.dst}))
    return Plan(
        steps=best_partial,
        reached_goal=False,
        crosses_commit=any(s.committing for s in best_partial),
        expansions=expansions,
        detail="goal not reached within max_expansions" if heap else "frontier exhausted",
    )
258
+
259
+
260
+ # --- (C) the LIVE guided-MPC loop (§5.2, the AlphaZero shape) --------------
261
+ #
262
+ # MODEL PROPOSES, HARNESS DISPOSES. The model proposes ≤K candidate next actions
263
+ # (policy-pruned branching) — the pattern recognizer supplies the move-ordering
264
+ # PRIOR; a shallow lookahead over the LEARNED ``Fsm`` (the remembered transition
265
+ # model) estimates where each candidate leads; the rail/reachability DISPOSES
266
+ # (co_reachable to the goal? not a trap?). A pattern's prediction is a PRIOR
267
+ # confirmed by the deterministic lookahead/rail, NEVER ground truth.
268
+ #
269
+ # ``live_mpc_step`` is PURE decision logic — no real I/O. The executor is injected
270
+ # into the driver loop (``mpc_run``), so the whole thing is offline-testable with a
271
+ # ScriptedProvider + a hand-built Fsm + a fake executor.
272
+
273
+
274
+ # A proposed candidate: the action label (matching an FSM edge ``label``), the
275
+ # affordance handle it acts on (for the executor), and a generic interaction verb
276
+ # (``op``)/``role`` the commit-boundary classifier reads.
277
@dataclass(frozen=True)
class Candidate:
    """One proposed next action.

    ``label`` is matched against FSM edge labels; the remaining fields are
    optional signals passed on to the executor and the commit-boundary
    classifier.
    """

    label: str  # action label, compared with ``FsmEdge.label``
    handle: str | None = None  # affordance handle the action targets
    op: str | None = None  # generic interaction verb
    role: str | None = None  # role of the targeted affordance
    http_method: str | None = None  # HTTP method, when the proposal names one
284
+
285
+
286
@dataclass(frozen=True)
class MpcDecision:
    """What one ``live_mpc_step`` decided, and why.

    * ``action`` — the selected candidate; ``None`` when no candidate was
      proposed or none scored as on-rail.
    * ``escalate`` — True when the step must not execute: nothing on-rail was
      found, or the selected candidate was classified COMMITTING.
    * ``rationale`` — human-readable explanation of the decision.
    * ``committing`` — True when the selected candidate was classified
      COMMITTING (it is then returned as ``action`` together with
      ``escalate=True``).
    * ``scored`` — every proposed candidate with its lookahead score, ranked
      best first, kept for audit.
    """

    action: Candidate | None
    escalate: bool
    rationale: str
    committing: bool = False
    scored: tuple[tuple[Candidate, float], ...] = ()
304
+
305
+
306
# Maps a live ``SurfaceView`` to the id of the matching state in the learned FSM.
# Callers may inject their own mapping (offline tests can return ids directly).
SurfaceToState = Callable[[SurfaceView], str]
309
+
310
+
311
def _surface_state(surface: SurfaceView) -> str:
    """Default surface → FSM-state mapping.

    Builds a payload from the surface's url, title and affordance handles and
    digests it with ``surface_state_id`` — the same function ``fsm_from_events``
    uses for its state ids.
    """
    return surface_state_id(
        dict(
            url=surface.url,
            title=surface.title,
            handles=[aff.handle for aff in surface.affordances],
        )
    )
320
+
321
+
322
+ def _prior_for_candidate(
323
+ cand: Candidate, recognized: RecognitionResult | None
324
+ ) -> float:
325
+ """The move-ordering PRIOR (the recognizer's confidence, biased to the handles
326
+ the recognized archetype bound). A recognized handle ⇒ explore-first bonus."""
327
+ if recognized is None:
328
+ return 0.0
329
+ bonus = recognized.confidence
330
+ if cand.handle is not None and cand.handle in recognized.matched_handles:
331
+ bonus += 1.0
332
+ return bonus
333
+
334
+
335
+ def _lookahead_score(fsm: Fsm, co: frozenset[str], traps: frozenset[str],
336
+ dst: str, depth: int) -> float:
337
+ """SHALLOW lookahead over the LEARNED fsm: how good is landing on ``dst``?
338
+
339
+ The rail evaluator (``co_reachable``) is the value estimate: ``dst`` accepting
340
+ ⇒ best; ``dst`` co-reachable (goal still reachable) ⇒ good; a trap ⇒ worst
341
+ (pruned). Within ``depth`` we look whether an accepting state is reachable from
342
+ ``dst`` (a cheap bounded BFS), preferring the shorter route. Pure graph query —
343
+ no model, no I/O. This is what DISPOSES."""
344
+ if dst in traps:
345
+ return -1.0
346
+ if dst in fsm.accepting:
347
+ return 100.0
348
+ if dst not in co:
349
+ # not co-reachable and not accepting: a dead end for the goal.
350
+ return -1.0
351
+ # bounded BFS to the nearest accepting state within ``depth`` — closer is
352
+ # better (the value estimate the rail's co_reachable underwrites).
353
+ frontier = {dst}
354
+ seen = {dst}
355
+ for d in range(1, max(depth, 1) + 1):
356
+ nxt: set[str] = set()
357
+ for s in frontier:
358
+ for e in fsm.edges:
359
+ if e.src == s and e.dst not in seen:
360
+ if e.dst in fsm.accepting:
361
+ return 100.0 - d
362
+ if e.dst in co:
363
+ nxt.add(e.dst)
364
+ seen.add(e.dst)
365
+ frontier = nxt
366
+ if not frontier:
367
+ break
368
+ # co-reachable but goal is beyond the horizon: still on-rail, mild positive.
369
+ return 1.0
370
+
371
+
372
async def live_mpc_step(
    surface: SurfaceView,
    model: ModelProvider,
    fsm: Fsm,
    patterns: Sequence[Any] = (),
    *,
    k: int = 3,
    depth: int = 2,
    surface_to_state: SurfaceToState | None = None,
    priors: Sequence[Any] = (),
    min_confidence: float = 0.6,
) -> MpcDecision:
    """One guided-MPC step — the model PROPOSES, the lookahead + rail DISPOSES.

    PROPOSE: the ``ModelProvider`` is asked for at most ``k`` candidate next
    actions over ``surface``. The pattern recognizer's result is passed along as
    a hint and reused as the move-ordering prior.

    LOOK AHEAD: each candidate's label is matched against the FSM edges leaving
    the current state, and every matching destination is scored by
    ``_lookahead_score``. When the learned model remembers the same label
    leading to SEVERAL destinations, the candidate takes the WORST of those
    scores, so a move that has ever led to a trap or dead end is not treated as
    safe. A label with no remembered edge scores ``-2.0``.

    DISPOSE: candidates are ranked by score (descending), then label. The prior
    adds only ``0.001 * prior`` — a tie-break, not an override. A best score
    ≤ 0 means nothing is on-rail and the decision escalates with no action.

    SAFETY: the best candidate is re-classified with ``classify_action``. A
    COMMITTING verdict returns the candidate with ``escalate=True`` and
    ``committing=True`` — it is reported, not cleared for execution. Only a
    non-COMMITTING candidate is returned with ``escalate=False``.

    Args:
        surface: the current surface.
        model: the proposing policy.
        fsm: the learned transition model.
        patterns: patterns handed to ``recognize``.
        k: maximum number of proposals considered.
        depth: lookahead horizon in edges.
        surface_to_state: surface → FSM state id; defaults to ``_surface_state``.
        priors: forwarded to ``classify_action``.
        min_confidence: forwarded to ``recognize``.
    """
    to_state = surface_to_state or _surface_state
    here = to_state(surface)
    co = co_reachable(fsm)
    traps = trap_states(fsm)

    # PROPOSE — the model proposes ≤K candidates from the surface.
    rec = recognize(surface, patterns, min_confidence=min_confidence)
    proposals = await _propose_candidates(surface, model, rec.result, k=k)
    if not proposals:
        return MpcDecision(
            action=None, escalate=True,
            rationale="model proposed no candidates — escalate",
        )

    # Group the remembered destinations by action label. A plain
    # ``{label: edge}`` mapping would silently keep only the last edge when one
    # label leads to several destinations from this state.
    dsts_by_label: dict[str, set[str]] = {}
    for e in fsm.edges:
        if e.src == here:
            dsts_by_label.setdefault(e.label, set()).add(e.dst)

    scored: list[tuple[Candidate, float]] = []
    for cand in proposals:
        dsts = dsts_by_label.get(cand.label)
        if not dsts:
            # The learned model has no memory of this move from here: an
            # unknown transition scores below any known on-rail move.
            scored.append((cand, -2.0))
            continue
        # Pessimistic over nondeterministic outcomes: worst destination wins.
        base = min(_lookahead_score(fsm, co, traps, dst, depth) for dst in dsts)
        score = base + 0.001 * _prior_for_candidate(cand, rec.result)
        scored.append((cand, score))
    # Deterministic ordering: score descending, then label for stable ties.
    scored.sort(key=lambda cs: (-cs[1], cs[0].label))
    scored_t = tuple(scored)

    best, best_score = scored[0]
    if best_score <= 0.0:
        # No on-rail candidate (trap / unknown / unreachable): escalate rather
        # than execute a blind or off-rail move.
        return MpcDecision(
            action=None, escalate=True,
            rationale=(
                f"no on-rail candidate from {here!r} "
                f"(best {best.label!r} scored {best_score:.3f}) — escalate"
            ),
            scored=scored_t,
        )

    # SAFETY: re-classify the chosen candidate at the commit boundary.
    commitment = classify_action(
        http_method=best.http_method, role=best.role, op=best.op, priors=priors
    )
    if commitment is Commitment.COMMITTING:
        return MpcDecision(
            action=best, escalate=True, committing=True,
            rationale=(
                f"chosen on-rail candidate {best.label!r} is COMMITTING "
                "(default-to-committing) — STOP at the commit boundary, escalate"
            ),
            scored=scored_t,
        )

    return MpcDecision(
        action=best, escalate=False, committing=False,
        rationale=(
            f"chosen {best.label!r} → on-rail (score {best_score:.3f}); "
            "REVERSIBLE — execute one step then re-plan"
        ),
        scored=scored_t,
    )
476
+
477
+
478
+ def _aff(surface: SurfaceView, handle: str | None) -> SurfaceAffordance | None:
479
+ if handle is None:
480
+ return None
481
+ for a in surface.affordances:
482
+ if a.handle == handle:
483
+ return a
484
+ return None
485
+
486
+
487
async def _propose_candidates(
    surface: SurfaceView, model: ModelProvider,
    recognized: RecognitionResult | None, *, k: int,
) -> list[Candidate]:
    """Ask the ``ModelProvider`` to propose at most ``k`` candidate next actions.

    The request is built by ``_proposal_request`` and the model answers with
    tool calls. For each of the first ``k`` calls:

    * ``label`` comes from ``args["label"]``, falling back to the tool name;
    * ``handle`` is kept only when it is a string;
    * ``op`` comes from ``args["op"]``, falling back to the tool name when that
      name is one of ``_OP_NAMES``;
    * ``role`` comes from ``args["role"]``, falling back to the role of the
      affordance named by ``handle``;
    * ``http_method`` is stringified when present and truthy.

    A non-positive ``k`` yields no candidates (a negative value would otherwise
    slice from the end and keep all but the last calls). A response with no
    tool calls yields an empty list.
    """
    req = _proposal_request(surface, recognized, k=k)
    resp = await model.complete(req)
    limit = max(k, 0)
    out: list[Candidate] = []
    for call in (resp.tool_calls or ())[:limit]:
        args = call.args or {}
        label = str(args.get("label") or call.name)
        raw_handle = args.get("handle")
        handle = raw_handle if isinstance(raw_handle, str) else None
        aff = _aff(surface, handle)
        op = args.get("op") or (call.name if call.name in _OP_NAMES else None)
        role = args.get("role") or (aff.role if aff is not None else None)
        out.append(
            Candidate(
                label=label,
                handle=handle,
                op=op if isinstance(op, str) else None,
                role=role if isinstance(role, str) else None,
                http_method=(
                    str(args["http_method"]) if args.get("http_method") else None
                ),
            )
        )
    return out
520
+
521
+
522
+ # Generic interaction verbs a tool name may itself be (so a bare ``fill``/``submit``
523
+ # tool call carries an op signal to the classifier without explicit args).
524
+ _OP_NAMES = frozenset(
525
+ {"fill", "read", "open", "select", "expand", "focus",
526
+ "submit", "confirm", "purchase", "pay", "checkout", "delete", "click"}
527
+ )
528
+
529
+
530
def _proposal_request(
    surface: SurfaceView, recognized: RecognitionResult | None, *, k: int
) -> ModelRequest:
    """Build the ``ModelRequest`` sent to the proposing policy.

    The system message asks for up to K candidate moves as tool calls. The user
    message is a JSON object holding ``k``, the surface's url and title, its
    affordances (handle / role / label), and a ``prior`` hint. The hint is empty
    when nothing was recognized; otherwise it carries the recognized archetype,
    its confidence, and the matched handles.
    """
    import json

    system_prompt = (
        "Propose up to K candidate next actions over the current surface as tool "
        "calls. Each call's args carry {label, handle, op?, role?}. You PROPOSE; a "
        "deterministic lookahead over the learned model disposes — do not commit."
    )
    affordances = []
    for aff in surface.affordances:
        affordances.append({"handle": aff.handle, "role": aff.role, "label": aff.label})
    if recognized is None:
        prior_hint: dict[str, Any] = {}
    else:
        prior_hint = {
            "archetype": recognized.archetype,
            "confidence": recognized.confidence,
            "suggested_handles": list(recognized.matched_handles),
        }
    user_content = json.dumps(
        {
            "k": k,
            "url": surface.url,
            "title": surface.title,
            "affordances": affordances,
            "prior": prior_hint,
        }
    )
    return ModelRequest(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ]
    )
560
+
561
+
562
# The injected executor: given the chosen candidate and the current surface, it
# performs ONE step and returns (awaitably) the resulting ``SurfaceView``.
# Offline tests can inject a fake that returns scripted next-surfaces; a real
# run drives the browser/tool.
ActionExecutor = Callable[[Candidate, SurfaceView], Awaitable[SurfaceView]]
566
+
567
+
568
@dataclass(frozen=True)
class MpcOutcome:
    """Final report of an ``mpc_run`` driver loop."""

    reached_goal: bool  # the final surface maps to an accepting FSM state
    escalated: bool  # the loop stopped because a step decision escalated
    steps: tuple[Candidate, ...]  # candidates executed, in order
    rationale: str  # why the loop stopped
    surface: SurfaceView  # the surface the loop ended on
577
+
578
+
579
async def mpc_run(
    surface: SurfaceView,
    model: ModelProvider,
    fsm: Fsm,
    executor: ActionExecutor,
    patterns: Sequence[Any] = (),
    *,
    k: int = 3,
    depth: int = 2,
    max_steps: int = 25,
    surface_to_state: SurfaceToState | None = None,
    priors: Sequence[Any] = (),
    min_confidence: float = 0.6,
) -> MpcOutcome:
    """Driver loop: decide with ``live_mpc_step``, execute ONE step, re-plan.

    Each iteration first checks whether the current surface maps to an
    accepting FSM state (success). Otherwise it asks ``live_mpc_step`` for a
    decision. An escalating decision, or one without an action, ends the run
    with ``escalated=True`` and the decision's rationale; this covers both "no
    on-rail candidate" and "chosen step is COMMITTING". Any other decision is
    executed through the injected ``executor``, and the loop continues from the
    surface the executor returns.

    ``max_steps`` bounds the number of iterations. When the budget is used up,
    the final surface is checked once more. If it is accepting, the outcome
    reports ``"reached goal state"``, the same rationale as an in-loop success;
    otherwise it reports ``"max_steps reached"`` with ``escalated=False``.

    The executor is the only I/O performed here besides the model call inside
    ``live_mpc_step``.
    """
    to_state = surface_to_state or _surface_state
    taken: list[Candidate] = []
    cur = surface
    for _ in range(max_steps):
        if to_state(cur) in fsm.accepting:
            return MpcOutcome(
                reached_goal=True, escalated=False, steps=tuple(taken),
                rationale="reached goal state", surface=cur,
            )
        decision = await live_mpc_step(
            cur, model, fsm, patterns, k=k, depth=depth,
            surface_to_state=to_state, priors=priors, min_confidence=min_confidence,
        )
        if decision.escalate or decision.action is None:
            return MpcOutcome(
                reached_goal=False, escalated=True, steps=tuple(taken),
                rationale=decision.rationale, surface=cur,
            )
        # Execute exactly ONE step, then re-plan from the REAL resulting surface.
        taken.append(decision.action)
        cur = await executor(decision.action, cur)
    # Budget exhausted: the last executed step may still have landed on the goal.
    if to_state(cur) in fsm.accepting:
        return MpcOutcome(
            reached_goal=True, escalated=False, steps=tuple(taken),
            rationale="reached goal state", surface=cur,
        )
    return MpcOutcome(
        reached_goal=False, escalated=False,
        steps=tuple(taken), rationale="max_steps reached", surface=cur,
    )
628
+
629
+
630
+ # --- (D) the transition model FROM SHADOW recordings (Part B) -------------
631
+ #
632
+ # ``fsm_from_events`` folds the EVENT LOG into an ``Fsm``. ``fsm_from_shadow`` does
633
+ # the same from a Shadow recording, so a recording and the event log feed the SAME
634
+ # search transition model. The shapes are aligned (both produce a ``reachability.
635
+ # Fsm``), so the two sources MERGE — accumulating recordings GROWS the learned
636
+ # graph (the apprenticeship premise).
637
+ #
638
+ # DEPENDENCY DIRECTION: zu-shadow depends on zu-core AND zu-cli. Importing zu-shadow
639
+ # from zu-patterns risks a package cycle and violates the "dependency-light" rule,
640
+ # so ``fsm_from_shadow`` takes PLAIN inputs — either the already-emitted induced
641
+ # ``Fsm`` (the synthesizer's ``SynthesisResult.fsm``) OR the list of shadow events
642
+ # (``data.shadow.user.*``) — and NEVER imports zu-shadow. zu-patterns still depends
643
+ # only on zu-core.
644
+
645
+
646
+ def _shadow_state_id(seq: int) -> str:
647
+ return f"shadow_s{seq}"
648
+
649
+
650
def _shadow_action_label(e: Any) -> str:
    """Return the edge label for one shadow user event, shaped ``verb[:target]``.

    A navigate event is labelled ``"navigate"``. A click event uses the verb
    ``"click"``; every other event type falls through to ``"type"``. When the
    payload's ``target`` is a dict, the first truthy value among its ``name``,
    ``label`` and ``role`` is appended after a colon.
    """
    kind = getattr(e, "type", "")
    payload = _payload(e)
    if kind == ev.SHADOW_USER_NAVIGATE:
        return "navigate"
    verb = "type"
    if kind == ev.SHADOW_USER_CLICK:
        verb = "click"
    target = payload.get("target") or {}
    if not isinstance(target, dict):
        return verb
    name = target.get("name") or target.get("label") or target.get("role") or ""
    if name:
        return f"{verb}:{name}"
    return verb
663
+
664
+
665
def fsm_from_shadow_events(
    events: Sequence[Any],
    *,
    initial: str = "shadow_start",
    goal: str = "shadow_goal",
) -> Fsm:
    """Fold a Shadow recording's user actions into a linear induced ``Fsm`` (pure).

    Only click, type and navigate shadow events are kept. The result is a chain
    ``initial → shadow_s1 → … → shadow_sN → goal``: one state per kept action,
    each edge labelled by ``_shadow_action_label`` of that action, and a final
    ``"done"`` edge into ``goal``, the sole accepting state. An empty recording
    yields the single edge ``initial → goal``. Takes plain events, with no
    zu-shadow import.
    """
    recorded_types = (
        ev.SHADOW_USER_CLICK, ev.SHADOW_USER_TYPE, ev.SHADOW_USER_NAVIGATE
    )
    actions = [e for e in events if getattr(e, "type", "") in recorded_types]
    # chain[0] is the initial state; chain[i] is the state reached by action i.
    chain = [initial, *(_shadow_state_id(n) for n in range(1, len(actions) + 1))]
    edges = [
        FsmEdge(src=before, dst=after, label=_shadow_action_label(action))
        for before, after, action in zip(chain, chain[1:], actions)
    ]
    edges.append(FsmEdge(src=chain[-1], dst=goal, label="done"))
    return Fsm(
        states=frozenset([*chain, goal]),
        initial=initial,
        accepting=frozenset({goal}),
        edges=tuple(edges),
    )
700
+
701
+
702
def merge_transition_models(*fsms: Fsm) -> Fsm:
    """Merge induced ``Fsm``s into ONE transition model (pure graph union).

    States and accepting sets are unioned. Edges are unioned and de-duplicated
    on ``(src, dst, label)``, keeping first-seen order.

    The merged ``initial`` is taken from the first FSM whose ``initial`` is one
    of its own states. This skips an empty leading model (whose initial ``""``
    is not a state), so merging a real model into an empty base still yields a
    plannable graph. When no FSM qualifies, the first FSM's ``initial`` is kept
    as-is. For a non-empty first model this is the first FSM's initial.

    With no arguments an empty ``Fsm`` (initial ``""``) is returned.
    """
    if not fsms:
        return Fsm(states=frozenset(), initial="", accepting=frozenset(), edges=())
    states: set[str] = set()
    accepting: set[str] = set()
    # Insertion-ordered dict keyed by the edge triple: de-duplicates and keeps
    # first-seen order in one structure.
    unique_edges: dict[tuple[str, str, str], FsmEdge] = {}
    for f in fsms:
        states |= f.states
        accepting |= f.accepting
        for e in f.edges:
            unique_edges.setdefault((e.src, e.dst, e.label), e)
    initial = next(
        (f.initial for f in fsms if f.initial in f.states),
        fsms[0].initial,
    )
    return Fsm(
        states=frozenset(states),
        initial=initial,
        accepting=frozenset(accepting),
        edges=tuple(unique_edges.values()),
    )
731
+
732
+
733
def fsm_from_shadow(
    source: Any,
    *,
    base: Fsm | None = None,
    initial: str = "shadow_start",
    goal: str = "shadow_goal",
) -> Fsm:
    """Build, or extend, the transition model from a Shadow recording.

    ``source`` is a plain input (nothing from zu-shadow is imported):

    * an ``Fsm`` — used directly as the induced model; or
    * anything ``_shadow_events_of`` accepts (a sequence of shadow events, or an
      object exposing ``shadow_events()`` / ``events``) — folded through
      ``fsm_from_shadow_events`` with the given ``initial`` and ``goal``.

    When ``base`` is given, the induced model is merged into it with
    ``merge_transition_models(base, induced)``; otherwise the induced model is
    returned on its own.
    """
    induced = (
        source
        if isinstance(source, Fsm)
        else fsm_from_shadow_events(
            _shadow_events_of(source), initial=initial, goal=goal
        )
    )
    return induced if base is None else merge_transition_models(base, induced)
762
+
763
+
764
+ def _shadow_events_of(source: Any) -> Sequence[Any]:
765
+ """Extract the shadow events from a plain input: a bare sequence, or a
766
+ RecordedSession-shaped object exposing ``shadow_events()`` / ``events``."""
767
+ shadow_events = getattr(source, "shadow_events", None)
768
+ if callable(shadow_events):
769
+ return list(shadow_events())
770
+ events = getattr(source, "events", None)
771
+ if events is not None:
772
+ return list(events)
773
+ if isinstance(source, Sequence):
774
+ return source
775
+ raise TypeError(
776
+ "fsm_from_shadow source must be an Fsm, a sequence of shadow events, or a "
777
+ "RecordedSession-shaped object (with .events / .shadow_events())"
778
+ )