website-agent-server 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2757 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import struct
5
+ import zlib
6
+ import json
7
+ import logging
8
+ import re
9
+ import secrets
10
+ import shutil
11
+ import time
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+ from typing import Any
15
+ from urllib.parse import urlsplit
16
+ from uuid import uuid4
17
+
18
+ from fastapi import WebSocket
19
+ from starlette.websockets import WebSocketDisconnect
20
+
21
+ from playwright.async_api import (
22
+ Browser,
23
+ BrowserContext,
24
+ Download,
25
+ FileChooser,
26
+ Page,
27
+ Playwright,
28
+ Request as PlaywrightRequest,
29
+ Route,
30
+ TimeoutError as PlaywrightTimeoutError,
31
+ WebSocketRoute,
32
+ async_playwright,
33
+ )
34
+
35
+ from .config import Settings
36
+ from .url_policy import HostAccessPolicy, URLPolicyError
37
+
38
+
39
# Module logger, named after this module.
logger = logging.getLogger(__name__)


# Names of mobile device descriptors (presumably looked up in Playwright's
# device registry in this order -- confirm at the use site).
MOBILE_DEVICE_DESCRIPTOR_NAMES = (
    "Pixel 7",
    "Pixel 5",
    "Pixel 4",
    "Galaxy S9+",
)

# User-agent string kept as a fallback for mobile sessions.
MOBILE_FALLBACK_USER_AGENT = (
    "Mozilla/5.0 (Linux; Android 14; Pixel 7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/148.0.0.0 Mobile Safari/537.36"
)

# Client-hint request headers describing a mobile Android client.
MOBILE_CLIENT_HINT_HEADERS = {"sec-ch-ua-mobile": "?1", "sec-ch-ua-platform": '"Android"'}

# Tuning values for synthetic mouse movement.
MOUSE_MOVE_MAX_DURATION_SECONDS = 0.1
MOUSE_MOVE_SPEED_PIXELS_PER_SECOND = 12000.0
MOUSE_MOVE_STEP_INTERVAL_SECONDS = 0.008
MOUSE_MOVE_STEP_DISTANCE = 42.0
MOUSE_MOVE_MAX_STEPS = 16

# Upper bound on the length of one base64-encoded audio chunk.
MAX_AUDIO_CHUNK_BASE64_LENGTH = 2_000_000

# Duration (seconds) associated with forced no-cache reloads.
FORCE_RELOAD_NO_CACHE_SECONDS = 20.0

# Message types that count as user interaction.
INTERACTION_FRAME_MESSAGE_TYPES = {
    "mouse_down", "mouse_up", "tap", "wheel",
    "pinch", "key", "text", "paste",
}
68
+
69
+ NATIVE_CARET_SUPPRESSION_SCRIPT = """() => {
70
+ const styleId = "__website_agent_native_caret_hidden";
71
+ const listenersFlag = "__websiteAgentNativeCaretListeners";
72
+ const editableSelector = "input, textarea, [contenteditable], [role='textbox']";
73
+ const styleText = `
74
+ html,
75
+ body,
76
+ input,
77
+ textarea,
78
+ [contenteditable],
79
+ [contenteditable] *,
80
+ [role="textbox"],
81
+ [role="textbox"] * {
82
+ caret-color: transparent !important;
83
+ }
84
+ `;
85
+
86
+ function forceTransparentCaret(element) {
87
+ if (!element || !element.style) {
88
+ return;
89
+ }
90
+ element.style.setProperty("caret-color", "transparent", "important");
91
+ }
92
+
93
+ function forceEditableCaret(element) {
94
+ if (!element || !element.closest) {
95
+ return;
96
+ }
97
+ const editable = element.closest(editableSelector);
98
+ if (!editable) {
99
+ return;
100
+ }
101
+ forceTransparentCaret(editable);
102
+ let current = element;
103
+ while (current && current !== editable && current.style) {
104
+ forceTransparentCaret(current);
105
+ current = current.parentElement;
106
+ }
107
+ }
108
+
109
+ function forceSelectionCaret() {
110
+ const selection = window.getSelection ? window.getSelection() : null;
111
+ if (!selection || !selection.anchorNode) {
112
+ return;
113
+ }
114
+ const anchor =
115
+ selection.anchorNode.nodeType === Node.ELEMENT_NODE
116
+ ? selection.anchorNode
117
+ : selection.anchorNode.parentElement;
118
+ forceEditableCaret(anchor);
119
+ }
120
+
121
+ function install() {
122
+ const root = document.documentElement;
123
+ if (!root) {
124
+ return;
125
+ }
126
+ root.style.setProperty("caret-color", "transparent", "important");
127
+ if (document.body) {
128
+ document.body.style.setProperty("caret-color", "transparent", "important");
129
+ }
130
+ forceEditableCaret(document.activeElement);
131
+ forceSelectionCaret();
132
+ if (document.getElementById(styleId)) {
133
+ return;
134
+ }
135
+ const style = document.createElement("style");
136
+ style.id = styleId;
137
+ style.textContent = styleText;
138
+ (document.head || root).appendChild(style);
139
+ }
140
+
141
+ install();
142
+ if (document.readyState === "loading") {
143
+ document.addEventListener("DOMContentLoaded", install, { once: true });
144
+ }
145
+ if (!window[listenersFlag]) {
146
+ window[listenersFlag] = true;
147
+ document.addEventListener("focusin", (event) => forceEditableCaret(event.target), true);
148
+ document.addEventListener("selectionchange", forceSelectionCaret, true);
149
+ }
150
+ }"""
151
+ NATIVE_CARET_SUPPRESSION_INIT_SCRIPT = f"({NATIVE_CARET_SUPPRESSION_SCRIPT})()"
152
+
153
+ AUDIO_CAPTURE_SCRIPT = """() => {
154
+ if (window.__websiteAgentAudioCaptureInstalled) {
155
+ return;
156
+ }
157
+ window.__websiteAgentAudioCaptureInstalled = true;
158
+ if (
159
+ typeof window.__websiteAgentAudioBridge !== "function" ||
160
+ typeof MediaRecorder === "undefined" ||
161
+ typeof MediaStream === "undefined"
162
+ ) {
163
+ return;
164
+ }
165
+ const mimeType = [
166
+ "audio/webm;codecs=opus",
167
+ "audio/webm",
168
+ ].find((candidate) => MediaRecorder.isTypeSupported(candidate));
169
+ if (!mimeType) {
170
+ return;
171
+ }
172
+
173
+ const records = new WeakMap();
174
+ let nextStreamId = 1;
175
+ let audioContextPatched = false;
176
+
177
+ function post(message) {
178
+ try {
179
+ window.__websiteAgentAudioBridge(message);
180
+ } catch {
181
+ // The local bridge may have gone away during navigation.
182
+ }
183
+ }
184
+
185
+ function bufferToBase64(buffer) {
186
+ const bytes = new Uint8Array(buffer);
187
+ let binary = "";
188
+ const size = 0x8000;
189
+ for (let offset = 0; offset < bytes.length; offset += size) {
190
+ const chunk = bytes.subarray(offset, offset + size);
191
+ binary += String.fromCharCode(...chunk);
192
+ }
193
+ return btoa(binary);
194
+ }
195
+
196
+ function eligible(element) {
197
+ return (
198
+ element instanceof HTMLMediaElement &&
199
+ !element.paused &&
200
+ !element.ended &&
201
+ !element.muted &&
202
+ element.volume > 0
203
+ );
204
+ }
205
+
206
+ function ensureRecord(element, streamFactory) {
207
+ if (!(element instanceof HTMLMediaElement)) {
208
+ return null;
209
+ }
210
+ let record = records.get(element);
211
+ if (record) {
212
+ return record;
213
+ }
214
+ let stream;
215
+ try {
216
+ stream = streamFactory(element);
217
+ } catch {
218
+ return null;
219
+ }
220
+ if (!stream || typeof stream.getAudioTracks !== "function") {
221
+ return null;
222
+ }
223
+ record = {
224
+ stream,
225
+ recorder: null,
226
+ streamId: `media-${Date.now()}-${nextStreamId++}`,
227
+ watched: false,
228
+ };
229
+ records.set(element, record);
230
+ stream.addEventListener("addtrack", () => {
231
+ window.setTimeout(() => startRecording(element), 0);
232
+ });
233
+ return record;
234
+ }
235
+
236
+ function stopRecording(element) {
237
+ const record = records.get(element);
238
+ if (!record || !record.recorder) {
239
+ return;
240
+ }
241
+ const recorder = record.recorder;
242
+ record.recorder = null;
243
+ try {
244
+ if (recorder.state !== "inactive") {
245
+ recorder.stop();
246
+ }
247
+ } catch {
248
+ post({ kind: "stop", streamId: record.streamId });
249
+ }
250
+ }
251
+
252
+ function startRecording(element) {
253
+ if (!eligible(element)) {
254
+ stopRecording(element);
255
+ return;
256
+ }
257
+ let record = records.get(element);
258
+ if (!record) {
259
+ if (typeof element.captureStream !== "function") {
260
+ post({ kind: "debug", reason: "capture-stream-unavailable" });
261
+ return;
262
+ }
263
+ record = ensureRecord(element, (mediaElement) => mediaElement.captureStream());
264
+ }
265
+ if (!record || (record.recorder && record.recorder.state !== "inactive")) {
266
+ return;
267
+ }
268
+ const tracks = record.stream
269
+ .getAudioTracks()
270
+ .filter((track) => track.readyState === "live");
271
+ if (tracks.length === 0) {
272
+ post({ kind: "debug", streamId: record.streamId, reason: "no-live-audio-tracks" });
273
+ return;
274
+ }
275
+ let recorder;
276
+ try {
277
+ recorder = new MediaRecorder(new MediaStream(tracks), { mimeType });
278
+ } catch {
279
+ return;
280
+ }
281
+ record.recorder = recorder;
282
+ recorder.addEventListener("start", () => {
283
+ post({ kind: "start", streamId: record.streamId, mime: mimeType });
284
+ });
285
+ recorder.addEventListener("stop", () => {
286
+ post({ kind: "stop", streamId: record.streamId });
287
+ });
288
+ recorder.addEventListener("dataavailable", async (event) => {
289
+ if (!event.data || event.data.size <= 0) {
290
+ return;
291
+ }
292
+ try {
293
+ const buffer = await event.data.arrayBuffer();
294
+ post({
295
+ kind: "chunk",
296
+ streamId: record.streamId,
297
+ mime: recorder.mimeType || mimeType,
298
+ data: bufferToBase64(buffer),
299
+ });
300
+ } catch {
301
+ // Dropping one audio chunk is better than breaking page playback.
302
+ }
303
+ });
304
+ try {
305
+ recorder.start(500);
306
+ } catch {
307
+ post({ kind: "debug", streamId: record.streamId, reason: "recorder-start-failed" });
308
+ record.recorder = null;
309
+ }
310
+ }
311
+
312
+ function watchMediaElement(element) {
313
+ if (!(element instanceof HTMLMediaElement)) {
314
+ return;
315
+ }
316
+ if (typeof element.captureStream !== "function") {
317
+ post({ kind: "debug", reason: "capture-stream-unavailable" });
318
+ return;
319
+ }
320
+ const record = ensureRecord(element, (mediaElement) => mediaElement.captureStream());
321
+ if (!record || record.watched) {
322
+ return;
323
+ }
324
+ record.watched = true;
325
+ element.addEventListener("play", () => startRecording(element), true);
326
+ element.addEventListener("playing", () => startRecording(element), true);
327
+ element.addEventListener("canplay", () => startRecording(element), true);
328
+ element.addEventListener("volumechange", () => startRecording(element), true);
329
+ element.addEventListener("pause", () => stopRecording(element), true);
330
+ element.addEventListener("ended", () => stopRecording(element), true);
331
+ element.addEventListener("emptied", () => stopRecording(element), true);
332
+ startRecording(element);
333
+ }
334
+
335
+ function startRecordingFromStream(element, stream) {
336
+ if (!(element instanceof HTMLMediaElement) || !stream) {
337
+ return;
338
+ }
339
+ let record = records.get(element);
340
+ if (!record) {
341
+ record = {
342
+ stream,
343
+ recorder: null,
344
+ streamId: `media-${Date.now()}-${nextStreamId++}`,
345
+ watched: true,
346
+ };
347
+ records.set(element, record);
348
+ } else if (!record.stream || record.stream.getAudioTracks().length === 0) {
349
+ record.stream = stream;
350
+ }
351
+ stream.addEventListener("addtrack", () => {
352
+ window.setTimeout(() => startRecording(element), 0);
353
+ });
354
+ window.setTimeout(() => startRecording(element), 0);
355
+ }
356
+
357
+ function patchAudioContextClass(ContextClass) {
358
+ if (!ContextClass || ContextClass.prototype.__websiteAgentAudioPatched) {
359
+ return;
360
+ }
361
+ Object.defineProperty(ContextClass.prototype, "__websiteAgentAudioPatched", {
362
+ value: true,
363
+ configurable: true,
364
+ });
365
+ const original = ContextClass.prototype.createMediaElementSource;
366
+ if (typeof original !== "function") {
367
+ return;
368
+ }
369
+ ContextClass.prototype.createMediaElementSource = function (element) {
370
+ const source = original.call(this, element);
371
+ try {
372
+ const destination = this.createMediaStreamDestination();
373
+ source.connect(destination);
374
+ startRecordingFromStream(element, destination.stream);
375
+ } catch {
376
+ // Fall back to captureStream.
377
+ }
378
+ return source;
379
+ };
380
+ }
381
+
382
+ function patchAudioContext() {
383
+ if (audioContextPatched) {
384
+ return;
385
+ }
386
+ audioContextPatched = true;
387
+ patchAudioContextClass(window.AudioContext);
388
+ patchAudioContextClass(window.webkitAudioContext);
389
+ }
390
+
391
+ const originalPlay = HTMLMediaElement.prototype.play;
392
+ HTMLMediaElement.prototype.play = function (...args) {
393
+ patchAudioContext();
394
+ watchMediaElement(this);
395
+ const result = originalPlay.apply(this, args);
396
+ if (result && typeof result.then === "function") {
397
+ result.then(() => startRecording(this)).catch(() => {});
398
+ } else {
399
+ window.setTimeout(() => startRecording(this), 0);
400
+ }
401
+ return result;
402
+ };
403
+
404
+ function scan(root) {
405
+ if (!root || !root.querySelectorAll) {
406
+ return;
407
+ }
408
+ if (root instanceof HTMLMediaElement) {
409
+ watchMediaElement(root);
410
+ }
411
+ root.querySelectorAll("audio, video").forEach(watchMediaElement);
412
+ }
413
+
414
+ patchAudioContext();
415
+ scan(document);
416
+ new MutationObserver((mutations) => {
417
+ for (const mutation of mutations) {
418
+ for (const node of mutation.addedNodes) {
419
+ if (node instanceof Element) {
420
+ scan(node);
421
+ }
422
+ }
423
+ }
424
+ }).observe(document.documentElement, { childList: true, subtree: true });
425
+ }"""
426
+ AUDIO_CAPTURE_INIT_SCRIPT = f"({AUDIO_CAPTURE_SCRIPT})()"
427
+
428
+ EDITABLE_METRICS_SCRIPT = """() => {
429
+ const textTypes = new Set([
430
+ "email",
431
+ "number",
432
+ "password",
433
+ "search",
434
+ "tel",
435
+ "text",
436
+ "url",
437
+ ]);
438
+
439
+ function clamp(value, minimum, maximum) {
440
+ return Math.max(minimum, Math.min(maximum, value));
441
+ }
442
+
443
+ function numeric(value) {
444
+ const parsed = Number.parseFloat(value);
445
+ return Number.isFinite(parsed) ? parsed : 0;
446
+ }
447
+
448
+ function documentSize() {
449
+ return {
450
+ width: Math.max(
451
+ document.documentElement.scrollWidth,
452
+ document.body ? document.body.scrollWidth : 0,
453
+ window.innerWidth
454
+ ),
455
+ height: Math.max(
456
+ document.documentElement.scrollHeight,
457
+ document.body ? document.body.scrollHeight : 0,
458
+ window.innerHeight
459
+ ),
460
+ };
461
+ }
462
+
463
+ function findEditable() {
464
+ const active = document.activeElement;
465
+ if (!active || active === document.body || active === document.documentElement) {
466
+ return null;
467
+ }
468
+ const editable = active.closest(
469
+ "input, textarea, [contenteditable], [role='textbox']"
470
+ );
471
+ if (!editable) {
472
+ return null;
473
+ }
474
+ if (editable.matches("input")) {
475
+ const type = (editable.getAttribute("type") || "text").toLowerCase();
476
+ if (!textTypes.has(type) || editable.disabled || editable.readOnly) {
477
+ return null;
478
+ }
479
+ }
480
+ if (editable.matches("textarea") && (editable.disabled || editable.readOnly)) {
481
+ return null;
482
+ }
483
+ if (
484
+ editable.matches("[contenteditable]") &&
485
+ editable.getAttribute("contenteditable") === "false"
486
+ ) {
487
+ return null;
488
+ }
489
+ return editable;
490
+ }
491
+
492
+ function copyTextStyles(source, target) {
493
+ const style = window.getComputedStyle(source);
494
+ const properties = [
495
+ "boxSizing",
496
+ "borderTopWidth",
497
+ "borderRightWidth",
498
+ "borderBottomWidth",
499
+ "borderLeftWidth",
500
+ "borderStyle",
501
+ "direction",
502
+ "fontFamily",
503
+ "fontFeatureSettings",
504
+ "fontKerning",
505
+ "fontSize",
506
+ "fontStretch",
507
+ "fontStyle",
508
+ "fontVariant",
509
+ "fontWeight",
510
+ "letterSpacing",
511
+ "lineHeight",
512
+ "paddingTop",
513
+ "paddingRight",
514
+ "paddingBottom",
515
+ "paddingLeft",
516
+ "tabSize",
517
+ "textAlign",
518
+ "textDecoration",
519
+ "textIndent",
520
+ "textTransform",
521
+ "wordBreak",
522
+ "wordSpacing",
523
+ ];
524
+ for (const property of properties) {
525
+ target.style[property] = style[property];
526
+ }
527
+ return style;
528
+ }
529
+
530
+ function caretResult(x, y, height, fontSize, rect) {
531
+ const caretHeight = Math.max(8, Math.min(rect.height, height || fontSize || rect.height));
532
+ const caretWidth = Math.max(1, Math.min(3, (fontSize || caretHeight) * 0.12));
533
+ const centerY = clamp(y, rect.top, rect.bottom);
534
+ const top = clamp(centerY - caretHeight / 2, rect.top, Math.max(rect.top, rect.bottom - caretHeight));
535
+ const centerX = clamp(x, rect.left, rect.right);
536
+ return {
537
+ focusLeft: centerX,
538
+ focusTop: top + caretHeight / 2,
539
+ caretX: centerX,
540
+ caretY: top,
541
+ caretWidth,
542
+ caretHeight,
543
+ };
544
+ }
545
+
546
+ function textControlFocusPoint(editable, rect) {
547
+ if (typeof editable.selectionStart !== "number") {
548
+ return null;
549
+ }
550
+ const value = editable.value || "";
551
+ const selectionStart = editable.selectionStart ?? value.length;
552
+ const selectionEnd = editable.selectionEnd ?? selectionStart;
553
+ const caret = clamp(Math.max(selectionStart, selectionEnd), 0, value.length);
554
+ const inputType = (editable.getAttribute("type") || "text").toLowerCase();
555
+ const mirror = document.createElement("div");
556
+ const style = copyTextStyles(editable, mirror);
557
+ mirror.style.position = "absolute";
558
+ mirror.style.visibility = "hidden";
559
+ mirror.style.pointerEvents = "none";
560
+ mirror.style.left = "-10000px";
561
+ mirror.style.top = "0";
562
+ mirror.style.overflow = "hidden";
563
+ mirror.style.whiteSpace = editable.tagName === "TEXTAREA" ? "pre-wrap" : "pre";
564
+ mirror.style.overflowWrap =
565
+ editable.tagName === "TEXTAREA" ? "break-word" : "normal";
566
+ mirror.style.width = editable.tagName === "TEXTAREA" ? `${rect.width}px` : "auto";
567
+ mirror.style.minWidth = `${rect.width}px`;
568
+ mirror.style.height = "auto";
569
+
570
+ let before = value.slice(0, caret);
571
+ if (inputType === "password") {
572
+ before = "x".repeat(before.length);
573
+ }
574
+ if (editable.tagName === "TEXTAREA" && before.endsWith("\\n")) {
575
+ before += " ";
576
+ }
577
+ mirror.textContent = before;
578
+ const marker = document.createElement("span");
579
+ marker.textContent = "\\u200b";
580
+ mirror.appendChild(marker);
581
+ document.body.appendChild(mirror);
582
+
583
+ const markerRect = marker.getBoundingClientRect();
584
+ const mirrorRect = mirror.getBoundingClientRect();
585
+ const fontSize = numeric(style.fontSize) || rect.height;
586
+ const lineHeight = numeric(style.lineHeight) || markerRect.height || fontSize;
587
+ let focusLeft = rect.left + markerRect.left - mirrorRect.left - editable.scrollLeft;
588
+ let focusTop;
589
+ if (editable.tagName === "TEXTAREA") {
590
+ focusTop =
591
+ rect.top +
592
+ markerRect.top -
593
+ mirrorRect.top -
594
+ editable.scrollTop +
595
+ lineHeight / 2;
596
+ } else {
597
+ focusTop = rect.top + rect.height / 2;
598
+ }
599
+ mirror.remove();
600
+
601
+ const leftInset = numeric(style.borderLeftWidth) + numeric(style.paddingLeft);
602
+ const rightInset = numeric(style.borderRightWidth) + numeric(style.paddingRight);
603
+ const minLeft = rect.left + Math.max(0, Math.min(rect.width / 2, leftInset));
604
+ const maxLeft = rect.right - Math.max(0, Math.min(rect.width / 2, rightInset));
605
+ const safeLeft = clamp(focusLeft, minLeft, Math.max(minLeft, maxLeft));
606
+ return caretResult(safeLeft, focusTop, lineHeight, fontSize, rect);
607
+ }
608
+
609
+ function selectionFocusPoint(editable, rect) {
610
+ const selection = window.getSelection();
611
+ if (!selection || selection.rangeCount < 1) {
612
+ return null;
613
+ }
614
+ const selectedRange = selection.getRangeAt(0);
615
+ if (!editable.contains(selectedRange.endContainer)) {
616
+ return null;
617
+ }
618
+ const range = selectedRange.cloneRange();
619
+ range.collapse(false);
620
+ let caretRect = null;
621
+ const rects = range.getClientRects();
622
+ if (rects.length > 0) {
623
+ caretRect = rects[rects.length - 1];
624
+ } else {
625
+ const restoreRange = selectedRange.cloneRange();
626
+ const marker = document.createElement("span");
627
+ marker.textContent = "\\u200b";
628
+ marker.style.display = "inline-block";
629
+ marker.style.width = "1px";
630
+ marker.style.height = "1em";
631
+ range.insertNode(marker);
632
+ caretRect = marker.getBoundingClientRect();
633
+ marker.remove();
634
+ selection.removeAllRanges();
635
+ selection.addRange(restoreRange);
636
+ }
637
+ if (!caretRect) {
638
+ return null;
639
+ }
640
+ return caretResult(
641
+ caretRect.left + caretRect.width / 2,
642
+ caretRect.top + caretRect.height / 2,
643
+ caretRect.height,
644
+ caretRect.height,
645
+ rect
646
+ );
647
+ }
648
+
649
+ const active = document.activeElement;
650
+ const editable = findEditable();
651
+ if (!editable) {
652
+ return null;
653
+ }
654
+ const rect = editable.getBoundingClientRect();
655
+ const size = documentSize();
656
+ const editableStyle = window.getComputedStyle(editable);
657
+ const caret =
658
+ active.matches("input, textarea")
659
+ ? textControlFocusPoint(active, rect)
660
+ : selectionFocusPoint(editable, rect);
661
+ const fallbackHeight = Math.max(8, Math.min(rect.height, 18));
662
+ const fallback = caretResult(
663
+ rect.left + rect.width / 2,
664
+ rect.top + Math.min(rect.height / 2, 32),
665
+ fallbackHeight,
666
+ fallbackHeight,
667
+ rect
668
+ );
669
+ const focus = caret || fallback;
670
+ const visualViewport = window.visualViewport;
671
+ return {
672
+ left: rect.left,
673
+ top: rect.top,
674
+ width: rect.width,
675
+ height: rect.height,
676
+ editableTag: editable.tagName,
677
+ editableType: active.matches("input") ? (active.getAttribute("type") || "text").toLowerCase() : "",
678
+ selectionStart: typeof active.selectionStart === "number" ? active.selectionStart : null,
679
+ selectionEnd: typeof active.selectionEnd === "number" ? active.selectionEnd : null,
680
+ valueLength: typeof active.value === "string" ? active.value.length : null,
681
+ borderLeft: numeric(editableStyle.borderLeftWidth),
682
+ borderTop: numeric(editableStyle.borderTopWidth),
683
+ borderRight: numeric(editableStyle.borderRightWidth),
684
+ borderBottom: numeric(editableStyle.borderBottomWidth),
685
+ paddingLeft: numeric(editableStyle.paddingLeft),
686
+ paddingTop: numeric(editableStyle.paddingTop),
687
+ paddingRight: numeric(editableStyle.paddingRight),
688
+ paddingBottom: numeric(editableStyle.paddingBottom),
689
+ focusLeft: focus.focusLeft,
690
+ focusTop: focus.focusTop,
691
+ caretX: focus.caretX,
692
+ caretY: focus.caretY,
693
+ caretWidth: focus.caretWidth,
694
+ caretHeight: focus.caretHeight,
695
+ scrollX: window.scrollX,
696
+ scrollY: window.scrollY,
697
+ innerWidth: window.innerWidth,
698
+ innerHeight: window.innerHeight,
699
+ documentWidth: size.width,
700
+ documentHeight: size.height,
701
+ visualScale: visualViewport ? visualViewport.scale : 1,
702
+ visualOffsetLeft: visualViewport ? visualViewport.offsetLeft : 0,
703
+ visualOffsetTop: visualViewport ? visualViewport.offsetTop : 0,
704
+ };
705
+ }"""
706
+
707
+
708
+ def _safe_filename(filename: str) -> str:
709
+ cleaned = re.sub(r"[^A-Za-z0-9._ -]+", "_", filename).strip(" .")
710
+ return cleaned or "download"
711
+
712
+
713
+ def _clamp(value: int, minimum: int, maximum: int) -> int:
714
+ return max(minimum, min(maximum, value))
715
+
716
+
717
+ def _float_value(value: Any, default: float | None = 0.0) -> float | None:
718
+ try:
719
+ return float(value)
720
+ except (TypeError, ValueError):
721
+ return default
722
+
723
+
724
def _visual_viewport_offset(
    viewport: dict[str, Any], offset_key: str, page_key: str, scroll_key: str
) -> float:
    """Return a non-negative visual-viewport offset taken from *viewport*.

    When both the ``page_key`` and ``scroll_key`` entries convert to floats,
    the offset is their difference. Otherwise the value stored under
    ``offset_key`` is used (0.0 when missing or unparsable). The result is
    never negative.
    """
    page_offset = _float_value(viewport.get(page_key), None)
    scroll_offset = _float_value(viewport.get(scroll_key), None)
    if page_offset is None or scroll_offset is None:
        # Fall back to the offset reported directly in the payload.
        reported_offset = _float_value(viewport.get(offset_key), 0.0) or 0.0
        return max(0.0, reported_offset)
    return max(0.0, page_offset - scroll_offset)
733
+
734
+
735
+ def _png_rgba_rows(data: bytes) -> tuple[int, int, list[bytes]] | None:
736
+ if data[:8] != b"\x89PNG\r\n\x1a\n":
737
+ return None
738
+ position = 8
739
+ width = 0
740
+ height = 0
741
+ bit_depth = 0
742
+ color_type = 0
743
+ idat = bytearray()
744
+ try:
745
+ while position + 8 <= len(data):
746
+ length = struct.unpack(">I", data[position : position + 4])[0]
747
+ kind = data[position + 4 : position + 8]
748
+ chunk = data[position + 8 : position + 8 + length]
749
+ position += 12 + length
750
+ if kind == b"IHDR":
751
+ width = struct.unpack(">I", chunk[:4])[0]
752
+ height = struct.unpack(">I", chunk[4:8])[0]
753
+ bit_depth = chunk[8]
754
+ color_type = chunk[9]
755
+ elif kind == b"IDAT":
756
+ idat.extend(chunk)
757
+ elif kind == b"IEND":
758
+ break
759
+ if bit_depth != 8 or color_type not in {2, 6} or width <= 0 or height <= 0:
760
+ return None
761
+ channels = 4 if color_type == 6 else 3
762
+ stride = width * channels
763
+ raw = zlib.decompress(bytes(idat))
764
+ rows: list[bytes] = []
765
+ previous = bytearray(stride)
766
+ index = 0
767
+ for _ in range(height):
768
+ filter_type = raw[index]
769
+ index += 1
770
+ scanline = bytearray(raw[index : index + stride])
771
+ index += stride
772
+ reconstructed = bytearray(stride)
773
+ for i, value in enumerate(scanline):
774
+ left = reconstructed[i - channels] if i >= channels else 0
775
+ up = previous[i]
776
+ up_left = previous[i - channels] if i >= channels else 0
777
+ if filter_type == 0:
778
+ predictor = 0
779
+ elif filter_type == 1:
780
+ predictor = left
781
+ elif filter_type == 2:
782
+ predictor = up
783
+ elif filter_type == 3:
784
+ predictor = (left + up) // 2
785
+ elif filter_type == 4:
786
+ p = left + up - up_left
787
+ pa = abs(p - left)
788
+ pb = abs(p - up)
789
+ pc = abs(p - up_left)
790
+ predictor = left if pa <= pb and pa <= pc else up if pb <= pc else up_left
791
+ else:
792
+ return None
793
+ reconstructed[i] = (value + predictor) & 0xFF
794
+ if channels == 3:
795
+ rgba = bytearray(width * 4)
796
+ for x in range(width):
797
+ rgba[x * 4 : x * 4 + 3] = reconstructed[x * 3 : x * 3 + 3]
798
+ rgba[x * 4 + 3] = 255
799
+ rows.append(bytes(rgba))
800
+ else:
801
+ rows.append(bytes(reconstructed))
802
+ previous = reconstructed
803
+ return width, height, rows
804
+ except Exception:
805
+ return None
806
+
807
+
808
@dataclass(frozen=True)
class DownloadRecord:
    """Immutable pairing of a download's file name with its path."""

    # File name associated with the download.
    filename: str
    # Path object for the download's file.
    path: Path
812
+
813
+
814
@dataclass
class ClientContextEntry:
    """Mutable record pairing a browser context with a last-used timestamp."""

    # The browser context held by this entry.
    context: BrowserContext
    # Timestamp of the most recent use (clock/units are set by the caller).
    last_used: float
818
+
819
+
820
@dataclass
class NavigationEntry:
    """Mutable record pairing a page with a URL."""

    # The page this entry refers to.
    page: Page
    # The URL recorded for that page.
    url: str
824
+
825
+
826
+ def _cookie_identity(cookie: dict[str, Any]) -> tuple[str, str, str]:
827
+ return (
828
+ str(cookie.get("name") or ""),
829
+ str(cookie.get("domain") or ""),
830
+ str(cookie.get("path") or "/"),
831
+ )
832
+
833
+
834
+ class BrowserSession:
835
def __init__(
    self,
    manager: "BrowserManager",
    session_id: str,
    client_uuid: str,
    lock_url: str | None,
) -> None:
    """Set up the in-memory state of a session; no browser work happens here.

    Args:
        manager: Owning manager; its ``settings`` are reused by the session.
        session_id: Identifier stored as ``self.id``.
        client_uuid: Identifier of the client this session belongs to.
        lock_url: Lock URL, or ``None`` for an unlocked session.
    """
    # Identity and configuration.
    self.manager = manager
    self.id = session_id
    self.client_uuid = client_uuid
    self.settings = manager.settings
    self.lock_url = lock_url
    self.policy = HostAccessPolicy(self.settings.allow_private_hosts)

    # Browser handles; ``context`` is assigned by ``start``.
    self.context: BrowserContext | None = None
    self.page: Page | None = None

    # Default viewport until ``start`` applies the requested size.
    self.viewport_width = 1280
    self.viewport_height = 720
    self.last_activity = time.monotonic()

    # Per-session registries, keyed by string identifiers.
    self.downloads: dict[str, DownloadRecord] = {}
    self.file_choosers: dict[str, FileChooser] = {}

    # Navigation history bookkeeping (-1 means no current entry).
    self._history: list[NavigationEntry] = []
    self._history_index = -1
    self._history_replaying = False

    # Device emulation and zoom state.
    self._is_mobile = False
    self._device_scale_factor = 1.0
    self._page_scale_factor = 1.0
    self._mobile_focus_zoom = 1.0
    self._mobile_focus_clip: dict[str, float] | None = None
    self._calibrate_caret_until = 0.0

    # Input, navigation and media tracking.
    self._mouse_position: tuple[float, float] = (0.0, 0.0)
    self._pending_document_navigation_url = ""
    self._force_reload_until = 0.0
    self._media_state_checked_at = 0.0
    self._media_playing = False

    # Synchronisation primitives and the pending frame task.
    self._action_lock = asyncio.Lock()
    self._frame_capture_lock = asyncio.Lock()
    self._requested_frame_task: asyncio.Task[None] | None = None

    # Transport state for the main and audio websockets.
    self._outgoing: asyncio.Queue[dict[str, Any]] | None = None
    self._audio_outgoing: asyncio.Queue[dict[str, Any]] | None = None
    self._websocket: WebSocket | None = None
    self._audio_websocket: WebSocket | None = None

    # Lifecycle flags; a new session starts out disconnected.
    self._closed = False
    self._connected = False
    self.disconnected_at = time.monotonic()
879
+
880
@property
def is_locked(self) -> bool:
    """``True`` when the session has a lock URL (``lock_url`` is not ``None``)."""
    has_lock = self.lock_url is not None
    return has_lock
883
+
884
async def start(
    self,
    raw_url: str,
    width: int,
    height: int,
    is_mobile: bool,
    device_scale_factor: float,
) -> None:
    """Acquire a browser context, open a page and navigate to *raw_url*.

    Args:
        raw_url: Initial URL; it is checked by the session's host policy
            before any browser resource is acquired.
        width: Requested viewport width, clamped to the configured limits.
        height: Requested viewport height, clamped to the configured limits.
        is_mobile: Whether mobile context options are merged in.
        device_scale_factor: Requested scale factor, limited to [0.5, 4.0].

    Raises:
        Whatever the policy check, the manager or the browser calls raise.
    """
    self._is_mobile = is_mobile
    self._device_scale_factor = max(0.5, min(4.0, device_scale_factor))

    # Validate first so a rejected URL never costs a browser context.
    initial_url = await self.policy.ensure_navigation_url_allowed(
        raw_url,
        verify_https=not self.settings.ignore_https_errors,
    )

    limits = self.settings
    self.viewport_width = _clamp(
        width, limits.min_viewport_width, limits.max_viewport_width
    )
    self.viewport_height = _clamp(
        height, limits.min_viewport_height, limits.max_viewport_height
    )

    context_options: dict[str, Any] = dict(
        accept_downloads=True,
        ignore_https_errors=limits.ignore_https_errors,
        viewport={"width": self.viewport_width, "height": self.viewport_height},
        device_scale_factor=self._device_scale_factor,
    )
    if self._is_mobile:
        mobile_overrides = self.manager.mobile_context_options(
            self.viewport_width, self.viewport_height
        )
        context_options.update(mobile_overrides)

    self.context = await self.manager.acquire_context(self.client_uuid, context_options)
    await self.context.grant_permissions(["clipboard-read", "clipboard-write"])

    new_page = await self.context.new_page()
    await self._prepare_page(new_page)
    await self.navigate(initial_url)
920
+
921
async def close(self) -> None:
    """Shut the session down: drop transports, close its pages, reset history.

    References are detached from ``self`` before anything is awaited, so
    other code observes the session as closed while teardown is running.
    The browser context itself is not closed here; it is only marked as
    recently used on the manager.
    """
    self._closed = True

    # Snapshot the handles, then clear them on the instance.
    context, page = self.context, self.page
    websocket, audio_websocket = self._websocket, self._audio_websocket
    self.context = self.page = None
    self._websocket = self._audio_websocket = None
    self._outgoing = self._audio_outgoing = None
    self._connected = False

    frame_task, self._requested_frame_task = self._requested_frame_task, None
    if frame_task is not None:
        frame_task.cancel()

    # 1001 = "going away"; the main socket is closed before the audio one.
    for socket in (websocket, audio_websocket):
        if socket is not None:
            await _ignore_shutdown_disconnect(socket.close(code=1001))

    for session_page in self._session_pages(page):
        await _ignore_shutdown_disconnect(session_page.close())

    if context is not None:
        self.manager.touch_context(self.client_uuid)

    self._history.clear()
    self._history_index = -1
    self._history_replaying = False
950
+
951
async def connect(self, websocket: WebSocket) -> None:
    """Serve one client websocket until it disconnects or a loop fails.

    Accepts the socket, closes any previously attached socket with code
    1001 ("Reconnected"), queues a "connected" status message, and runs the
    send, receive and frame loops concurrently.

    Args:
        websocket: The not-yet-accepted client websocket.

    Raises:
        Any exception raised by one of the loops that is not a websocket
        disconnect.
    """
    await websocket.accept()
    outgoing: asyncio.Queue[dict[str, Any]] = asyncio.Queue(maxsize=10)
    previous_websocket = self._websocket
    if previous_websocket is not None:
        await _ignore_shutdown_disconnect(
            previous_websocket.close(code=1001, reason="Reconnected")
        )
    self.last_activity = time.monotonic()
    self.disconnected_at = 0.0
    self._connected = True
    self._outgoing = outgoing
    self._websocket = websocket
    await self._queue_message(self._status_message("connected"))

    tasks = {
        asyncio.create_task(self._send_loop(websocket, outgoing)),
        asyncio.create_task(self._receive_loop(websocket)),
        asyncio.create_task(self._frame_loop()),
    }
    try:
        done, _pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)
        for task in done:
            if task.cancelled():
                # Task.exception() raises CancelledError for a cancelled task.
                continue
            exc = task.exception()
            if exc and not isinstance(exc, WebSocketDisconnect) and not _is_websocket_disconnect(exc):
                raise exc
    except WebSocketDisconnect:
        pass
    finally:
        for task in tasks:
            task.cancel()
        try:
            # Wait for the loops to actually stop and retrieve their
            # exceptions so none is reported as "never retrieved".
            await asyncio.gather(*tasks, return_exceptions=True)
        finally:
            if self._outgoing is outgoing:
                self._outgoing = None
            # Only the connection that is still current may mark the session
            # disconnected; a superseded socket must not affect a newer one.
            if self._websocket is websocket:
                self._websocket = None
                self._connected = False
                self.disconnected_at = time.monotonic()
990
+
991
@property
def is_connected(self) -> bool:
    """Return the connection flag that ``connect`` sets and ``close`` clears."""
    return self._connected
994
+
995
async def connect_audio(self, websocket: WebSocket) -> None:
    """Attach the audio WebSocket and stream queued audio until it closes.

    Accepts the socket, closes any previously attached audio socket with
    code 1001 ("Reconnected") and runs a send loop (draining a 48-slot
    queue) next to a receive loop. The call returns as soon as one of the
    loops fails; a WebSocket disconnect counts as a normal exit.

    Args:
        websocket: The not-yet-accepted audio connection.

    Raises:
        Exception: Any non-disconnect error raised by one of the workers.
    """
    await websocket.accept()
    outgoing: asyncio.Queue[dict[str, Any]] = asyncio.Queue(maxsize=48)
    previous_websocket = self._audio_websocket
    if previous_websocket is not None:
        await _ignore_shutdown_disconnect(
            previous_websocket.close(code=1001, reason="Reconnected")
        )
    self.last_activity = time.monotonic()
    self._audio_outgoing = outgoing
    self._audio_websocket = websocket
    send_task = asyncio.create_task(self._send_loop(websocket, outgoing))
    receive_task = asyncio.create_task(self._audio_receive_loop(websocket))
    tasks = {send_task, receive_task}
    try:
        done, _pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)
        for task in done:
            # Task.exception() raises CancelledError for a cancelled task.
            if task.cancelled():
                continue
            exc = task.exception()
            if exc and not isinstance(exc, WebSocketDisconnect) and not _is_websocket_disconnect(exc):
                raise exc
    except WebSocketDisconnect:
        pass
    finally:
        for task in tasks:
            task.cancel()
        # Leave the attributes alone when a newer audio socket took over.
        if self._audio_outgoing is outgoing:
            self._audio_outgoing = None
        if self._audio_websocket is websocket:
            self._audio_websocket = None
        # Wait for the workers to actually stop; this also retrieves their
        # exceptions so asyncio does not log "exception was never retrieved".
        await asyncio.gather(*tasks, return_exceptions=True)
1026
+
1027
async def navigate(self, raw_url: str) -> None:
    """Validate *raw_url* against the URL policy and load it in the page.

    A "loading" status is queued before the navigation and a "ready"
    status afterwards. A Playwright timeout is reported to the client as a
    warning instead of being raised; whatever the policy check raises
    propagates to the caller.
    """
    page = self._require_page()
    verify_https = not self.settings.ignore_https_errors
    url = await self.policy.ensure_navigation_url_allowed(
        raw_url, verify_https=verify_https
    )
    self._clear_mobile_focus_zoom()
    self.last_activity = time.monotonic()

    loading_notice = {"type": "status", "state": "loading", "url": url}
    await self._queue_message(loading_notice)

    timeout_notice = {
        "type": "warning",
        "message": "Navigation timed out; showing the current browser state.",
    }
    async with self._action_lock:
        try:
            await page.goto(
                url,
                wait_until="domcontentloaded",
                timeout=self.settings.navigation_timeout_ms,
            )
        except PlaywrightTimeoutError:
            await self._queue_message(timeout_notice)
    await self._queue_message(self._status_message("ready"))
1051
+
1052
async def upload_file_chooser(
    self, token: str, files: list[Path] | list[dict[str, object]]
) -> None:
    """Feed *files* to the pending file chooser registered under *token*.

    The chooser is removed from ``self.file_choosers`` so a token can be
    used only once. ``Path`` entries are converted to strings; any other
    payload is handed to the chooser unchanged.

    Raises:
        KeyError: No chooser is registered for *token*.
    """
    chooser = self.file_choosers.pop(token, None)
    if chooser is None:
        raise KeyError("File chooser is no longer available.")

    # Only the first element is inspected to decide which shape was passed.
    given_paths = bool(files) and isinstance(files[0], Path)
    selection = [str(path) for path in files] if given_paths else files
    await chooser.set_files(selection)
    await self._queue_message({"type": "status", "state": "files-selected"})
1063
+
1064
def get_download(self, token: str) -> DownloadRecord | None:
    """Return the download stored under *token*, or ``None`` when unknown."""
    return self.downloads.get(token)
1066
+
1067
async def list_cookies(self) -> list[dict[str, Any]]:
    """Return the context cookies that apply to the current page URL."""
    context, page = self._require_context(), self._require_page()
    self.last_activity = time.monotonic()
    async with self._action_lock:
        # Read the URL only once the lock is held, so a navigation that was
        # in progress has finished.
        current_url = page.url
        return await context.cookies([current_url])
1073
+
1074
async def replace_cookies(self, cookies: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Make the cookies of the current page match *cookies*.

    The supplied cookies are normalized against the page URL and added
    first; afterwards every previously existing cookie whose identity is
    not part of the new set is cleared by name, domain and path.

    Returns:
        The cookies for the page URL after the replacement.
    """
    context, page = self._require_context(), self._require_page()
    self.last_activity = time.monotonic()
    async with self._action_lock:
        page_url = page.url
        existing = await context.cookies([page_url])
        normalized = [self._normalize_cookie(cookie, page_url) for cookie in cookies]
        if normalized:
            await context.add_cookies(normalized)

        kept_identities = {_cookie_identity(cookie) for cookie in normalized}
        for old_cookie in existing:
            if _cookie_identity(old_cookie) in kept_identities:
                continue
            await context.clear_cookies(
                name=str(old_cookie.get("name") or ""),
                domain=str(old_cookie.get("domain") or ""),
                path=str(old_cookie.get("path") or "/"),
            )
        return await context.cookies([page_url])
1093
+
1094
async def handle_message(self, payload: dict[str, Any]) -> None:
    """Dispatch one decoded client message to the matching action.

    ``navigate``, ``back`` and ``forward`` are refused with a warning while
    the session is locked. Unknown message types are ignored. After the
    action ran, message types listed in ``INTERACTION_FRAME_MESSAGE_TYPES``
    schedule a prompt frame capture.
    """
    self.last_activity = time.monotonic()
    message_type = payload.get("type")

    # The three history/navigation commands share a single lock check.
    if message_type in ("navigate", "back", "forward") and self.is_locked:
        await self._queue_message(
            {"type": "warning", "message": "Options are locked by the server."}
        )
        return

    if message_type == "resize":
        width = int(payload.get("width", self.viewport_width))
        height = int(payload.get("height", self.viewport_height))
        await self._resize(width, height)
    elif message_type == "navigate":
        await self.navigate(str(payload.get("url", "")))
    elif message_type == "reload":
        await self._reload()
    elif message_type == "back":
        await self._go_back()
    elif message_type == "forward":
        await self._go_forward()
    elif message_type == "mouse_move":
        await self._mouse_move(payload)
    elif message_type == "mouse_down":
        await self._mouse_button(payload, down=True)
    elif message_type == "mouse_up":
        await self._mouse_button(payload, down=False)
    elif message_type == "tap":
        await self._tap(payload)
    elif message_type == "wheel":
        await self._wheel(payload)
    elif message_type == "pinch":
        await self._pinch(payload)
    elif message_type == "probe_editable":
        await self._probe_editable(payload)
    elif message_type == "key":
        await self._press_key(payload)
    elif message_type == "text":
        text = str(payload.get("text", ""))
        await self._insert_text(text, focus_view=bool(payload.get("focus_view")))
    elif message_type == "paste":
        await self._paste_text(str(payload.get("text", "")))
    elif message_type in ("copy", "cut"):
        await self.copy_selection(cut=message_type == "cut")

    if message_type in INTERACTION_FRAME_MESSAGE_TYPES:
        self._request_frame_soon()
1151
+
1152
async def _prepare_page(self, page: Page) -> None:
    """Configure a new page and make it the session's active page.

    Applies the session viewport, registers the caret-suppression and
    audio-capture init scripts, routes all HTTP and WebSocket traffic
    through the policy guards, hooks up the event listeners and finally
    runs both scripts against the document that is already loaded.
    """
    await page.set_viewport_size({"width": self.viewport_width, "height": self.viewport_height})
    await page.add_init_script(NATIVE_CARET_SUPPRESSION_INIT_SCRIPT)
    await page.add_init_script(AUDIO_CAPTURE_INIT_SCRIPT)
    # "**/*" matches every URL, so each request passes through the guards.
    await page.route("**/*", self._guard_route)
    await page.route_web_socket("**/*", self._guard_websocket)
    await self._attach_page(page)
    await self._suppress_native_caret(page)
    await self._install_audio_capture(page)
1161
+
1162
async def _attach_page(self, page: Page) -> None:
    """Make *page* the active page and subscribe to its browser events.

    Every event is forwarded to the matching coroutine method, which is
    scheduled as its own asyncio task; the listener returns that task.
    """
    self.page = page

    def forward_to(method_name: str):
        # The method is looked up when the event fires, not at registration.
        def listener(event):
            return asyncio.create_task(getattr(self, method_name)(event))

        return listener

    subscriptions = (
        ("popup", "_on_popup"),
        ("download", "_on_download"),
        ("filechooser", "_on_filechooser"),
        ("dialog", "_on_dialog"),
        ("request", "_on_request"),
        ("requestfailed", "_on_request_failed"),
        ("framenavigated", "_on_frame_navigated"),
    )
    for event_name, method_name in subscriptions:
        page.on(event_name, forward_to(method_name))
1171
+
1172
async def _guard_route(self, route: Route) -> None:
    """Let a request through when the URL policy allows it, else abort it.

    While a forced reload is active (``_force_reload_until`` lies in the
    future) the request is continued with no-cache headers. A blocked
    request is aborted and reported to the client as a "blocked" message.
    """
    request = route.request
    url = request.url
    try:
        await self.policy.ensure_request_url_allowed(url)
        if time.monotonic() >= self._force_reload_until:
            await route.continue_()
        else:
            no_cache_headers = {
                **request.headers,
                "cache-control": "no-cache",
                "pragma": "no-cache",
                "expires": "0",
            }
            await route.continue_(headers=no_cache_headers)
    except URLPolicyError as exc:
        await route.abort()
        await self._queue_message({"type": "blocked", "url": url, "reason": str(exc)})
1187
+
1188
async def _guard_websocket(self, websocket: WebSocketRoute) -> None:
    """Connect a page WebSocket to its server only if the policy allows it.

    A blocked socket is closed with code 1008 and reported to the client
    as a "blocked" message carrying the full reason.

    Args:
        websocket: The intercepted page-side WebSocket route.
    """
    url = websocket.url
    try:
        await self.policy.ensure_request_url_allowed(url)
        websocket.connect_to_server()
    except URLPolicyError as exc:
        reason = str(exc)
        # A close reason may be at most 123 bytes of UTF-8, so truncate by
        # bytes rather than characters; "ignore" drops a character that the
        # cut split in half. ASCII reasons are truncated exactly as before.
        close_reason = reason.encode("utf-8")[:120].decode("utf-8", "ignore")
        await websocket.close(code=1008, reason=close_reason)
        await self._queue_message({"type": "blocked", "url": url, "reason": reason})
1196
+
1197
async def _suppress_native_caret(self, page: Page) -> None:
    """Run the caret-suppression script in *page*, ignoring any failure."""
    try:
        await page.evaluate(NATIVE_CARET_SUPPRESSION_SCRIPT)
    except Exception:
        # Best effort: evaluation errors are deliberately swallowed here.
        return
1202
+
1203
async def _install_audio_capture(self, page: Page) -> None:
    """Run the audio-capture script in *page*, ignoring any failure."""
    try:
        await page.evaluate(AUDIO_CAPTURE_SCRIPT)
    except Exception:
        # Best effort: evaluation errors are deliberately swallowed here.
        return
1208
+
1209
async def handle_audio_bridge_message(self, message: Any) -> None:
    """Translate a message from the in-page audio bridge for the client.

    ``debug`` messages become a warning on the main channel. ``start``,
    ``chunk`` and ``stop`` are forwarded on the audio channel. Everything
    else is dropped, including non-dict input and chunks whose data is not
    a string or exceeds ``MAX_AUDIO_CHUNK_BASE64_LENGTH``.
    """
    if not isinstance(message, dict):
        return

    kind = str(message.get("kind") or "")
    if kind == "debug":
        reason = str(message.get("reason") or "unknown")
        await self._queue_message({"type": "warning", "message": f"Audio capture: {reason}"})
        return
    if kind not in {"start", "chunk", "stop"}:
        return

    stream_id = str(message.get("streamId") or "media")
    payload: dict[str, Any] = {"type": "audio", "kind": kind, "streamId": stream_id}

    mime = message.get("mime")
    if isinstance(mime, str) and mime:
        payload["mime"] = mime

    if kind == "chunk":
        data = message.get("data")
        if not isinstance(data, str) or len(data) > MAX_AUDIO_CHUNK_BASE64_LENGTH:
            return
        payload["data"] = data

    await self._queue_audio(payload)
1233
+
1234
async def _on_popup(self, popup: Page) -> None:
    """Adopt a popup window as the session's active page.

    The popup goes through the same preparation as the initial page, its
    URL is recorded in the navigation history and the client is told that
    a popup opened.
    """
    await self._prepare_page(popup)
    self._record_navigation(popup, popup.url)
    await self._queue_message({"type": "status", "state": "popup", "url": popup.url})
1238
+
1239
async def _on_download(self, download: Download) -> None:
    """Save a browser download and announce its retrieval URL.

    The file is stored in a per-session directory under the client's
    download directory, prefixed with a random token. That token also keys
    the ``DownloadRecord`` and forms the last segment of the announced URL.
    """
    token = secrets.token_urlsafe(18)
    filename = _safe_filename(download.suggested_filename or "download")

    session_dir = self.manager.client_downloads_dir(self.client_uuid) / self.id
    session_dir.mkdir(parents=True, exist_ok=True)
    target = session_dir / f"{token}-{filename}"
    await download.save_as(str(target))

    self.downloads[token] = DownloadRecord(filename=filename, path=target)
    announcement = {
        "type": "download",
        "filename": filename,
        "url": f"/api/sessions/{self.id}/downloads/{token}",
    }
    await self._queue_message(announcement)
1254
+
1255
async def _on_filechooser(self, chooser: FileChooser) -> None:
    """Park a file chooser under a fresh token and notify the client.

    The token is what ``upload_file_chooser`` later uses to find the
    chooser again.
    """
    token = secrets.token_urlsafe(18)
    self.file_choosers[token] = chooser
    notice = {
        "type": "filechooser",
        "token": token,
        "multiple": chooser.is_multiple(),
    }
    await self._queue_message(notice)
1261
+
1262
async def _on_dialog(self, dialog: Any) -> None:
    """Accept a JavaScript dialog automatically and report it to the client.

    The dialog is accepted with its default value (an empty string when it
    has none); its type and message are then forwarded as a "dialog"
    message.
    """
    # Capture the details before accepting the dialog.
    message, dialog_type = dialog.message, dialog.type
    prompt_answer = getattr(dialog, "default_value", "") or ""
    await dialog.accept(prompt_answer)
    notice = {"type": "dialog", "dialogType": dialog_type, "message": message}
    await self._queue_message(notice)
1268
+
1269
async def _on_request(self, request: PlaywrightRequest) -> None:
    """Announce "loading" when the main frame starts fetching a document.

    Requests from other frames, non-document resources, ``about:blank``
    and repeats of the already pending URL are ignored.
    """
    page = self.page
    if (
        page is None
        or request.frame != page.main_frame
        or request.resource_type != "document"
    ):
        return

    url = request.url
    if not url or url in ("about:blank", self._pending_document_navigation_url):
        return

    self._pending_document_navigation_url = url
    self._clear_mobile_focus_zoom()
    await self._queue_message({"type": "status", "state": "loading", "url": url})
1281
+
1282
async def _on_request_failed(self, request: PlaywrightRequest) -> None:
    """Return to "ready" when the pending main-frame document load fails.

    Only a failed document request of the main frame whose URL equals the
    pending navigation URL has any effect.
    """
    page = self.page
    if page is None or request.frame != page.main_frame:
        return
    is_pending_document = (
        request.resource_type == "document"
        and request.url == self._pending_document_navigation_url
    )
    if not is_pending_document:
        return
    self._pending_document_navigation_url = ""
    await self._queue_message(self._status_message("ready"))
1292
+
1293
async def _on_frame_navigated(self, frame: Any) -> None:
    """Record a completed main-frame navigation and announce "ready".

    Navigations of any other frame are ignored.
    """
    page = self.page
    if page is None or frame != page.main_frame:
        return
    self._pending_document_navigation_url = ""
    self._clear_mobile_focus_zoom()
    self._record_navigation(page, page.url)
    await self._queue_message(self._status_message("ready"))
1300
+
1301
def _record_navigation(self, page: Page, url: str) -> None:
    """Append a visited (page, URL) pair to the session's history.

    Empty URLs and ``about:blank`` are never recorded. While a history
    replay is in progress nothing is appended; the active entry merely has
    its URL refreshed when it belongs to *page*.
    """
    if not url or url == "about:blank":
        return

    if self._history_replaying:
        if 0 <= self._history_index < len(self._history):
            current = self._history[self._history_index]
            if current.page is page:
                current.url = url
        return

    # Ignore a repeat of the entry the history already points at.
    if self._history_index >= 0 and self._history_index < len(self._history):
        current = self._history[self._history_index]
        if current.page is page and current.url == url:
            return

    # Navigating from the middle of the history discards the forward entries.
    if self._history_index < len(self._history) - 1:
        self._history = self._history[: self._history_index + 1]

    self._history.append(NavigationEntry(page=page, url=url))
    self._history_index = len(self._history) - 1
    self._prune_closed_history()
1323
+
1324
def _session_pages(self, current_page: Page | None = None) -> list[Page]:
    """Return the distinct open pages known to the session.

    *current_page* (when given and still open) comes first, followed by
    the history pages in history order; duplicates and closed pages are
    left out.
    """
    candidates: list[Page | None] = [current_page]
    candidates.extend(entry.page for entry in self._history)

    open_pages: list[Page] = []
    already_listed: set[Page] = set()
    for candidate in candidates:
        if (
            candidate is not None
            and candidate not in already_listed
            and not candidate.is_closed()
        ):
            open_pages.append(candidate)
            already_listed.add(candidate)
    return open_pages
1333
+
1334
def _prune_closed_history(self) -> None:
    """Drop history entries whose page is closed and repair the index.

    The index follows the previously active entry when it survives;
    otherwise it is clamped into the remaining range. An empty history
    gets index ``-1``.
    """
    history = self._history
    if not history:
        self._history_index = -1
        return

    index = self._history_index
    active_entry = history[index] if 0 <= index < len(history) else None

    survivors = [entry for entry in history if not entry.page.is_closed()]
    self._history = survivors
    if not survivors:
        self._history_index = -1
        return

    if active_entry is not None and not active_entry.page.is_closed():
        for position, entry in enumerate(survivors):
            if entry is active_entry:
                self._history_index = position
                return

    last_position = len(survivors) - 1
    self._history_index = min(max(self._history_index, 0), last_position)
1353
+
1354
async def _send_loop(
    self, websocket: WebSocket, outgoing: asyncio.Queue[dict[str, Any]]
) -> None:
    """Forward queued messages to *websocket* forever.

    A "frame" message carrying ``bytes`` image data is sent as two
    WebSocket messages: a compact JSON header without the image, followed
    by the raw image bytes. Every other message is sent as compact JSON.
    """
    while True:
        message = await outgoing.get()
        image = message.get("image", b"") if message.get("type") == "frame" else None
        if isinstance(image, bytes):
            header = {key: value for key, value in message.items() if key != "image"}
            await websocket.send_text(json.dumps(header, separators=(",", ":")))
            await websocket.send_bytes(image)
        else:
            await websocket.send_text(json.dumps(message, separators=(",", ":")))
1367
+
1368
async def _audio_receive_loop(self, websocket: WebSocket) -> None:
    """Read and discard text from the audio socket until it disconnects.

    Raises:
        WebSocketDisconnect: Also raised in place of a ``RuntimeError``
            that ``_is_websocket_disconnect`` recognizes as a disconnect.
    """
    while True:
        try:
            # Incoming payloads are not used; reading only detects the disconnect.
            await websocket.receive_text()
        except RuntimeError as exc:
            if _is_websocket_disconnect(exc):
                raise WebSocketDisconnect() from exc
            raise
1376
+
1377
async def _receive_loop(self, websocket: WebSocket) -> None:
    """Read client messages from *websocket* and dispatch them forever.

    Text that is not valid JSON, or whose JSON value is not an object, is
    skipped. A policy violation raised while handling a message is sent
    back as an "error" message, any other failure as a "warning", so one
    bad event never terminates the loop.

    Raises:
        WebSocketDisconnect: Also raised in place of a ``RuntimeError``
            that ``_is_websocket_disconnect`` recognizes as a disconnect.
    """
    while True:
        try:
            text = await websocket.receive_text()
        except RuntimeError as exc:
            if not _is_websocket_disconnect(exc):
                raise
            raise WebSocketDisconnect() from exc

        try:
            payload = json.loads(text)
        except json.JSONDecodeError:
            continue
        if not isinstance(payload, dict):
            continue

        try:
            await self.handle_message(payload)
        except URLPolicyError as exc:
            await self._queue_message({"type": "error", "message": str(exc)})
        except Exception as exc:
            await self._queue_message(
                {"type": "warning", "message": f"Input event ignored: {exc}"}
            )
1398
+
1399
async def _frame_loop(self) -> None:
    """Capture and send a frame at the current interval until the session closes."""
    while not self._closed:
        # The interval is re-evaluated every round because it depends on
        # whether media is playing.
        await asyncio.sleep(await self._current_frame_interval())
        await self._send_current_frame()
1403
+
1404
async def _current_frame_interval(self) -> float:
    """Return the pause between frames, in seconds.

    While media is playing the larger of the regular and the media
    interval applies; otherwise the regular interval is used.
    """
    media_playing = await self._is_media_playing()
    regular = self.settings.frame_interval_seconds
    if not media_playing:
        return regular
    return max(regular, self.settings.media_frame_interval_seconds)
1408
+
1409
async def _is_media_playing(self) -> bool:
    """Tell whether the page currently plays any ``<video>``/``<audio>``.

    The answer is cached for 0.75 seconds. A missing or closed page, or a
    failing page evaluation, counts as "not playing".
    """
    now = time.monotonic()
    if now - self._media_state_checked_at < 0.75:
        return self._media_playing
    self._media_state_checked_at = now

    playing = False
    page = self.page
    if page is not None and not page.is_closed():
        try:
            playing = bool(
                await page.evaluate(
                    """() => Array.from(document.querySelectorAll("video, audio")).some((element) => (
                        element instanceof HTMLMediaElement &&
                        !element.paused &&
                        !element.ended &&
                        element.readyState >= HTMLMediaElement.HAVE_CURRENT_DATA
                    ))"""
                )
            )
        except Exception:
            playing = False

    self._media_playing = playing
    return playing
1432
+
1433
async def _send_current_frame(self) -> None:
    """Capture one frame and queue it, unless a capture is already running.

    Captures that take longer than 0.75 seconds are logged at debug level.
    """
    capture_lock = self._frame_capture_lock
    if capture_lock.locked():
        # Skip instead of queueing up behind the capture in progress.
        return

    started_at = time.monotonic()
    async with capture_lock:
        frame = await self._capture_frame()
        if frame is not None:
            await self._queue_frame(frame)

    elapsed = time.monotonic() - started_at
    if elapsed <= 0.75:
        return
    logger.debug(
        "Slow frame capture for session %s: %.3fs, media_playing=%s, viewport=%sx%s",
        self.id,
        elapsed,
        self._media_playing,
        self.viewport_width,
        self.viewport_height,
    )
1451
+
1452
def _request_frame_soon(self) -> None:
    """Schedule a delayed frame capture unless one is already pending.

    Does nothing once the session is closed.
    """
    if self._closed:
        return
    pending = self._requested_frame_task
    if pending is None or pending.done():
        self._requested_frame_task = asyncio.create_task(self._send_delayed_frame())
1458
+
1459
async def _send_delayed_frame(self) -> None:
    """Send one frame after a 30 ms pause, swallowing ordinary errors."""
    try:
        await asyncio.sleep(0.03)
        await self._send_current_frame()
    except asyncio.CancelledError:
        # Cancellation must propagate so ``close`` can stop this task.
        raise
    except Exception:
        return
1467
+
1468
async def _capture_frame(self) -> dict[str, Any] | None:
    """Take a screenshot of the active page and wrap it as a frame message.

    Frames are PNG when a mobile focus clip is active or the configured
    quality is 100 or more, JPEG otherwise; JPEG quality is capped by the
    media quality while media is playing. When a clipped screenshot fails,
    the focus zoom is cleared and an unclipped screenshot is taken instead.

    Returns:
        The frame message, or ``None`` when there is no open page or the
        capture failed (a warning is queued in the latter case).
    """
    page = self.page
    if page is None or page.is_closed():
        return None

    def build_options(image_type: str, scale: str, quality: int) -> dict[str, Any]:
        # Playwright accepts "quality" for JPEG screenshots only.
        options: dict[str, Any] = {
            "type": image_type,
            "scale": scale,
            "full_page": False,
            "timeout": 8000,
        }
        if image_type == "jpeg":
            options["quality"] = quality
        return options

    try:
        caret = await self._editable_caret(page)
        await self._suppress_native_caret(page)
        clip = self._mobile_focus_clip
        zoomed = clip is not None
        lossless_frame = self.settings.screenshot_quality >= 100
        media_playing = await self._is_media_playing()
        if lossless_frame or not media_playing:
            screenshot_quality = self.settings.screenshot_quality
        else:
            screenshot_quality = min(
                self.settings.screenshot_quality,
                self.settings.media_screenshot_quality,
            )

        screenshot_type = "png" if zoomed or lossless_frame else "jpeg"
        options = build_options(
            screenshot_type, "device" if zoomed else "css", screenshot_quality
        )
        if zoomed:
            options["clip"] = clip
        try:
            image = await page.screenshot(**options)
        except Exception:
            if not zoomed:
                raise
            # The clip may be stale; retry once with a plain viewport shot.
            self._clear_mobile_focus_zoom()
            screenshot_type = "png" if lossless_frame else "jpeg"
            image = await page.screenshot(
                **build_options(screenshot_type, "css", screenshot_quality)
            )
        title = await page.title()
        url = page.url
    except Exception as exc:
        await self._queue_message({"type": "warning", "message": f"Frame capture failed: {exc}"})
        return None

    mime = "image/png" if screenshot_type == "png" else "image/jpeg"
    return {
        "type": "frame",
        "mime": mime,
        "image": image,
        "width": self.viewport_width,
        "height": self.viewport_height,
        "url": url,
        "title": title,
        "caret": caret,
    }
1528
+
1529
async def _resize(self, width: int, height: int) -> None:
    """Resize the page viewport, clamping both dimensions to the settings."""
    self._clear_mobile_focus_zoom()
    limits = self.settings
    self.viewport_width = _clamp(width, limits.min_viewport_width, limits.max_viewport_width)
    self.viewport_height = _clamp(height, limits.min_viewport_height, limits.max_viewport_height)
    page = self._require_page()
    async with self._action_lock:
        # Read the stored size only after the lock is held, so the latest
        # requested size wins when resizes queue up.
        viewport = {"width": self.viewport_width, "height": self.viewport_height}
        await page.set_viewport_size(viewport)
1540
+
1541
async def _reload(self) -> None:
    """Reload the active page while bypassing caches.

    Preferably the reload goes through a CDP session with the network
    cache disabled and service workers bypassed; if that session cannot be
    set up, a regular Playwright reload is used. A timeout is reported to
    the client as a warning. Afterwards "ready" is announced and a fresh
    frame is sent.
    """
    self._clear_mobile_focus_zoom()
    page = self._require_page()
    self.last_activity = time.monotonic()
    # While this deadline is in the future, _guard_route adds no-cache
    # headers to every request.
    self._force_reload_until = time.monotonic() + FORCE_RELOAD_NO_CACHE_SECONDS
    await self._queue_message({"type": "status", "state": "loading", "url": page.url})
    async with self._action_lock:
        client = None
        try:
            client = await page.context.new_cdp_session(page)
            await client.send("Network.enable")
            await client.send("Network.setCacheDisabled", {"cacheDisabled": True})
            await client.send("Network.setBypassServiceWorker", {"bypass": True})
        except Exception:
            # Any CDP failure selects the plain reload below.
            # NOTE(review): a session that was created before one of the
            # send() calls failed is dropped here without being detached.
            client = None

        try:
            if client is not None:
                await client.send("Page.reload", {"ignoreCache": True})
                await page.wait_for_load_state(
                    "load",
                    timeout=self.settings.navigation_timeout_ms,
                )
            else:
                await page.reload(
                    wait_until="load",
                    timeout=self.settings.navigation_timeout_ms,
                )
        except PlaywrightTimeoutError:
            await self._queue_message(
                {
                    "type": "warning",
                    "message": "Reload timed out; showing the current browser state.",
                }
            )
        finally:
            # Undo the cache overrides whether or not the reload succeeded.
            if client is not None:
                await _ignore_shutdown_disconnect(
                    client.send("Network.setCacheDisabled", {"cacheDisabled": False})
                )
                await _ignore_shutdown_disconnect(
                    client.send("Network.setBypassServiceWorker", {"bypass": False})
                )
                await _ignore_shutdown_disconnect(client.detach())
    await self._queue_message(self._status_message("ready"))
    await self._send_current_frame()
1587
+
1588
async def _go_back(self) -> None:
    """Step one entry back in the navigation history.

    The session's own history gets the first chance; the browser's
    ``go_back`` only runs when ``_activate_history_delta`` returns a falsy
    value.

    NOTE(review): ``_activate_history_delta`` as written in this file
    returns ``True`` on every path, which makes the browser fallback below
    unreachable; confirm whether that is intended.
    """
    self._clear_mobile_focus_zoom()
    handled_by_session_history = await self._activate_history_delta(-1)
    if handled_by_session_history:
        return

    page = self._require_page()
    await self._queue_message({"type": "status", "state": "loading", "url": page.url})
    timeout_notice = {
        "type": "warning",
        "message": "Navigation timed out; showing the current browser state.",
    }
    async with self._action_lock:
        try:
            await page.go_back(
                wait_until="domcontentloaded",
                timeout=self.settings.navigation_timeout_ms,
            )
        except PlaywrightTimeoutError:
            await self._queue_message(timeout_notice)
    await self._queue_message(self._status_message("ready"))
1608
+
1609
async def _go_forward(self) -> None:
    """Step one entry forward in the navigation history.

    The session's own history gets the first chance; the browser's
    ``go_forward`` only runs when ``_activate_history_delta`` returns a
    falsy value.

    NOTE(review): ``_activate_history_delta`` as written in this file
    returns ``True`` on every path, which makes the browser fallback below
    unreachable; confirm whether that is intended.
    """
    self._clear_mobile_focus_zoom()
    handled_by_session_history = await self._activate_history_delta(1)
    if handled_by_session_history:
        return

    page = self._require_page()
    await self._queue_message({"type": "status", "state": "loading", "url": page.url})
    timeout_notice = {
        "type": "warning",
        "message": "Navigation timed out; showing the current browser state.",
    }
    async with self._action_lock:
        try:
            await page.go_forward(
                wait_until="domcontentloaded",
                timeout=self.settings.navigation_timeout_ms,
            )
        except PlaywrightTimeoutError:
            await self._queue_message(timeout_notice)
    await self._queue_message(self._status_message("ready"))
1629
+
1630
async def _activate_history_delta(self, delta: int) -> bool:
    """Move *delta* entries through the session history and show the target.

    The target entry's page becomes the active page; when that page no
    longer shows the recorded URL, the URL is loaded again with history
    recording switched to replay mode.

    Returns:
        ``True`` on every path, including "no such entry" (a warning is
        queued) and "target page already closed".
    """
    self._prune_closed_history()
    target_index = self._history_index + delta
    if target_index < 0 or target_index >= len(self._history):
        await self._queue_message({"type": "warning", "message": "No history entry available."})
        return True

    entry = self._history[target_index]
    self._history_index = target_index
    if entry.page.is_closed():
        # Pruning removes the dead entry and repairs the index set above.
        self._prune_closed_history()
        return True

    self.page = entry.page
    await self._queue_message({"type": "status", "state": "loading", "url": entry.url})
    async with self._action_lock:
        if entry.page.url != entry.url:
            try:
                # While set, _record_navigation refreshes the active entry
                # instead of appending a new one.
                self._history_replaying = True
                await entry.page.goto(
                    entry.url,
                    wait_until="domcontentloaded",
                    timeout=self.settings.navigation_timeout_ms,
                )
            except PlaywrightTimeoutError:
                await self._queue_message(
                    {
                        "type": "warning",
                        "message": "Navigation timed out; showing the current browser state.",
                    }
                )
            finally:
                self._history_replaying = False
    await self._queue_message(self._status_message("ready"))
    return True
1665
+
1666
async def _move_mouse_smoothly(self, page: Page, x: float, y: float) -> None:
    """Glide the mouse from its last known position to ``(x, y)``.

    Moves shorter than half a pixel only update the stored position. The
    path is split into between 2 and ``MOUSE_MOVE_MAX_STEPS`` eased steps,
    and the whole move lasts at most ``MOUSE_MOVE_MAX_DURATION_SECONDS``.
    """
    start_x, start_y = self._mouse_position
    delta_x = x - start_x
    delta_y = y - start_y
    distance = (delta_x ** 2 + delta_y ** 2) ** 0.5
    if distance < 0.5:
        self._mouse_position = (x, y)
        return

    travel_time = max(0.0, distance / MOUSE_MOVE_SPEED_PIXELS_PER_SECOND)
    duration = min(MOUSE_MOVE_MAX_DURATION_SECONDS, travel_time)
    steps = max(
        2,
        min(
            MOUSE_MOVE_MAX_STEPS,
            int(distance / MOUSE_MOVE_STEP_DISTANCE) + 1,
            int(duration / MOUSE_MOVE_STEP_INTERVAL_SECONDS) + 1,
        ),
    )
    delay = duration / steps if duration > 0 else 0

    for index in range(1, steps + 1):
        progress = index / steps
        # Smoothstep easing: gentle start and stop without slowing the path.
        eased = progress * progress * (3 - 2 * progress)
        await page.mouse.move(start_x + delta_x * eased, start_y + delta_y * eased)
        # No pause after the final step.
        if delay > 0 and index < steps:
            await asyncio.sleep(delay)
    self._mouse_position = (x, y)
1692
+
1693
async def _move_mouse_directly(self, page: Page, x: float, y: float) -> None:
    """Jump the mouse to ``(x, y)`` in one step and remember the position."""
    await page.mouse.move(x, y)
    self._mouse_position = (x, y)
1696
+
1697
async def _mouse_move(self, payload: dict[str, Any]) -> None:
    """Handle a client ``mouse_move`` event.

    The mobile focus zoom is cleared afterwards unless the payload carries
    a truthy ``focus_view_keep``.
    """
    page = self._require_page()
    async with self._action_lock:
        x, y = await self._input_point(page, payload)
        await self._move_mouse_smoothly(page, x, y)
    if not payload.get("focus_view_keep"):
        self._clear_mobile_focus_zoom()
1704
+
1705
async def _mouse_button(self, payload: dict[str, Any], down: bool) -> None:
    """Press (*down* true) or release a mouse button at the payload's point.

    Button names other than ``left``, ``middle`` and ``right`` fall back
    to ``left``.
    """
    page = self._require_page()
    requested = str(payload.get("button", "left"))
    button = requested if requested in {"left", "middle", "right"} else "left"
    async with self._action_lock:
        x, y = await self._input_point(page, payload)
        await self._move_mouse_directly(page, x, y)
        action = page.mouse.down if down else page.mouse.up
        await action(button=button)
    self._clear_mobile_focus_zoom()
1718
+
1719
async def _wheel(self, payload: dict[str, Any]) -> None:
    """Scroll by the payload's ``deltaX``/``deltaY`` (both default to 0).

    When the payload contains both ``x`` and ``y``, the mouse is first
    moved to that point.
    """
    page = self._require_page()
    delta_x, delta_y = (float(payload.get(key, 0)) for key in ("deltaX", "deltaY"))
    has_point = "x" in payload and "y" in payload
    async with self._action_lock:
        if has_point:
            x, y = await self._input_point(page, payload)
            await self._move_mouse_smoothly(page, x, y)
        await page.mouse.wheel(delta_x, delta_y)
    self._clear_mobile_focus_zoom()
1729
+
1730
async def _tap(self, payload: dict[str, Any]) -> None:
    """Tap at the payload's point.

    Mobile sessions use the touchscreen; other sessions emulate the tap
    with a direct mouse move followed by a button press and release.
    """
    page = self._require_page()
    async with self._action_lock:
        x, y = await self._input_point(page, payload)
        if not self._is_mobile:
            await self._move_mouse_directly(page, x, y)
            await page.mouse.down()
            await page.mouse.up()
        else:
            await page.touchscreen.tap(x, y)
1740
+
1741
async def _pinch(self, payload: dict[str, Any]) -> None:
    """Zoom the page by the payload's ``scale`` factor.

    The factor is clamped to 0.2-5.0 and changes of less than 1 % are
    ignored. The preferred path sets the page scale factor through CDP; if
    anything on that path fails, zooming falls back to a Control+wheel
    gesture at the payload's point.
    """
    page = self._require_page()
    scale = max(0.2, min(5.0, float(payload.get("scale", 1.0))))
    if abs(scale - 1.0) < 0.01:
        return
    async with self._action_lock:
        # The accumulated page scale factor is kept within 0.5-5.0.
        next_page_scale_factor = max(0.5, min(5.0, self._page_scale_factor * scale))
        try:
            client = await page.context.new_cdp_session(page)
            try:
                await client.send(
                    "Emulation.setPageScaleFactor",
                    {"pageScaleFactor": next_page_scale_factor},
                )
                # Remember the new factor only once the browser accepted it.
                self._page_scale_factor = next_page_scale_factor
            finally:
                await client.detach()
        except Exception:
            x, y = await self._input_point(page, payload)
            await self._move_mouse_smoothly(page, x, y)
            await page.keyboard.down("Control")
            try:
                # Negative wheel delta for scale > 1, positive for scale < 1.
                await page.mouse.wheel(0, -360 * (scale - 1.0))
            finally:
                # Never leave the Control key pressed.
                await page.keyboard.up("Control")
    self._clear_mobile_focus_zoom()
1767
+
1768
async def _probe_editable(self, payload: dict[str, Any]) -> None:
    """Report whether the element at the payload's point accepts text.

    The check runs in the page: it resolves the element at the point
    (descending into shadow roots), looks for the closest input, textarea,
    contenteditable or ``role="textbox"`` ancestor and applies the
    per-kind rules below. The result is sent as an "editable" message.
    """
    page = self._require_page()
    async with self._action_lock:
        x, y = await self._input_point(page, payload, dom_coordinates=True)
        editable = await page.evaluate(
            """({ x, y }) => {
                let element = document.elementFromPoint(x, y);
                while (element && element.shadowRoot) {
                    const nested = element.shadowRoot.elementFromPoint(x, y);
                    if (!nested || nested === element) {
                        break;
                    }
                    element = nested;
                }
                if (!element) {
                    return false;
                }
                const editable = element.closest(
                    "input, textarea, [contenteditable], [role='textbox']"
                );
                if (!editable) {
                    return false;
                }
                if (editable.matches("input")) {
                    const type = (editable.getAttribute("type") || "text").toLowerCase();
                    const textTypes = new Set([
                        "email",
                        "number",
                        "password",
                        "search",
                        "tel",
                        "text",
                        "url",
                    ]);
                    return textTypes.has(type) && !editable.disabled && !editable.readOnly;
                }
                if (editable.matches("textarea")) {
                    return !editable.disabled && !editable.readOnly;
                }
                if (editable.matches("[contenteditable]")) {
                    return editable.getAttribute("contenteditable") !== "false";
                }
                return true;
            }""",
            {"x": x, "y": y},
        )
    self._clear_mobile_focus_zoom()
    await self._queue_message({"type": "editable", "editable": bool(editable)})
1816
+
1817
async def _press_key(self, payload: dict[str, Any]) -> None:
    """Press the key described by a client key event, then push a new frame.

    Events without a ``key`` value, and keys that ``_playwright_key``
    declines to translate, are ignored.
    """
    page = self._require_page()
    raw_key = str(payload.get("key", ""))
    combo = self._playwright_key(raw_key, payload) if raw_key else None
    if combo is None:
        return
    async with self._action_lock:
        await page.keyboard.press(combo)
    await self._send_current_frame()
1828
+
1829
async def _insert_text(self, text: str, *, focus_view: bool = False) -> None:
    """Insert ``text`` at the page's current focus and push a new frame.

    Empty text is a no-op. After inserting, caret calibration is enabled
    for the next second; when ``focus_view`` is set on a mobile session the
    view is also zoomed onto the focused input.
    """
    if text:
        page = self._require_page()
        async with self._action_lock:
            await page.keyboard.insert_text(text)
            # _editable_caret only runs its screenshot calibration while
            # this deadline lies in the future.
            self._calibrate_caret_until = time.monotonic() + 1.0
            if focus_view and self._is_mobile:
                await self._focus_mobile_input_view(page)
        await self._send_current_frame()
1839
+
1840
async def _editable_caret(self, page: Page) -> dict[str, float] | None:
    """Return the caret rectangle of the focused editable in frame coordinates.

    Evaluates ``EDITABLE_METRICS_SCRIPT`` in the page and converts the
    reported caret position through the visual-viewport offset/scale and,
    when active, the mobile focus clip/zoom.

    Returns:
        A dict with ``x``, ``y``, ``width`` and ``height`` clamped to the
        viewport size, or ``None`` when the script fails, returns a
        non-dict, or the caret lies entirely outside the frame.
    """
    try:
        metrics = await page.evaluate(EDITABLE_METRICS_SCRIPT)
    except Exception:
        return None
    if not isinstance(metrics, dict):
        return None

    def metric_number(key: str, default: float) -> float:
        # presumably _float_value coerces to float and yields its second
        # argument when that is not possible -- TODO confirm against helper
        value = _float_value(metrics.get(key), None)
        return default if value is None else value

    # Fall back to the focus point when no explicit caret position is given.
    caret_x = metric_number("caretX", metric_number("focusLeft", 0.0))
    caret_y = metric_number("caretY", metric_number("focusTop", 0.0))
    # Enforce a minimum caret size of 1 x 8.
    caret_width = max(1.0, metric_number("caretWidth", 2.0))
    caret_height = max(8.0, metric_number("caretHeight", 18.0))
    # Screenshot-based calibration only runs while the deadline (set to one
    # second ahead by _insert_text) has not passed.
    if time.monotonic() <= self._calibrate_caret_until:
        corrected_caret_x = await self._calibrate_text_input_caret_x(page, metrics, caret_x)
        if corrected_caret_x is not None:
            caret_x = corrected_caret_x
    visual_scale = metric_number("visualScale", 1.0)
    if visual_scale <= 0:
        # A non-positive scale is treated as "no scaling".
        visual_scale = 1.0
    visual_offset_left = metric_number("visualOffsetLeft", 0.0)
    visual_offset_top = metric_number("visualOffsetTop", 0.0)

    # Shift by the visual-viewport offset, then apply its scale.
    x = (caret_x - visual_offset_left) * visual_scale
    y = (caret_y - visual_offset_top) * visual_scale
    width = caret_width * visual_scale
    height = caret_height * visual_scale

    focus_clip = self._mobile_focus_clip
    if focus_clip is not None and self._mobile_focus_zoom > 0:
        # With a focus clip active, express the caret relative to the clip
        # origin and magnify it by the focus zoom.
        clip_x = _float_value(focus_clip.get("x"), 0.0) or 0.0
        clip_y = _float_value(focus_clip.get("y"), 0.0) or 0.0
        x = (x - clip_x) * self._mobile_focus_zoom
        y = (y - clip_y) * self._mobile_focus_zoom
        width *= self._mobile_focus_zoom
        height *= self._mobile_focus_zoom

    frame_width = float(self.viewport_width)
    frame_height = float(self.viewport_height)
    # Report nothing when the caret is completely off-frame.
    if x + width < 0 or y + height < 0 or x > frame_width or y > frame_height:
        return None
    return {
        "x": max(0.0, min(frame_width, x)),
        "y": max(0.0, min(frame_height, y)),
        "width": max(1.0, min(frame_width, width)),
        "height": max(8.0, min(frame_height, height)),
    }
1890
+
1891
async def _calibrate_text_input_caret_x(
    self, page: Page, metrics: dict[str, Any], caret_x: float
) -> float | None:
    """Estimate the caret x of an ``<input>`` from a screenshot of the field.

    The field is screenshotted, a background colour is sampled from the
    corners of its content box, and the rightmost pixel that differs enough
    from that background is taken as the end of the rendered text.

    Args:
        page: Page to screenshot.
        metrics: Result of the editable-metrics script for the focused field.
        caret_x: The caret x reported by the metrics, used as a sanity bound.

    Returns:
        The corrected x (clip origin plus the detected column), or ``None``
        when calibration does not apply or its result is not plausible.
    """
    # Only plain <input> elements are calibrated; password fields are skipped.
    if str(metrics.get("editableTag") or "").upper() != "INPUT":
        return None
    if str(metrics.get("editableType") or "text").lower() == "password":
        return None
    selection_start = _float_value(metrics.get("selectionStart"), None)
    selection_end = _float_value(metrics.get("selectionEnd"), None)
    value_length = _float_value(metrics.get("valueLength"), None)
    # Require a collapsed selection sitting at the end of a non-empty value.
    if (
        selection_start is None
        or selection_end is None
        or value_length is None
        or int(selection_start) != int(selection_end)
        or int(selection_end) != int(value_length)
        or value_length <= 0
    ):
        return None

    left = _float_value(metrics.get("left"), None)
    top = _float_value(metrics.get("top"), None)
    width = _float_value(metrics.get("width"), None)
    height = _float_value(metrics.get("height"), None)
    if left is None or top is None or width is None or height is None:
        return None
    # Fields smaller than 12 px in either dimension are not analysed.
    if width < 12 or height < 12:
        return None

    # Screenshot clip: the element box, limited to the viewport.
    # NOTE(review): when ``left``/``top`` is negative the clip origin is
    # moved to 0 but the width/height and the border/padding offsets below
    # are not shifted accordingly -- looks misaligned for partially
    # off-screen fields; confirm.
    clip_x = max(0.0, left)
    clip_y = max(0.0, top)
    clip_width = max(1.0, min(width, float(self.viewport_width) - clip_x))
    clip_height = max(1.0, min(height, float(self.viewport_height) - clip_y))
    try:
        image = await page.screenshot(
            type="png",
            scale="css",
            full_page=False,
            clip={"x": clip_x, "y": clip_y, "width": clip_width, "height": clip_height},
            timeout=3000,
        )
    except Exception:
        return None
    # presumably decodes the PNG into (width, height, rows), each row holding
    # 4 values per pixel -- the indexing in ``pixel`` below relies on that
    decoded = _png_rgba_rows(image)
    if decoded is None:
        return None
    image_width, image_height, rows = decoded
    if image_width < 4 or image_height < 4:
        return None

    # Border and padding sizes, rounded to whole non-negative pixels.
    border_left = max(0, int(round(_float_value(metrics.get("borderLeft"), 0.0) or 0.0)))
    border_top = max(0, int(round(_float_value(metrics.get("borderTop"), 0.0) or 0.0)))
    border_right = max(0, int(round(_float_value(metrics.get("borderRight"), 0.0) or 0.0)))
    border_bottom = max(0, int(round(_float_value(metrics.get("borderBottom"), 0.0) or 0.0)))
    padding_left = max(0, int(round(_float_value(metrics.get("paddingLeft"), 0.0) or 0.0)))
    padding_top = max(0, int(round(_float_value(metrics.get("paddingTop"), 0.0) or 0.0)))
    padding_right = max(0, int(round(_float_value(metrics.get("paddingRight"), 0.0) or 0.0)))
    padding_bottom = max(0, int(round(_float_value(metrics.get("paddingBottom"), 0.0) or 0.0)))
    # Content box inside the image; the min/max keep it at least one pixel
    # wide and tall and inside the image bounds.
    content_left = min(image_width - 1, border_left + padding_left)
    content_right = max(content_left + 1, image_width - border_right - padding_right)
    content_top = min(image_height - 1, border_top + padding_top)
    content_bottom = max(content_top + 1, image_height - border_bottom - padding_bottom)

    def pixel(x: int, y: int) -> tuple[int, int, int]:
        # First three channel values of the pixel; the fourth is ignored.
        row = rows[y]
        index = x * 4
        return row[index], row[index + 1], row[index + 2]

    # Background colour = per-channel mean of the four content-box corners.
    samples = [
        pixel(content_left, content_top),
        pixel(max(content_left, content_right - 1), content_top),
        pixel(content_left, max(content_top, content_bottom - 1)),
        pixel(max(content_left, content_right - 1), max(content_top, content_bottom - 1)),
    ]
    background = tuple(sum(sample[channel] for sample in samples) / len(samples) for channel in range(3))

    def distance_from_background(x: int, y: int) -> float:
        # Sum of absolute per-channel differences from the background.
        red, green, blue = pixel(x, y)
        return (
            abs(red - background[0])
            + abs(green - background[1])
            + abs(blue - background[2])
        )

    threshold = 72.0
    # Scan only rows between 18% and 82% of the image height (and inside the
    # content box).
    min_y = max(content_top, int(image_height * 0.18))
    max_y = min(content_bottom, int(image_height * 0.82))
    rightmost = -1
    for y in range(min_y, max_y):
        for x in range(content_left, content_right):
            if distance_from_background(x, y) >= threshold:
                rightmost = max(rightmost, x)

    if rightmost < 0:
        # No pixel differed enough from the background.
        return None
    # The caret is placed just right of the last differing column.
    corrected = clip_x + min(float(image_width), float(rightmost + 1))
    # Reject corrections that move the caret further than
    # max(36, 60% of the field width) from the reported position.
    if abs(corrected - caret_x) > max(36.0, width * 0.6):
        return None
    return corrected
1990
+
1991
async def _focus_mobile_input_view(self, page: Page) -> None:
    """Scroll to the focused editable and record a 2x focus clip around it.

    The page is scrolled so the focus point lands at the horizontal centre
    and 42% down a half-viewport-sized clip, the metrics are measured again,
    and the resulting clip rectangle and zoom are stored in
    ``_mobile_focus_clip`` / ``_mobile_focus_zoom``.

    Best effort: any exception is swallowed and the method returns without
    updating the stored clip.
    """
    try:
        metrics = await page.evaluate(EDITABLE_METRICS_SCRIPT)
        if not isinstance(metrics, dict):
            return

        def metric_number(source: dict[str, Any], key: str, default: float) -> float:
            # presumably _float_value returns its second argument when the
            # value cannot be read as a float -- TODO confirm
            value = _float_value(source.get(key), None)
            return default if value is None else value

        zoom = 2.0
        # The clip is the viewport divided by the zoom factor.
        clip_width = max(1.0, float(self.viewport_width) / zoom)
        clip_height = max(1.0, float(self.viewport_height) / zoom)
        element_left = metric_number(metrics, "left", 0.0)
        element_top = metric_number(metrics, "top", 0.0)
        element_width = metric_number(metrics, "width", 0.0)
        element_height = metric_number(metrics, "height", 0.0)
        # Without an explicit focus point, use the centre of the element.
        focus_left = metric_number(metrics, "focusLeft", element_left + element_width / 2)
        focus_top = metric_number(metrics, "focusTop", element_top + element_height / 2)
        scroll_x = metric_number(metrics, "scrollX", 0.0)
        scroll_y = metric_number(metrics, "scrollY", 0.0)
        inner_width = metric_number(metrics, "innerWidth", float(self.viewport_width))
        inner_height = metric_number(metrics, "innerHeight", float(self.viewport_height))
        # Non-positive inner sizes fall back to the configured viewport.
        inner_width = inner_width if inner_width > 0 else float(self.viewport_width)
        inner_height = inner_height if inner_height > 0 else float(self.viewport_height)
        document_width = metric_number(metrics, "documentWidth", inner_width)
        document_height = metric_number(metrics, "documentHeight", inner_height)
        # Focus point in page coordinates (scroll offset + viewport position).
        focus_page_x = scroll_x + focus_left
        focus_page_y = scroll_y + focus_top
        target_x_in_viewport = clip_width / 2
        target_y_in_viewport = clip_height * 0.42
        # Scroll targets are limited to the scrollable range of the document.
        max_scroll_x = max(0.0, document_width - inner_width)
        max_scroll_y = max(0.0, document_height - inner_height)
        scroll_target_x = max(
            0.0, min(max_scroll_x, focus_page_x - target_x_in_viewport)
        )
        scroll_target_y = max(
            0.0, min(max_scroll_y, focus_page_y - target_y_in_viewport)
        )
        await page.evaluate(
            """({ x, y }) => {
                window.scrollTo({ left: x, top: y, behavior: "instant" });
            }""",
            {"x": scroll_target_x, "y": scroll_target_y},
        )
        # Short pause (50 ms) before measuring again after the scroll.
        await page.wait_for_timeout(50)
        scrolled_metrics = await page.evaluate(EDITABLE_METRICS_SCRIPT)
        if isinstance(scrolled_metrics, dict):
            # Re-read the geometry; earlier values serve as defaults.
            metrics = scrolled_metrics
            element_left = metric_number(metrics, "left", element_left)
            element_top = metric_number(metrics, "top", element_top)
            element_width = metric_number(metrics, "width", element_width)
            element_height = metric_number(metrics, "height", element_height)
            focus_left = metric_number(metrics, "focusLeft", element_left + element_width / 2)
            focus_top = metric_number(metrics, "focusTop", element_top + element_height / 2)
            inner_width = metric_number(metrics, "innerWidth", inner_width)
            inner_height = metric_number(metrics, "innerHeight", inner_height)
        target_x_in_clip = clip_width / 2
        target_y_in_clip = clip_height * 0.42
        # The clip must stay inside the smaller of viewport and inner size.
        crop_width = min(float(self.viewport_width), inner_width)
        crop_height = min(float(self.viewport_height), inner_height)
        max_clip_x = max(0.0, crop_width - clip_width)
        max_clip_y = max(0.0, crop_height - clip_height)
        clip_x = max(
            0.0, min(max_clip_x, focus_left - target_x_in_clip)
        )
        clip_y = max(
            0.0, min(max_clip_y, focus_top - target_y_in_clip)
        )
        # Publish the zoom state read by _input_point and _editable_caret.
        self._mobile_focus_zoom = zoom
        self._mobile_focus_clip = {
            "x": clip_x,
            "y": clip_y,
            "width": clip_width,
            "height": clip_height,
        }
        await page.wait_for_timeout(80)
    except Exception:
        # Deliberately best effort: focusing the view must not fail the
        # caller's text insertion.
        return
2070
+
2071
def _clear_mobile_focus_zoom(self) -> None:
    """Drop the mobile focus clip and return the focus zoom to 1.0."""
    self._mobile_focus_clip = None
    self._mobile_focus_zoom = 1.0
2074
+
2075
async def _paste_text(self, text: str) -> None:
    """Paste ``text`` into the page, trying three strategies in order.

    1. Write the text to the page clipboard and press ``Control+V``.
    2. Otherwise dispatch a synthetic paste event.
    3. Otherwise insert the text through the keyboard API.

    Empty text is a no-op; a new frame is pushed afterwards.
    """
    if text:
        page = self._require_page()
        async with self._action_lock:
            clipboard_ready = await self._write_clipboard_text(page, text)
            if clipboard_ready:
                await page.keyboard.press("Control+V")
            else:
                handled = await self._dispatch_synthetic_paste(page, text)
                if not handled:
                    await page.keyboard.insert_text(text)
        await self._send_current_frame()
2085
+
2086
async def _write_clipboard_text(self, page: Page, text: str) -> bool:
    """Try to place ``text`` on the page's clipboard.

    Args:
        page: Page whose ``navigator.clipboard`` is used.
        text: Text to write.

    Returns:
        ``True`` only when the in-page script reports that
        ``navigator.clipboard.writeText`` ran; ``False`` when the Clipboard
        API is unavailable or the evaluation raised (for example because
        the write was rejected).
    """
    try:
        written = await page.evaluate(
            """async (value) => {
                if (!navigator.clipboard || !navigator.clipboard.writeText) {
                    return false;
                }
                await navigator.clipboard.writeText(value);
                return true;
            }""",
            text,
        )
    except Exception:
        return False
    # The script answers ``false`` when the Clipboard API is missing. That
    # answer must reach the caller; otherwise it would press Control+V and
    # paste whatever stale content the clipboard already held.
    return bool(written)
2101
+
2102
async def _dispatch_synthetic_paste(self, page: Page, text: str) -> bool:
    """Simulate a paste of ``text`` inside the page without the clipboard.

    The in-page script dispatches a cancelable ``paste`` event on the active
    element (or ``document.body``). If the page cancels the event it is
    treated as handled. Otherwise the text is inserted directly into a
    text-like ``<input>``/``<textarea>`` via ``setRangeText`` plus an
    ``input`` event, or into a content-editable element via
    ``document.execCommand("insertText")``.

    Returns:
        ``True`` when the script reports the paste as handled, ``False``
        when nothing suitable had focus or the evaluation raised.
    """
    try:
        result = await page.evaluate(
            """(value) => {
                const active = document.activeElement;
                const target = active && active !== document.body ? active : document.body;
                let event;
                try {
                    const data = new DataTransfer();
                    data.setData("text/plain", value);
                    event = new ClipboardEvent("paste", {
                        bubbles: true,
                        cancelable: true,
                        clipboardData: data,
                    });
                } catch {
                    event = new Event("paste", { bubbles: true, cancelable: true });
                }

                const accepted = target.dispatchEvent(event);
                if (!accepted) {
                    return true;
                }

                if (!active) {
                    return false;
                }

                const tagName = active.tagName;
                const editableInput =
                    tagName === "TEXTAREA" ||
                    (tagName === "INPUT" &&
                        ![
                            "button",
                            "checkbox",
                            "color",
                            "file",
                            "hidden",
                            "image",
                            "radio",
                            "range",
                            "reset",
                            "submit",
                        ].includes(active.type));

                if (editableInput) {
                    const start = active.selectionStart ?? active.value.length;
                    const end = active.selectionEnd ?? active.value.length;
                    active.setRangeText(value, start, end, "end");
                    active.dispatchEvent(new InputEvent("input", {
                        bubbles: true,
                        inputType: "insertFromPaste",
                        data: value,
                    }));
                    return true;
                }

                if (active.isContentEditable) {
                    document.execCommand("insertText", false, value);
                    return true;
                }

                return false;
            }""",
            text,
        )
    except Exception:
        # Any script failure counts as "not handled" so the caller can fall
        # back to another insertion strategy.
        return False
    return bool(result)
2171
+
2172
async def copy_selection(self, cut: bool) -> str:
    """Copy (or cut) the page's selection and send it to the client.

    The selected text is captured first as a fallback, the copy/cut
    shortcut is pressed, and after a short pause the clipboard is read.
    The clipboard text wins when non-empty; the outcome is queued as a
    ``clipboard`` message and also returned.
    """
    page = self._require_page()
    if cut:
        shortcut = "Control+X"
    else:
        shortcut = "Control+C"
    async with self._action_lock:
        selected_before = await self._selected_text(page)
        await page.keyboard.press(shortcut)
        # Give the page a moment to put the selection on the clipboard.
        await asyncio.sleep(0.05)
        clipboard_now = await self._clipboard_text(page)
    outcome = clipboard_now or selected_before
    await self._queue_message({"type": "clipboard", "text": outcome})
    return outcome
2182
+
2183
async def _clipboard_text(self, page: Page) -> str:
    """Read the page clipboard as text; any failure yields ``""``."""
    script = "() => navigator.clipboard ? navigator.clipboard.readText() : ''"
    try:
        value = await page.evaluate(script)
    except Exception:
        return ""
    if isinstance(value, str):
        return value
    return ""
2191
+
2192
async def _selected_text(self, page: Page) -> str:
    """Return the text currently selected in the page.

    For a focused ``<textarea>`` or text-like ``<input>`` the selected slice
    of its value is used; otherwise the window selection. Any failure or a
    non-string result yields ``""``.
    """
    script = """() => {
        const active = document.activeElement;
        if (
            active &&
            (active.tagName === "TEXTAREA" ||
                (active.tagName === "INPUT" &&
                    !["button", "checkbox", "color", "file", "hidden", "image", "radio", "range", "reset", "submit"].includes(active.type)))
        ) {
            const start = active.selectionStart ?? 0;
            const end = active.selectionEnd ?? 0;
            return active.value.slice(start, end);
        }
        const selection = window.getSelection();
        return selection ? selection.toString() : "";
    }"""
    try:
        value = await page.evaluate(script)
    except Exception:
        return ""
    if isinstance(value, str):
        return value
    return ""
2214
+
2215
async def _input_point(
    self, page: Page, payload: dict[str, Any], *, dom_coordinates: bool = False
) -> tuple[float, float]:
    """Convert a client point from ``payload`` into page input coordinates.

    Args:
        page: Page whose visual viewport is queried (mobile sessions only).
        payload: Client message carrying ``x``/``y``.
        dom_coordinates: When true, the visual-viewport offset is added so
            the result can be used with DOM APIs.

    Returns:
        ``(x, y)``. Non-mobile sessions get the clamped point (after undoing
        any focus clip/zoom) unchanged; the same happens when the viewport
        cannot be read or reports a non-positive scale.
    """
    x, y = self._point(payload)
    focus_clip = self._mobile_focus_clip
    if focus_clip is not None and self._mobile_focus_zoom > 0:
        # Undo the focus zoom: scale the point down and add the clip origin.
        x = (_float_value(focus_clip.get("x"), 0.0) or 0.0) + x / self._mobile_focus_zoom
        y = (_float_value(focus_clip.get("y"), 0.0) or 0.0) + y / self._mobile_focus_zoom
    if not self._is_mobile:
        return x, y
    try:
        viewport = await page.evaluate(
            """() => ({
                scale: window.visualViewport ? window.visualViewport.scale : 1,
                offsetLeft: window.visualViewport ? window.visualViewport.offsetLeft : 0,
                offsetTop: window.visualViewport ? window.visualViewport.offsetTop : 0,
                pageLeft: window.visualViewport ? window.visualViewport.pageLeft : window.scrollX,
                pageTop: window.visualViewport ? window.visualViewport.pageTop : window.scrollY,
                scrollX: window.scrollX,
                scrollY: window.scrollY,
                width: window.visualViewport ? window.visualViewport.width : window.innerWidth,
                height: window.visualViewport ? window.visualViewport.height : window.innerHeight,
            })"""
        )
    except Exception:
        return x, y

    scale = _float_value(viewport.get("scale"), 1.0) or 1.0
    if scale <= 0:
        return x, y
    # NOTE(review): _visual_viewport_offset is defined elsewhere; presumably
    # it derives the visual-viewport offset from the three named fields --
    # confirm against the helper.
    offset_left = _visual_viewport_offset(viewport, "offsetLeft", "pageLeft", "scrollX")
    offset_top = _visual_viewport_offset(viewport, "offsetTop", "pageTop", "scrollY")
    visible_width = _float_value(viewport.get("width"), float(self.viewport_width))
    visible_height = _float_value(viewport.get("height"), float(self.viewport_height))
    # Missing or non-positive sizes fall back to the configured viewport.
    visible_width = visible_width if visible_width and visible_width > 0 else float(self.viewport_width)
    visible_height = (
        visible_height if visible_height and visible_height > 0 else float(self.viewport_height)
    )
    # Divide by the visual-viewport scale to get CSS pixels.
    css_x = x / scale
    css_y = y / scale
    if dom_coordinates:
        # Shift by the viewport offset and clamp to the visible rectangle.
        css_x += offset_left
        css_y += offset_top
        css_x = max(offset_left, min(offset_left + visible_width, css_x))
        css_y = max(offset_top, min(offset_top + visible_height, css_y))
    else:
        # Stay relative to the visual viewport; clamp to its size.
        css_x = max(0.0, min(visible_width, css_x))
        css_y = max(0.0, min(visible_height, css_y))
    return css_x, css_y
2264
+
2265
def _point(self, payload: dict[str, Any]) -> tuple[float, float]:
    """Read ``x``/``y`` from ``payload`` (default 0) clamped to the viewport."""
    raw_x, raw_y = (float(payload.get(axis, 0)) for axis in ("x", "y"))
    max_x = float(self.viewport_width)
    max_y = float(self.viewport_height)
    return max(0.0, min(max_x, raw_x)), max(0.0, min(max_y, raw_y))
2271
+
2272
def _playwright_key(self, key: str, payload: dict[str, Any]) -> str | None:
    """Translate a browser key event into a Playwright key combination.

    Args:
        key: The event's ``key`` value.
        payload: The event, read for ``ctrlKey``/``altKey``/``shiftKey``/
            ``metaKey`` flags.

    Returns:
        A string such as ``"Control+A"`` or ``"Enter"``, or ``None`` for
        composition keys and for multi-character names that are neither a
        supported named key nor ``F1``-``F24``.
    """
    composition_keys = frozenset(
        (
            "Process",
            "Unidentified",
            "Dead",
            "Compose",
            "Convert",
            "NonConvert",
            "KanaMode",
            "HangulMode",
            "JunjaMode",
            "FinalMode",
            "HanjaMode",
            "KanjiMode",
            "ModeChange",
        )
    )
    if key in composition_keys:
        return None

    named_keys = frozenset(
        (
            "Space",
            "ArrowUp",
            "ArrowDown",
            "ArrowLeft",
            "ArrowRight",
            "Escape",
            "Enter",
            "Tab",
            "Backspace",
            "Delete",
            "Home",
            "End",
            "PageUp",
            "PageDown",
        )
    )
    # A literal space is the only key whose name differs in Playwright.
    normalized = "Space" if key == " " else key
    if len(normalized) > 1 and normalized not in named_keys:
        if not re.fullmatch(r"F(?:[1-9]|1[0-9]|2[0-4])", normalized):
            return None

    modifier_flags = (
        ("ctrlKey", "Control"),
        ("altKey", "Alt"),
        ("shiftKey", "Shift"),
        ("metaKey", "Meta"),
    )
    # A modifier is not added when the key itself is that modifier.
    modifiers = [
        name
        for flag, name in modifier_flags
        if payload.get(flag) and normalized not in {name, f"{name}Left", f"{name}Right"}
    ]
    if modifiers and len(normalized) == 1:
        # Single characters are upper-cased when combined with modifiers.
        normalized = normalized.upper()
    return "+".join(modifiers + [normalized])
2323
+
2324
def _status_message(self, state: str) -> dict[str, Any]:
    """Build the ``status`` message for ``state``.

    The URL is empty when there is no page or the page is already closed.
    """
    page = self.page
    current_url = ""
    if page is not None and not page.is_closed():
        current_url = page.url
    message: dict[str, Any] = {"type": "status", "state": state}
    message["url"] = current_url
    message["width"] = self.viewport_width
    message["height"] = self.viewport_height
    return message
2333
+
2334
async def _queue_message(self, message: dict[str, Any]) -> None:
    """Enqueue an outgoing message, evicting less important ones if full.

    When the queue is full:

    * an audio chunk may only replace one queued audio chunk, otherwise it
      is dropped;
    * any other message first evicts all queued frames, then one queued
      audio chunk, and finally the oldest queued message.
    """
    outbox = self._outgoing
    if outbox is None:
        return

    def is_audio_chunk(queued: dict[str, Any]) -> bool:
        return queued.get("type") == "audio" and queued.get("kind") == "chunk"

    def is_frame(queued: dict[str, Any]) -> bool:
        return queued.get("type") == "frame"

    message_type = message.get("type")
    if not outbox.full():
        outbox.put_nowait(message)
        return

    if message_type == "audio" and message.get("kind") == "chunk":
        self._drop_queued_messages(outbox, is_audio_chunk, limit=1)
        if not outbox.full():
            outbox.put_nowait(message)
        return

    self._drop_queued_messages(outbox, is_frame)
    if outbox.full():
        self._drop_queued_messages(outbox, is_audio_chunk, limit=1)
    if outbox.full():
        # Last resort: give up the oldest queued message.
        try:
            outbox.get_nowait()
        except asyncio.QueueEmpty:
            return
    outbox.put_nowait(message)
2368
+
2369
async def _queue_audio(self, message: dict[str, Any]) -> None:
    """Enqueue a message on the audio queue, shedding old chunks if full.

    When the queue is full, a chunk evicts up to four queued chunks and is
    itself dropped if that frees nothing. Any other message evicts one
    queued chunk, or failing that the oldest queued message.
    """
    audio_queue = self._audio_outgoing
    if audio_queue is None:
        return

    def is_audio_chunk(queued: dict[str, Any]) -> bool:
        return queued.get("type") == "audio" and queued.get("kind") == "chunk"

    if not audio_queue.full():
        audio_queue.put_nowait(message)
        return

    if message.get("kind") == "chunk":
        self._drop_queued_messages(audio_queue, is_audio_chunk, limit=4)
        if not audio_queue.full():
            audio_queue.put_nowait(message)
        return

    self._drop_queued_messages(audio_queue, is_audio_chunk, limit=1)
    if audio_queue.full():
        try:
            audio_queue.get_nowait()
        except asyncio.QueueEmpty:
            return
    audio_queue.put_nowait(message)
2401
+
2402
async def _queue_frame(self, message: dict[str, Any]) -> None:
    """Enqueue a frame so that it replaces every frame still waiting.

    Queued frames are always discarded first. If the queue is still full,
    one queued audio chunk is evicted; if that does not help either, the
    new frame is dropped.
    """
    outbox = self._outgoing
    if outbox is None:
        return

    def is_frame(queued: dict[str, Any]) -> bool:
        return queued.get("type") == "frame"

    def is_audio_chunk(queued: dict[str, Any]) -> bool:
        return queued.get("type") == "audio" and queued.get("kind") == "chunk"

    self._drop_queued_messages(outbox, is_frame)
    if outbox.full():
        self._drop_queued_messages(outbox, is_audio_chunk, limit=1)
    if not outbox.full():
        await outbox.put(message)
2416
+
2417
@staticmethod
def _drop_queued_messages(
    queue: asyncio.Queue[dict[str, Any]],
    predicate: Any,
    *,
    limit: int | None = None,
) -> int:
    """Remove queued messages matching ``predicate`` and report how many.

    The queue is drained completely and the surviving messages are put back
    in their original order.

    Args:
        queue: Queue to filter in place.
        predicate: Callable taking a message and returning truthy to drop it.
        limit: Maximum number of messages to drop; ``None`` means no limit.

    Returns:
        The number of messages dropped.
    """
    survivors: list[dict[str, Any]] = []
    removed = 0
    while not queue.empty():
        candidate = queue.get_nowait()
        may_drop = limit is None or removed < limit
        if may_drop and predicate(candidate):
            removed += 1
        else:
            survivors.append(candidate)
    for survivor in survivors:
        queue.put_nowait(survivor)
    return removed
2438
+
2439
def _require_page(self) -> Page:
    """Return the session's page.

    Raises:
        RuntimeError: If there is no page or the page has been closed.
    """
    page = self.page
    if page is not None and not page.is_closed():
        return page
    raise RuntimeError("Browser page is not available.")
2443
+
2444
def _require_context(self) -> BrowserContext:
    """Return the session's browser context.

    Raises:
        RuntimeError: If no context has been set.
    """
    context = self.context
    if context is not None:
        return context
    raise RuntimeError("Browser context is not available.")
2448
+
2449
def _normalize_cookie(self, cookie: dict[str, Any], page_url: str) -> dict[str, Any]:
    """Validate a client-supplied cookie and shape it for the browser context.

    Args:
        cookie: Raw cookie mapping (``name``, ``value``, ``domain``,
            ``path``, ``httpOnly``, ``secure``, ``expires``, ``sameSite``,
            ``partitionKey``).
        page_url: URL of the current page; its host is used when the cookie
            carries no domain.

    Returns:
        A new dict with ``name``, ``value``, ``domain``, ``path``,
        ``httpOnly`` and ``secure`` always present, plus ``expires``,
        ``sameSite`` and ``partitionKey`` when the input holds usable values.

    Raises:
        ValueError: If the name is empty, or no domain is given and none can
            be derived from ``page_url``.
    """
    name = str(cookie.get("name") or "").strip()
    if not name:
        raise ValueError("Cookie name is required.")
    path = str(cookie.get("path") or "/").strip() or "/"
    domain = str(cookie.get("domain") or "").strip()
    if not domain:
        host = urlsplit(page_url).hostname
        if not host:
            raise ValueError("Cookie domain is required.")
        domain = host

    # Only a missing value becomes "". ``value or ""`` would also erase
    # legitimate falsy values such as the number 0.
    raw_value = cookie.get("value")
    value = "" if raw_value is None else str(raw_value)

    normalized: dict[str, Any] = {
        "name": name,
        "value": value,
        "domain": domain,
        "path": path,
        "httpOnly": bool(cookie.get("httpOnly", False)),
        "secure": bool(cookie.get("secure", False)),
    }
    expires = cookie.get("expires")
    # bool is a subclass of int; ``True`` must not turn into the timestamp 1.0.
    if isinstance(expires, (int, float)) and not isinstance(expires, bool) and expires > 0:
        normalized["expires"] = float(expires)
    same_site = cookie.get("sameSite")
    if same_site in {"Lax", "None", "Strict"}:
        normalized["sameSite"] = same_site
    partition_key = cookie.get("partitionKey")
    if isinstance(partition_key, str) and partition_key:
        normalized["partitionKey"] = partition_key
    return normalized
2479
+
2480
+
2481
+ class BrowserManager:
2482
def __init__(self, settings: Settings) -> None:
    """Create an idle manager; nothing is launched until ``start`` runs."""
    self.settings = settings
    self._stopping = False

    # Playwright driver, browser and background task; all unset until start().
    self._playwright: Playwright | None = None
    self._browser: Browser | None = None
    self._cleanup_task: asyncio.Task[None] | None = None

    # Sessions by session id, and the session id registered per client.
    self._sessions: dict[str, BrowserSession] = {}
    self._client_sessions: dict[str, str] = {}

    # One cached browser context per client, guarded by a lock.
    self._client_contexts: dict[str, ClientContextEntry] = {}
    self._client_contexts_lock = asyncio.Lock()

    self._shared_context_policy = HostAccessPolicy(settings.allow_private_hosts)
2493
+
2494
async def start(self) -> None:
    """Launch Playwright and Chromium and start the cleanup loop.

    Creates the download and upload directories if needed. If the browser
    fails to launch, the Playwright driver started here is stopped again
    before the error propagates, so a later retry does not leak a driver.

    Raises:
        Exception: Whatever Playwright raises while starting or launching.
    """
    self._stopping = False
    self.settings.downloads_dir.mkdir(parents=True, exist_ok=True)
    self.settings.uploads_dir.mkdir(parents=True, exist_ok=True)
    playwright = await async_playwright().start()
    try:
        browser = await playwright.chromium.launch(
            headless=self.settings.headless,
            args=["--disable-dev-shm-usage"],
        )
    except BaseException:
        # The manager does not own the driver yet, so stop() would never
        # see it; shut it down here.
        try:
            await playwright.stop()
        except Exception:
            logger.debug("Stopping Playwright after a failed launch failed.", exc_info=True)
        raise
    self._playwright = playwright
    self._browser = browser
    # Never run two cleanup loops side by side if start() is called again.
    cleanup_task = self._cleanup_task
    if cleanup_task is None or cleanup_task.done():
        self._cleanup_task = asyncio.create_task(self._cleanup_loop())
2504
+
2505
async def stop(self) -> None:
    """Shut everything down: cleanup task, sessions, contexts, browser, driver.

    Each owned handle is detached from the manager before it is closed.
    """
    self._stopping = True

    cleanup_task, self._cleanup_task = self._cleanup_task, None
    if cleanup_task is not None:
        cleanup_task.cancel()
        # Wait for the cancellation without letting it propagate.
        await asyncio.gather(cleanup_task, return_exceptions=True)

    open_session_ids = list(self._sessions)
    for session_id in open_session_ids:
        await self.close_session(session_id)
    await self._close_all_client_contexts()

    browser, self._browser = self._browser, None
    if browser is not None:
        await _ignore_shutdown_disconnect(browser.close())

    playwright, self._playwright = self._playwright, None
    if playwright is not None:
        await _ignore_shutdown_disconnect(playwright.stop())
2527
+
2528
async def get_browser(self) -> Browser:
    """Return the browser, starting the manager first if needed.

    Raises:
        RuntimeError: If the manager is shutting down, or ``start`` finished
            without producing a browser.
    """
    if self._stopping:
        raise RuntimeError("Browser manager is shutting down.")
    browser = self._browser
    if browser is None:
        await self.start()
        browser = self._browser
        if browser is None:
            raise RuntimeError("Browser failed to start.")
    return browser
2536
+
2537
def mobile_context_options(self, width: int, height: int) -> dict[str, Any]:
    """Build browser-context options that emulate a mobile device.

    The user agent defaults to ``MOBILE_FALLBACK_USER_AGENT``. Once
    Playwright is running, it is replaced by the user agent of the first
    entry in ``MOBILE_DEVICE_DESCRIPTOR_NAMES`` that provides one.
    """
    options: dict[str, Any] = {
        "is_mobile": True,
        "has_touch": True,
        "screen": {"width": width, "height": height},
        "extra_http_headers": dict(MOBILE_CLIENT_HINT_HEADERS),
        "user_agent": MOBILE_FALLBACK_USER_AGENT,
    }
    playwright = self._playwright
    if playwright is not None:
        for device_name in MOBILE_DEVICE_DESCRIPTOR_NAMES:
            descriptor = playwright.devices.get(device_name)
            device_user_agent = descriptor.get("user_agent") if descriptor else None
            if device_user_agent:
                options["user_agent"] = device_user_agent
                break
    return options
2557
+
2558
async def acquire_context(
    self, client_uuid: str, context_options: dict[str, Any]
) -> BrowserContext:
    """Return the client's cached browser context, creating it on first use.

    A cached context is returned as is (only its ``last_used`` time is
    refreshed); ``context_options`` apply only when a context is created.
    A new context gets the audio bridge binding and the request/WebSocket
    guards before it is cached.

    Args:
        client_uuid: Identifier of the client owning the context.
        context_options: Keyword arguments for ``browser.new_context``.

    Raises:
        RuntimeError: If the manager is shutting down or the browser could
            not be started.
        Exception: Whatever Playwright raises during context creation or
            setup; a partially set-up context is closed first.
    """
    async with self._client_contexts_lock:
        cached = self._client_contexts.get(client_uuid)
        if cached is not None:
            cached.last_used = time.monotonic()
            return cached.context

        browser = await self.get_browser()
        context = await browser.new_context(**context_options)
        try:
            await context.expose_binding(
                "__websiteAgentAudioBridge",
                lambda source, message, client_uuid=client_uuid: asyncio.create_task(
                    self._handle_audio_bridge(client_uuid, message)
                ),
            )
            await context.route("**/*", self._guard_shared_context_route)
            await context.route_web_socket("**/*", self._guard_shared_context_websocket)
        except BaseException:
            # The context is not cached yet, so no other code path could
            # ever close it; release it here instead of leaking it.
            try:
                await context.close()
            except Exception:
                logger.debug("Closing a half-initialised browser context failed.", exc_info=True)
            raise
        self._client_contexts[client_uuid] = ClientContextEntry(
            context=context,
            last_used=time.monotonic(),
        )
        return context
2582
+
2583
def touch_context(self, client_uuid: str) -> None:
    """Mark the client's cached context as just used; unknown clients are ignored."""
    entry = self._client_contexts.get(client_uuid)
    if entry is None:
        return
    entry.last_used = time.monotonic()
2587
+
2588
async def create_session(
    self,
    raw_url: str,
    width: int,
    height: int,
    client_uuid: str,
    lock_url: str | None = None,
    is_mobile: bool = False,
    device_scale_factor: float = 1.0,
) -> BrowserSession:
    """Create and start a session for a client, replacing its previous one.

    The client's previous session is closed but its browser context is
    kept. If the new session fails to start, it is unregistered and closed,
    the client's context is closed, and the error is re-raised.

    Raises:
        RuntimeError: If the manager is shutting down.
    """
    if self._stopping:
        raise RuntimeError("Browser manager is shutting down.")

    replaced_session_id = self._client_sessions.get(client_uuid)
    if replaced_session_id is not None:
        await self.close_session(replaced_session_id, close_context=False)

    new_id = uuid4().hex
    session = BrowserSession(self, new_id, client_uuid, lock_url)
    # Register before starting so lookups by client can already find it.
    self._sessions[new_id] = session
    self._client_sessions[client_uuid] = new_id
    try:
        await session.start(raw_url, width, height, is_mobile, device_scale_factor)
    except Exception:
        self._sessions.pop(new_id, None)
        still_registered = self._client_sessions.get(client_uuid) == new_id
        if still_registered:
            self._client_sessions.pop(client_uuid, None)
        await session.close()
        await self.close_client_context(client_uuid)
        raise
    else:
        return session
2617
+
2618
async def _guard_shared_context_route(self, route: Route) -> None:
    """Let a request through only if the shared host policy allows its URL.

    Requests rejected with ``URLPolicyError`` are aborted.
    """
    policy = self._shared_context_policy
    target_url = route.request.url
    try:
        await policy.ensure_request_url_allowed(target_url)
        await route.continue_()
    except URLPolicyError:
        await route.abort()
2624
+
2625
async def _guard_shared_context_websocket(self, websocket: WebSocketRoute) -> None:
    """Connect a page WebSocket to its server only if the host policy allows it.

    A rejected socket is closed with code 1008 and the policy message,
    cut to 120 characters, as the reason.
    """
    policy = self._shared_context_policy
    try:
        await policy.ensure_request_url_allowed(websocket.url)
        websocket.connect_to_server()
    except URLPolicyError as denied:
        reason = str(denied)[:120]
        await websocket.close(code=1008, reason=reason)
2631
+
2632
async def _handle_audio_bridge(self, client_uuid: str, message: Any) -> None:
    """Forward an audio bridge message to the client's current session, if any."""
    session = self.get_session_for_client(client_uuid)
    if session is not None:
        await session.handle_audio_bridge_message(message)
2637
+
2638
def get_session(self, session_id: str) -> BrowserSession | None:
    """Look up a session by id; ``None`` when the id is unknown."""
    sessions = self._sessions
    return sessions.get(session_id)
2640
+
2641
def get_session_for_client(self, client_uuid: str) -> BrowserSession | None:
    """Return the client's current session, or ``None``.

    A client entry pointing at a session that no longer exists is removed
    as a side effect.
    """
    by_client = self._client_sessions
    session_id = by_client.get(client_uuid)
    if session_id is None:
        return None
    session = self._sessions.get(session_id)
    if session is None:
        # Stale mapping: the session is gone, so forget the link.
        by_client.pop(client_uuid, None)
    return session
2649
+
2650
async def close_session(self, session_id: str, *, close_context: bool = True) -> None:
    """Unregister and close a session; unknown ids are ignored.

    Args:
        session_id: Id of the session to close.
        close_context: When true, the client's browser context is closed
            too, unless the client already has another session registered.
    """
    session = self._sessions.pop(session_id, None)
    if session is None:
        return
    owner = session.client_uuid
    # Only drop the client link if it still points at this session.
    if self._client_sessions.get(owner) == session_id:
        self._client_sessions.pop(owner, None)
    await session.close()
    if close_context and owner not in self._client_sessions:
        await self.close_client_context(owner)
2659
+
2660
async def _cleanup_loop(self, interval_seconds: float = 30.0) -> None:
    """Periodically close expired sessions and sweep idle client contexts.

    Runs until the surrounding task is cancelled. Each pass waits
    ``interval_seconds``, then closes every session that is not connected,
    has a positive ``disconnected_at`` and has been disconnected for longer
    than ``settings.session_ttl_seconds``. It then calls
    ``_cleanup_idle_client_contexts`` with the same timestamp.

    A failure while closing one session, or during the context sweep, is
    logged and the loop keeps running. Previously the first such exception
    ended the loop, so nothing was cleaned up afterwards.

    Args:
        interval_seconds: Delay between cleanup passes; defaults to 30 seconds.
    """
    while True:
        await asyncio.sleep(interval_seconds)
        now = time.monotonic()
        ttl = self.settings.session_ttl_seconds
        expired = [
            session_id
            for session_id, session in self._sessions.items()
            if (
                not session.is_connected
                and session.disconnected_at > 0
                and now - session.disconnected_at > ttl
            )
        ]
        for session_id in expired:
            # ``except Exception`` does not catch asyncio.CancelledError
            # (a BaseException), so cancelling the task still stops the loop.
            try:
                await self.close_session(session_id)
            except Exception:
                logger.exception("Failed to close expired session %s", session_id)
        try:
            await self._cleanup_idle_client_contexts(now)
        except Exception:
            logger.exception("Failed to clean up idle client contexts")
2676
+
2677
async def _cleanup_idle_client_contexts(self, now: float) -> None:
    """Close client contexts that have no session and have been idle too long.

    A context is considered idle when its client has no entry in
    ``self._client_sessions`` and ``now - entry.last_used`` exceeds
    ``settings.session_ttl_seconds``.

    Args:
        now: Timestamp that each entry's ``last_used`` is compared against.
    """
    idle_limit = self.settings.session_ttl_seconds
    stale_clients = []
    async with self._client_contexts_lock:
        for owner_uuid, record in self._client_contexts.items():
            if owner_uuid in self._client_sessions:
                continue
            if now - record.last_used > idle_limit:
                stale_clients.append(owner_uuid)
    # Close after releasing the lock: close_client_context acquires it itself.
    for owner_uuid in stale_clients:
        await self.close_client_context(owner_uuid)
2687
+
2688
async def close_client_context(self, client_uuid: str) -> None:
    """Drop the client's browser context and delete the client's files.

    The context entry is removed from ``self._client_contexts`` while holding
    ``self._client_contexts_lock``. If an entry existed, its context is
    closed through ``_ignore_shutdown_disconnect``.
    """
    async with self._client_contexts_lock:
        removed = self._client_contexts.pop(client_uuid, None)
    if removed is not None:
        closing = removed.context.close()
        await _ignore_shutdown_disconnect(closing)
    # NOTE(review): file removal is assumed to run even when no context entry
    # existed -- confirm against the original indentation.
    await self.delete_client_files(client_uuid)
2694
+
2695
async def _close_all_client_contexts(self) -> None:
    """Close every registered client context and delete each client's files.

    The registry is copied and emptied while holding
    ``self._client_contexts_lock``; the contexts are then closed one by one
    through ``_ignore_shutdown_disconnect``.
    """
    async with self._client_contexts_lock:
        snapshot = list(self._client_contexts.items())
        self._client_contexts.clear()
    for owner_uuid, record in snapshot:
        closing = record.context.close()
        await _ignore_shutdown_disconnect(closing)
        await self.delete_client_files(owner_uuid)
2702
+
2703
def client_downloads_dir(self, client_uuid: str) -> Path:
    """Return ``settings.downloads_dir`` joined with ``client_uuid``."""
    downloads_root = self.settings.downloads_dir
    return downloads_root / client_uuid
2705
+
2706
def client_uploads_dir(self, client_uuid: str) -> Path:
    """Return ``settings.uploads_dir`` joined with ``client_uuid``."""
    uploads_root = self.settings.uploads_dir
    return uploads_root / client_uuid
2708
+
2709
async def delete_client_files(self, client_uuid: str) -> None:
    """Delete the download and upload directories belonging to one client.

    Each per-client directory is resolved and removed only when it lies
    strictly inside its own configured root (``settings.downloads_dir`` or
    ``settings.uploads_dir``). A path that resolves to the root itself, or
    to anywhere outside it, is logged and skipped. This matters for an empty
    or traversal-style ``client_uuid`` such as ``""`` or ``"../uploads"``,
    which would otherwise resolve to a shared root and delete every client's
    data.

    Directories that cannot be resolved (``OSError``) are skipped. Removal
    runs in a worker thread with ``ignore_errors=True``, so a missing
    directory is not an error.
    """
    # Pair each client directory with the shared root it must live under.
    candidates = (
        (self.client_downloads_dir(client_uuid), self.settings.downloads_dir),
        (self.client_uploads_dir(client_uuid), self.settings.uploads_dir),
    )
    for client_dir, root_dir in candidates:
        try:
            resolved = client_dir.resolve()
        except OSError:
            continue
        allowed_root = root_dir.resolve()
        # Strict containment: the root itself is never an acceptable target.
        if allowed_root not in resolved.parents:
            logger.warning("Refusing to delete unexpected client data path: %s", resolved)
            continue
        await asyncio.to_thread(shutil.rmtree, resolved, ignore_errors=True)
2724
+
2725
+
2726
async def _ignore_shutdown_disconnect(awaitable: Any) -> None:
    """Await ``awaitable`` and suppress errors that only signal a disconnect.

    Cancellation is always re-raised. Any other exception is swallowed only
    when ``_is_shutdown_disconnect`` or ``_is_websocket_disconnect`` accepts
    it; otherwise it is re-raised unchanged.
    """
    try:
        await awaitable
    except asyncio.CancelledError:
        raise
    except Exception as exc:
        benign = _is_shutdown_disconnect(exc) or _is_websocket_disconnect(exc)
        if not benign:
            raise
2734
+
2735
+
2736
def _is_shutdown_disconnect(exc: Exception) -> bool:
    """Return True when the exception text contains a known closed-connection phrase.

    The comparison is case-insensitive and uses substring matching on
    ``str(exc)``.
    """
    text = str(exc).lower()
    markers = (
        "connection closed",
        "target page, context or browser has been closed",
        "browser has been closed",
        "playwright connection closed",
    )
    return any(marker in text for marker in markers)
2744
+
2745
+
2746
def _is_websocket_disconnect(exc: Exception) -> bool:
    """Return True when the exception indicates the WebSocket is already gone.

    A ``WebSocketDisconnect`` always qualifies. A ``RuntimeError`` qualifies
    when its lower-cased text contains one of the known phrases, or contains
    both "unexpected asgi message" and "websocket". Every other exception
    type returns False.
    """
    if isinstance(exc, WebSocketDisconnect):
        return True
    if not isinstance(exc, RuntimeError):
        return False

    text = str(exc).lower()
    phrases = (
        "websocket is not connected",
        "need to call \"accept\" first",
        "cannot call \"send\" once a close message has been sent",
    )
    if any(phrase in text for phrase in phrases):
        return True
    return "unexpected asgi message" in text and "websocket" in text