vibesurf 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vibesurf might be problematic.
- vibe_surf/_version.py +2 -2
- vibe_surf/agents/browser_use_agent.py +48 -154
- vibe_surf/agents/vibe_surf_agent.py +10 -9
- vibe_surf/backend/shared_state.py +1 -1
- vibe_surf/backend/utils/encryption.py +5 -35
- vibe_surf/browser/agen_browser_profile.py +3 -4
- vibe_surf/browser/agent_browser_session.py +115 -52
- vibe_surf/browser/browser_manager.py +2 -2
- vibe_surf/browser/utils.py +8 -15
- vibe_surf/browser/watchdogs/action_watchdog.py +8 -194
- vibe_surf/browser/watchdogs/dom_watchdog.py +3 -675
- vibe_surf/controller/mcp_client.py +0 -4
- vibe_surf/controller/{vibesurf_controller.py → vibesurf_tools.py} +13 -48
- {vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/METADATA +8 -3
- {vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/RECORD +19 -19
- {vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/WHEEL +0 -0
- {vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/entry_points.txt +0 -0
- {vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/licenses/LICENSE +0 -0
- {vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/top_level.txt +0 -0
vibe_surf/browser/watchdogs/dom_watchdog.py
@@ -1,6 +1,7 @@
 """DOM watchdog for browser DOM tree management using CDP."""
 
 import asyncio
+import pdb
 import time
 from typing import TYPE_CHECKING
 
@@ -11,6 +12,7 @@ from browser_use.browser.events import (
     TabCreatedEvent,
 )
 from browser_use.browser.watchdog_base import BaseWatchdog
+from browser_use.browser.watchdogs.dom_watchdog import DOMWatchdog
 from browser_use.dom.service import DomService
 from browser_use.dom.views import (
     EnhancedDOMTreeNode,
@@ -21,120 +23,7 @@ if TYPE_CHECKING:
     from browser_use.browser.views import BrowserStateSummary, PageInfo
 
 
-class CustomDOMWatchdog(BaseWatchdog):
-    """Handles DOM tree building, serialization, and element access via CDP.
-
-    This watchdog acts as a bridge between the event-driven browser session
-    and the DomService implementation, maintaining cached state and providing
-    helper methods for other watchdogs.
-    """
-
-    LISTENS_TO = [TabCreatedEvent, BrowserStateRequestEvent]
-    EMITS = [BrowserErrorEvent]
-
-    # Public properties for other watchdogs
-    selector_map: dict[int, EnhancedDOMTreeNode] | None = None
-    current_dom_state: SerializedDOMState | None = None
-    enhanced_dom_tree: EnhancedDOMTreeNode | None = None
-
-    # Internal DOM service
-    _dom_service: DomService | None = None
-
-    async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None:
-        # self.logger.debug('Setting up init scripts in browser')
-
-        self.logger.debug('💉 Injecting DOM Service init script to track event listeners added to DOM elements by JS...')
-
-        init_script = """
-            // check to make sure we're not inside the PDF viewer
-            window.isPdfViewer = !!document?.body?.querySelector('body > embed[type="application/pdf"][width="100%"]')
-            if (!window.isPdfViewer) {
-
-                // Permissions
-                const originalQuery = window.navigator.permissions.query;
-                window.navigator.permissions.query = (parameters) => (
-                    parameters.name === 'notifications' ?
-                        Promise.resolve({ state: Notification.permission }) :
-                        originalQuery(parameters)
-                );
-                (() => {
-                    if (window._eventListenerTrackerInitialized) return;
-                    window._eventListenerTrackerInitialized = true;
-
-                    const originalAddEventListener = EventTarget.prototype.addEventListener;
-                    const eventListenersMap = new WeakMap();
-
-                    EventTarget.prototype.addEventListener = function(type, listener, options) {
-                        if (typeof listener === "function") {
-                            let listeners = eventListenersMap.get(this);
-                            if (!listeners) {
-                                listeners = [];
-                                eventListenersMap.set(this, listeners);
-                            }
-
-                            listeners.push({
-                                type,
-                                listener,
-                                listenerPreview: listener.toString().slice(0, 100),
-                                options
-                            });
-                        }
-
-                        return originalAddEventListener.call(this, type, listener, options);
-                    };
-
-                    window.getEventListenersForNode = (node) => {
-                        const listeners = eventListenersMap.get(node) || [];
-                        return listeners.map(({ type, listenerPreview, options }) => ({
-                            type,
-                            listenerPreview,
-                            options
-                        }));
-                    };
-                })();
-            }
-        """
-
-        # Try to inject the script, but don't fail if the Page domain isn't ready yet
-        # This can happen when a new tab is created and the CDP session isn't fully attached
-        try:
-            await self.browser_session._cdp_add_init_script(init_script)
-        except Exception as e:
-            if "'Page.addScriptToEvaluateOnNewDocument' wasn't found" in str(e):
-                self.logger.debug(f'Page domain not ready for new tab, skipping init script injection: {e}')
-                # The script will be injected when the page actually navigates
-            else:
-                # Re-raise other errors
-                raise
-
-    def _get_recent_events_str(self, limit: int = 10) -> str | None:
-        """Get the most recent event names from the event bus as CSV.
-
-        Args:
-            limit: Maximum number of recent events to include
-
-        Returns:
-            CSV string of recent event names or None if not available
-        """
-        try:
-            # Get all events from history, sorted by creation time (most recent first)
-            all_events = sorted(
-                self.browser_session.event_bus.event_history.values(), key=lambda e: e.event_created_at.timestamp(),
-                reverse=True
-            )
-
-            # Take the most recent events and get their names
-            recent_event_names = [event.event_type for event in all_events[:limit]]
-            # TODO: in the future dump these as JSON instead of a CSV of the event names only
-            # some_event.model_dump(mode='json', exclude=some_event.event_builtin_fields)
-            # include event_results summarized / truncated to some reasonable length
-
-            if recent_event_names:
-                return ', '.join(recent_event_names)
-        except Exception as e:
-            self.logger.debug(f'Failed to get recent events: {e}')
-
-        return None
+class CustomDOMWatchdog(DOMWatchdog):
 
     async def get_browser_state_no_event_bus(self, include_dom: bool = True,
                                              include_screenshot: bool = True,
@@ -391,564 +280,3 @@ class CustomDOMWatchdog(BaseWatchdog):
             recent_events=None,
         )
 
-    async def on_BrowserStateRequestEvent(self, event: BrowserStateRequestEvent) -> 'BrowserStateSummary':
-        """Handle browser state request by coordinating DOM building and screenshot capture.
-
-        This is the main entry point for getting the complete browser state.
-
-        Args:
-            event: The browser state request event with options
-
-        Returns:
-            Complete BrowserStateSummary with DOM, screenshot, and target info
-        """
-        from browser_use.browser.views import BrowserStateSummary, PageInfo
-
-        self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: STARTING browser state request')
-        page_url = await self.browser_session.get_current_page_url()
-        self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got page URL: {page_url}')
-        if self.browser_session.agent_focus:
-            self.logger.debug(
-                f'📍 Current page URL: {page_url}, target_id: {self.browser_session.agent_focus.target_id}, session_id: {self.browser_session.agent_focus.session_id}'
-            )
-        else:
-            self.logger.debug(f'📍 Current page URL: {page_url}, no cdp_session attached')
-
-        # check if we should skip DOM tree build for pointless pages
-        not_a_meaningful_website = page_url.lower().split(':', 1)[0] not in ('http', 'https')
-
-        # Wait for page stability using browser profile settings (main branch pattern)
-        if not not_a_meaningful_website:
-            self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ⏳ Waiting for page stability...')
-            try:
-                await self._wait_for_stable_network()
-                self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Page stability complete')
-            except Exception as e:
-                self.logger.warning(
-                    f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Network waiting failed: {e}, continuing anyway...'
-                )
-
-        # Get tabs info once at the beginning for all paths
-        self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting tabs info...')
-        tabs_info = await self.browser_session.get_tabs()
-        self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got {len(tabs_info)} tabs')
-        self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Tabs info: {tabs_info}')
-
-        # Get viewport / scroll position info, remember changing scroll position should invalidate selector_map cache because it only includes visible elements
-        # cdp_session = await self.browser_session.get_or_create_cdp_session(focus=True)
-        # scroll_info = await cdp_session.cdp_client.send.Runtime.evaluate(
-        #     params={'expression': 'JSON.stringify({y: document.body.scrollTop, x: document.body.scrollLeft, width: document.documentElement.clientWidth, height: document.documentElement.clientHeight})'},
-        #     session_id=cdp_session.session_id,
-        # )
-        # self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got scroll info: {scroll_info["result"]}')
-
-        try:
-            # Fast path for empty pages
-            if not_a_meaningful_website:
-                self.logger.debug(f'⚡ Skipping BuildDOMTree for empty target: {page_url}')
-                self.logger.info(f'📸 Not taking screenshot for empty page: {page_url} (non-http/https URL)')
-
-                # Create minimal DOM state
-                content = SerializedDOMState(_root=None, selector_map={})
-
-                # Skip screenshot for empty pages
-                screenshot_b64 = None
-
-                # Try to get page info from CDP, fall back to defaults if unavailable
-                try:
-                    page_info = await self._get_page_info()
-                except Exception as e:
-                    self.logger.debug(f'Failed to get page info from CDP for empty page: {e}, using fallback')
-                    # Use default viewport dimensions
-                    viewport = self.browser_session.browser_profile.viewport or {'width': 1280, 'height': 720}
-                    page_info = PageInfo(
-                        viewport_width=viewport['width'],
-                        viewport_height=viewport['height'],
-                        page_width=viewport['width'],
-                        page_height=viewport['height'],
-                        scroll_x=0,
-                        scroll_y=0,
-                        pixels_above=0,
-                        pixels_below=0,
-                        pixels_left=0,
-                        pixels_right=0,
-                    )
-
-                return BrowserStateSummary(
-                    dom_state=content,
-                    url=page_url,
-                    title='Empty Tab',
-                    tabs=tabs_info,
-                    screenshot=screenshot_b64,
-                    page_info=page_info,
-                    pixels_above=0,
-                    pixels_below=0,
-                    browser_errors=[],
-                    is_pdf_viewer=False,
-                    recent_events=self._get_recent_events_str() if event.include_recent_events else None,
-                )
-
-            # Execute DOM building and screenshot capture in parallel
-            dom_task = None
-            screenshot_task = None
-
-            # Start DOM building task if requested
-            if event.include_dom:
-                self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 🌳 Starting DOM tree build task...')
-
-                previous_state = (
-                    self.browser_session._cached_browser_state_summary.dom_state
-                    if self.browser_session._cached_browser_state_summary
-                    else None
-                )
-
-                dom_task = asyncio.create_task(self._build_dom_tree_without_highlights(previous_state))
-
-            # Start clean screenshot task if requested (without JS highlights)
-            if event.include_screenshot:
-                self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Starting clean screenshot task...')
-                screenshot_task = asyncio.create_task(self._capture_clean_screenshot())
-
-            # Wait for both tasks to complete
-            content = None
-            screenshot_b64 = None
-
-            if dom_task:
-                try:
-                    content = await dom_task
-                    self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ DOM tree build completed')
-                except Exception as e:
-                    self.logger.warning(
-                        f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: DOM build failed: {e}, using minimal state')
-                    content = SerializedDOMState(_root=None, selector_map={})
-            else:
-                content = SerializedDOMState(_root=None, selector_map={})
-
-            if screenshot_task:
-                try:
-                    screenshot_b64 = await screenshot_task
-                    self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Clean screenshot captured')
-                except Exception as e:
-                    self.logger.warning(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Clean screenshot failed: {e}')
-                    screenshot_b64 = None
-
-            # Apply Python-based highlighting if both DOM and screenshot are available
-            if screenshot_b64 and content and content.selector_map and self.browser_session.browser_profile.highlight_elements:
-                try:
-                    self.logger.debug(
-                        '🔍 DOMWatchdog.on_BrowserStateRequestEvent: 🎨 Applying Python-based highlighting...')
-                    from vibe_surf.browser.utils import create_highlighted_screenshot_async
-
-                    # Get CDP session for viewport info
-                    cdp_session = await self.browser_session.get_or_create_cdp_session()
-
-                    screenshot_b64 = await create_highlighted_screenshot_async(screenshot_b64, content.selector_map,
-                                                                               cdp_session)
-                    self.logger.debug(
-                        f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Applied highlights to {len(content.selector_map)} elements'
-                    )
-                except Exception as e:
-                    self.logger.warning(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Python highlighting failed: {e}')
-
-            # Ensure we have valid content
-            if not content:
-                content = SerializedDOMState(_root=None, selector_map={})
-
-            # Tabs info already fetched at the beginning
-
-            # Get target title safely
-            try:
-                self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting page title...')
-                title = await asyncio.wait_for(self.browser_session.get_current_page_title(), timeout=2.0)
-                self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got title: {title}')
-            except Exception as e:
-                self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Failed to get title: {e}')
-                title = 'Page'
-
-            # Get comprehensive page info from CDP
-            try:
-                self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting page info from CDP...')
-                page_info = await self._get_page_info()
-                self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got page info from CDP: {page_info}')
-            except Exception as e:
-                self.logger.debug(
-                    f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Failed to get page info from CDP: {e}, using fallback'
-                )
-                # Fallback to default viewport dimensions
-                viewport = self.browser_session.browser_profile.viewport or {'width': 1280, 'height': 720}
-                page_info = PageInfo(
-                    viewport_width=viewport['width'],
-                    viewport_height=viewport['height'],
-                    page_width=viewport['width'],
-                    page_height=viewport['height'],
-                    scroll_x=0,
-                    scroll_y=0,
-                    pixels_above=0,
-                    pixels_below=0,
-                    pixels_left=0,
-                    pixels_right=0,
-                )
-
-            # Check for PDF viewer
-            is_pdf_viewer = page_url.endswith('.pdf') or '/pdf/' in page_url
-
-            # Build and cache the browser state summary
-            if screenshot_b64:
-                self.logger.debug(
-                    f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Creating BrowserStateSummary with screenshot, length: {len(screenshot_b64)}'
-                )
-            else:
-                self.logger.debug(
-                    '🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Creating BrowserStateSummary WITHOUT screenshot'
-                )
-
-            browser_state = BrowserStateSummary(
-                dom_state=content,
-                url=page_url,
-                title=title,
-                tabs=tabs_info,
-                screenshot=screenshot_b64,
-                page_info=page_info,
-                pixels_above=0,
-                pixels_below=0,
-                browser_errors=[],
-                is_pdf_viewer=is_pdf_viewer,
-                recent_events=self._get_recent_events_str() if event.include_recent_events else None,
-            )
-
-            # Cache the state
-            self.browser_session._cached_browser_state_summary = browser_state
-
-            self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ COMPLETED - Returning browser state')
-            return browser_state
-
-        except Exception as e:
-            self.logger.error(f'Failed to get browser state: {e}')
-
-            # Return minimal recovery state
-            return BrowserStateSummary(
-                dom_state=SerializedDOMState(_root=None, selector_map={}),
-                url=page_url if 'page_url' in locals() else '',
-                title='Error',
-                tabs=[],
-                screenshot=None,
-                page_info=PageInfo(
-                    viewport_width=1280,
-                    viewport_height=720,
-                    page_width=1280,
-                    page_height=720,
-                    scroll_x=0,
-                    scroll_y=0,
-                    pixels_above=0,
-                    pixels_below=0,
-                    pixels_left=0,
-                    pixels_right=0,
-                ),
-                pixels_above=0,
-                pixels_below=0,
-                browser_errors=[str(e)],
-                is_pdf_viewer=False,
-                recent_events=None,
-            )
-
-    async def _build_dom_tree(self, previous_state: SerializedDOMState | None = None) -> SerializedDOMState:
-        """Internal method to build and serialize DOM tree.
-
-        This is the actual implementation that does the work, called by both
-        on_BrowserStateRequestEvent.
-
-        Returns:
-            SerializedDOMState with serialized DOM and selector map
-        """
-        try:
-            self.logger.debug('🔍 DOMWatchdog._build_dom_tree: STARTING DOM tree build')
-            # Remove any existing highlights before building new DOM
-            try:
-                self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Removing existing highlights...')
-                await self.browser_session.remove_highlights()
-                # self.logger.debug('🔍 DOMWatchdog._build_dom_tree: ✅ Highlights removed')
-            except Exception as e:
-                self.logger.debug(f'🔍 DOMWatchdog._build_dom_tree: Failed to remove existing highlights: {e}')
-
-            # Create or reuse DOM service
-            if self._dom_service is None:
-                # self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Creating DomService...')
-                self._dom_service = DomService(browser_session=self.browser_session, logger=self.logger)
-                # self.logger.debug('🔍 DOMWatchdog._build_dom_tree: ✅ DomService created')
-            # else:
-            # self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Reusing existing DomService')
-
-            # Get serialized DOM tree using the service
-            self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Calling DomService.get_serialized_dom_tree...')
-            start = time.time()
-            self.current_dom_state, self.enhanced_dom_tree, timing_info = await self._dom_service.get_serialized_dom_tree(
-                previous_cached_state=previous_state,
-            )
-            end = time.time()
-            self.logger.debug('🔍 DOMWatchdog._build_dom_tree: ✅ DomService.get_serialized_dom_tree completed')
-
-            self.logger.debug(f'Time taken to get DOM tree: {end - start} seconds')
-            self.logger.debug(f'Timing breakdown: {timing_info}')
-
-            # Update selector map for other watchdogs
-            self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Updating selector maps...')
-            self.selector_map = self.current_dom_state.selector_map
-            # Update BrowserSession's cached selector map
-            if self.browser_session:
-                self.browser_session.update_cached_selector_map(self.selector_map)
-            self.logger.debug(
-                f'🔍 DOMWatchdog._build_dom_tree: ✅ Selector maps updated, {len(self.selector_map)} elements')
-
-            # Inject highlighting for visual feedback if we have elements
-            if self.selector_map and self._dom_service:
-                try:
-                    self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Injecting highlighting script...')
-                    from browser_use.dom.debug.highlights import inject_highlighting_script
-
-                    await inject_highlighting_script(self._dom_service, self.selector_map)
-                    self.logger.debug(
-                        f'🔍 DOMWatchdog._build_dom_tree: ✅ Injected highlighting for {len(self.selector_map)} elements'
-                    )
-                except Exception as e:
-                    self.logger.debug(f'🔍 DOMWatchdog._build_dom_tree: Failed to inject highlighting: {e}')
-
-            self.logger.debug('🔍 DOMWatchdog._build_dom_tree: ✅ COMPLETED DOM tree build')
-            return self.current_dom_state
-
-        except Exception as e:
-            self.logger.error(f'Failed to build DOM tree: {e}')
-            self.event_bus.dispatch(
-                BrowserErrorEvent(
-                    error_type='DOMBuildFailed',
-                    message=str(e),
-                )
-            )
-            raise
-
-    async def _build_dom_tree_without_highlights(self,
-                                                 previous_state: SerializedDOMState | None = None) -> SerializedDOMState:
-        """Build DOM tree without injecting JavaScript highlights (for parallel execution)."""
-        try:
-            self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: STARTING DOM tree build')
-
-            # Create or reuse DOM service
-            if self._dom_service is None:
-                self._dom_service = DomService(browser_session=self.browser_session, logger=self.logger)
-
-            # Get serialized DOM tree using the service
-            self.logger.debug(
-                '🔍 DOMWatchdog._build_dom_tree_without_highlights: Calling DomService.get_serialized_dom_tree...')
-            start = time.time()
-            self.current_dom_state, self.enhanced_dom_tree, timing_info = await self._dom_service.get_serialized_dom_tree(
-                previous_cached_state=previous_state,
-            )
-            end = time.time()
-            self.logger.debug(
-                '🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ DomService.get_serialized_dom_tree completed'
-            )
-
-            self.logger.debug(f'Time taken to get DOM tree: {end - start} seconds')
-            self.logger.debug(f'Timing breakdown: {timing_info}')
-
-            # Update selector map for other watchdogs
-            self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: Updating selector maps...')
-            self.selector_map = self.current_dom_state.selector_map
-            # Update BrowserSession's cached selector map
-            if self.browser_session:
-                self.browser_session.update_cached_selector_map(self.selector_map)
-            self.logger.debug(
-                f'🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ Selector maps updated, {len(self.selector_map)} elements'
-            )
-
-            # Skip JavaScript highlighting injection - Python highlighting will be applied later
-            self.logger.debug(
-                '🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ COMPLETED DOM tree build (no JS highlights)')
-            return self.current_dom_state
-
-        except Exception as e:
-            self.logger.error(f'Failed to build DOM tree without highlights: {e}')
-            self.event_bus.dispatch(
-                BrowserErrorEvent(
-                    error_type='DOMBuildFailed',
-                    message=str(e),
-                )
-            )
-            raise
-
-    async def _capture_clean_screenshot(self) -> str:
-        """Capture a clean screenshot without JavaScript highlights."""
-        try:
-            self.logger.debug('🔍 DOMWatchdog._capture_clean_screenshot: Capturing clean screenshot...')
-
-            # Ensure we have a focused CDP session
-            assert self.browser_session.agent_focus is not None, 'No current target ID'
-            await self.browser_session.get_or_create_cdp_session(target_id=self.browser_session.agent_focus.target_id,
-                                                                 focus=True)
-
-            # Check if handler is registered
-            handlers = self.event_bus.handlers.get('ScreenshotEvent', [])
-            handler_names = [getattr(h, '__name__', str(h)) for h in handlers]
-            self.logger.debug(f'📸 ScreenshotEvent handlers registered: {len(handlers)} - {handler_names}')
-
-            screenshot_event = self.event_bus.dispatch(ScreenshotEvent(full_page=False))
-            self.logger.debug('📸 Dispatched ScreenshotEvent, waiting for event to complete...')
-
-            # Wait for the event itself to complete (this waits for all handlers)
-            await screenshot_event
-
-            # Get the single handler result
-            screenshot_b64 = await screenshot_event.event_result(raise_if_any=True, raise_if_none=True)
-            if screenshot_b64 is None:
-                raise RuntimeError('Screenshot handler returned None')
-            self.logger.debug('🔍 DOMWatchdog._capture_clean_screenshot: ✅ Clean screenshot captured successfully')
-            return str(screenshot_b64)
-
-        except TimeoutError:
-            self.logger.warning('📸 Clean screenshot timed out after 6 seconds - no handler registered or slow page?')
-            raise
-        except Exception as e:
-            self.logger.warning(f'📸 Clean screenshot failed: {type(e).__name__}: {e}')
-            raise
-
-    async def _wait_for_stable_network(self):
-        """Wait for page stability - simplified for CDP-only branch."""
-        start_time = time.time()
-
-        # Apply minimum wait time first (let page settle)
-        min_wait = self.browser_session.browser_profile.minimum_wait_page_load_time
-        if min_wait > 0:
-            self.logger.debug(f'⏳ Minimum wait: {min_wait}s')
-            await asyncio.sleep(min_wait)
-
-        # Apply network idle wait time (for dynamic content like iframes)
-        network_idle_wait = self.browser_session.browser_profile.wait_for_network_idle_page_load_time
-        if network_idle_wait > 0:
-            self.logger.debug(f'⏳ Network idle wait: {network_idle_wait}s')
-            await asyncio.sleep(network_idle_wait)
-
-        elapsed = time.time() - start_time
-        self.logger.debug(f'✅ Page stability wait completed in {elapsed:.2f}s')
-
-    async def _get_page_info(self) -> 'PageInfo':
-        """Get comprehensive page information using a single CDP call.
-
-        TODO: should we make this an event as well?
-
-        Returns:
-            PageInfo with all viewport, page dimensions, and scroll information
-        """
-
-        from browser_use.browser.views import PageInfo
-
-        # Get CDP session for the current target
-        if not self.browser_session.agent_focus:
-            raise RuntimeError('No active CDP session - browser may not be connected yet')
-
-        cdp_session = await self.browser_session.get_or_create_cdp_session(
-            target_id=self.browser_session.agent_focus.target_id, focus=True
-        )
-
-        # Get layout metrics which includes all the information we need
-        metrics = await asyncio.wait_for(
-            cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id), timeout=10.0
-        )
-
-        # Extract different viewport types
-        layout_viewport = metrics.get('layoutViewport', {})
-        visual_viewport = metrics.get('visualViewport', {})
-        css_visual_viewport = metrics.get('cssVisualViewport', {})
-        css_layout_viewport = metrics.get('cssLayoutViewport', {})
-        content_size = metrics.get('contentSize', {})
-
-        # Calculate device pixel ratio to convert between device pixels and CSS pixels
-        # This matches the approach in dom/service.py _get_viewport_ratio method
-        css_width = css_visual_viewport.get('clientWidth', css_layout_viewport.get('clientWidth', 1280.0))
-        device_width = visual_viewport.get('clientWidth', css_width)
-        device_pixel_ratio = device_width / css_width if css_width > 0 else 1.0
-
-        # For viewport dimensions, use CSS pixels (what JavaScript sees)
-        # Prioritize CSS layout viewport, then fall back to layout viewport
-        viewport_width = int(css_layout_viewport.get('clientWidth') or layout_viewport.get('clientWidth', 1280))
-        viewport_height = int(css_layout_viewport.get('clientHeight') or layout_viewport.get('clientHeight', 720))
-
-        # For total page dimensions, content size is typically in device pixels, so convert to CSS pixels
-        # by dividing by device pixel ratio
-        raw_page_width = content_size.get('width', viewport_width * device_pixel_ratio)
-        raw_page_height = content_size.get('height', viewport_height * device_pixel_ratio)
-        page_width = int(raw_page_width / device_pixel_ratio)
-        page_height = int(raw_page_height / device_pixel_ratio)
-
-        # For scroll position, use CSS visual viewport if available, otherwise CSS layout viewport
-        # These should already be in CSS pixels
-        scroll_x = int(css_visual_viewport.get('pageX') or css_layout_viewport.get('pageX', 0))
-        scroll_y = int(css_visual_viewport.get('pageY') or css_layout_viewport.get('pageY', 0))
-
-        # Calculate scroll information - pixels that are above/below/left/right of current viewport
-        pixels_above = scroll_y
-        pixels_below = max(0, page_height - viewport_height - scroll_y)
-        pixels_left = scroll_x
-        pixels_right = max(0, page_width - viewport_width - scroll_x)
-
-        page_info = PageInfo(
-            viewport_width=viewport_width,
-            viewport_height=viewport_height,
-            page_width=page_width,
-            page_height=page_height,
-            scroll_x=scroll_x,
-            scroll_y=scroll_y,
-            pixels_above=pixels_above,
-            pixels_below=pixels_below,
-            pixels_left=pixels_left,
-            pixels_right=pixels_right,
-        )
-
-        return page_info
-
-    # ========== Public Helper Methods ==========
-
-    async def get_element_by_index(self, index: int) -> EnhancedDOMTreeNode | None:
-        """Get DOM element by index from cached selector map.
-
-        Builds DOM if not cached.
-
-        Returns:
-            EnhancedDOMTreeNode or None if index not found
-        """
-        if not self.selector_map:
-            # Build DOM if not cached
-            await self._build_dom_tree()
-
-        return self.selector_map.get(index) if self.selector_map else None
-
-    def clear_cache(self) -> None:
-        """Clear cached DOM state to force rebuild on next access."""
-        self.selector_map = None
-        self.current_dom_state = None
-        self.enhanced_dom_tree = None
-
-        # Keep the DOM service instance to reuse its CDP client connection
-
-    def is_file_input(self, element: EnhancedDOMTreeNode) -> bool:
-        """Check if element is a file input."""
-        return element.node_name.upper() == 'INPUT' and element.attributes.get('type', '').lower() == 'file'
-
-    @staticmethod
-    def is_element_visible_according_to_all_parents(node: EnhancedDOMTreeNode,
-                                                    html_frames: list[EnhancedDOMTreeNode]) -> bool:
-        """Check if the element is visible according to all its parent HTML frames.
-
-        Delegates to the DomService static method.
-        """
-        return DomService.is_element_visible_according_to_all_parents(node, html_frames)
-
-    async def __aexit__(self, exc_type, exc_value, traceback):
-        """Clean up DOM service on exit."""
-        if self._dom_service:
-            await self._dom_service.__aexit__(exc_type, exc_value, traceback)
-            self._dom_service = None
-
-    def __del__(self):
-        """Clean up DOM service on deletion."""
-        super().__del__()
-        # DOM service will clean up its own CDP client
-        self._dom_service = None