vibesurf 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vibesurf might be problematic. Click here for more details.

@@ -1,6 +1,7 @@
1
1
  """DOM watchdog for browser DOM tree management using CDP."""
2
2
 
3
3
  import asyncio
4
+ import pdb
4
5
  import time
5
6
  from typing import TYPE_CHECKING
6
7
 
@@ -11,6 +12,7 @@ from browser_use.browser.events import (
11
12
  TabCreatedEvent,
12
13
  )
13
14
  from browser_use.browser.watchdog_base import BaseWatchdog
15
+ from browser_use.browser.watchdogs.dom_watchdog import DOMWatchdog
14
16
  from browser_use.dom.service import DomService
15
17
  from browser_use.dom.views import (
16
18
  EnhancedDOMTreeNode,
@@ -21,120 +23,7 @@ if TYPE_CHECKING:
21
23
  from browser_use.browser.views import BrowserStateSummary, PageInfo
22
24
 
23
25
 
24
- class CustomDOMWatchdog(BaseWatchdog):
25
- """Handles DOM tree building, serialization, and element access via CDP.
26
-
27
- This watchdog acts as a bridge between the event-driven browser session
28
- and the DomService implementation, maintaining cached state and providing
29
- helper methods for other watchdogs.
30
- """
31
-
32
- LISTENS_TO = [TabCreatedEvent, BrowserStateRequestEvent]
33
- EMITS = [BrowserErrorEvent]
34
-
35
- # Public properties for other watchdogs
36
- selector_map: dict[int, EnhancedDOMTreeNode] | None = None
37
- current_dom_state: SerializedDOMState | None = None
38
- enhanced_dom_tree: EnhancedDOMTreeNode | None = None
39
-
40
- # Internal DOM service
41
- _dom_service: DomService | None = None
42
-
43
- async def on_TabCreatedEvent(self, event: TabCreatedEvent) -> None:
44
- # self.logger.debug('Setting up init scripts in browser')
45
-
46
- self.logger.debug('💉 Injecting DOM Service init script to track event listeners added to DOM elements by JS...')
47
-
48
- init_script = """
49
- // check to make sure we're not inside the PDF viewer
50
- window.isPdfViewer = !!document?.body?.querySelector('body > embed[type="application/pdf"][width="100%"]')
51
- if (!window.isPdfViewer) {
52
-
53
- // Permissions
54
- const originalQuery = window.navigator.permissions.query;
55
- window.navigator.permissions.query = (parameters) => (
56
- parameters.name === 'notifications' ?
57
- Promise.resolve({ state: Notification.permission }) :
58
- originalQuery(parameters)
59
- );
60
- (() => {
61
- if (window._eventListenerTrackerInitialized) return;
62
- window._eventListenerTrackerInitialized = true;
63
-
64
- const originalAddEventListener = EventTarget.prototype.addEventListener;
65
- const eventListenersMap = new WeakMap();
66
-
67
- EventTarget.prototype.addEventListener = function(type, listener, options) {
68
- if (typeof listener === "function") {
69
- let listeners = eventListenersMap.get(this);
70
- if (!listeners) {
71
- listeners = [];
72
- eventListenersMap.set(this, listeners);
73
- }
74
-
75
- listeners.push({
76
- type,
77
- listener,
78
- listenerPreview: listener.toString().slice(0, 100),
79
- options
80
- });
81
- }
82
-
83
- return originalAddEventListener.call(this, type, listener, options);
84
- };
85
-
86
- window.getEventListenersForNode = (node) => {
87
- const listeners = eventListenersMap.get(node) || [];
88
- return listeners.map(({ type, listenerPreview, options }) => ({
89
- type,
90
- listenerPreview,
91
- options
92
- }));
93
- };
94
- })();
95
- }
96
- """
97
-
98
- # Try to inject the script, but don't fail if the Page domain isn't ready yet
99
- # This can happen when a new tab is created and the CDP session isn't fully attached
100
- try:
101
- await self.browser_session._cdp_add_init_script(init_script)
102
- except Exception as e:
103
- if "'Page.addScriptToEvaluateOnNewDocument' wasn't found" in str(e):
104
- self.logger.debug(f'Page domain not ready for new tab, skipping init script injection: {e}')
105
- # The script will be injected when the page actually navigates
106
- else:
107
- # Re-raise other errors
108
- raise
109
-
110
- def _get_recent_events_str(self, limit: int = 10) -> str | None:
111
- """Get the most recent event names from the event bus as CSV.
112
-
113
- Args:
114
- limit: Maximum number of recent events to include
115
-
116
- Returns:
117
- CSV string of recent event names or None if not available
118
- """
119
- try:
120
- # Get all events from history, sorted by creation time (most recent first)
121
- all_events = sorted(
122
- self.browser_session.event_bus.event_history.values(), key=lambda e: e.event_created_at.timestamp(),
123
- reverse=True
124
- )
125
-
126
- # Take the most recent events and get their names
127
- recent_event_names = [event.event_type for event in all_events[:limit]]
128
- # TODO: in the future dump these as JSON instead of a CSV of the event names only
129
- # some_event.model_dump(mode='json', exclude=some_event.event_builtin_fields)
130
- # include event_results summarized / truncated to some reasonable length
131
-
132
- if recent_event_names:
133
- return ', '.join(recent_event_names)
134
- except Exception as e:
135
- self.logger.debug(f'Failed to get recent events: {e}')
136
-
137
- return None
26
+ class CustomDOMWatchdog(DOMWatchdog):
138
27
 
139
28
  async def get_browser_state_no_event_bus(self, include_dom: bool = True,
140
29
  include_screenshot: bool = True,
@@ -391,564 +280,3 @@ class CustomDOMWatchdog(BaseWatchdog):
391
280
  recent_events=None,
392
281
  )
393
282
 
394
- async def on_BrowserStateRequestEvent(self, event: BrowserStateRequestEvent) -> 'BrowserStateSummary':
395
- """Handle browser state request by coordinating DOM building and screenshot capture.
396
-
397
- This is the main entry point for getting the complete browser state.
398
-
399
- Args:
400
- event: The browser state request event with options
401
-
402
- Returns:
403
- Complete BrowserStateSummary with DOM, screenshot, and target info
404
- """
405
- from browser_use.browser.views import BrowserStateSummary, PageInfo
406
-
407
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: STARTING browser state request')
408
- page_url = await self.browser_session.get_current_page_url()
409
- self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got page URL: {page_url}')
410
- if self.browser_session.agent_focus:
411
- self.logger.debug(
412
- f'📍 Current page URL: {page_url}, target_id: {self.browser_session.agent_focus.target_id}, session_id: {self.browser_session.agent_focus.session_id}'
413
- )
414
- else:
415
- self.logger.debug(f'📍 Current page URL: {page_url}, no cdp_session attached')
416
-
417
- # check if we should skip DOM tree build for pointless pages
418
- not_a_meaningful_website = page_url.lower().split(':', 1)[0] not in ('http', 'https')
419
-
420
- # Wait for page stability using browser profile settings (main branch pattern)
421
- if not not_a_meaningful_website:
422
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ⏳ Waiting for page stability...')
423
- try:
424
- await self._wait_for_stable_network()
425
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Page stability complete')
426
- except Exception as e:
427
- self.logger.warning(
428
- f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Network waiting failed: {e}, continuing anyway...'
429
- )
430
-
431
- # Get tabs info once at the beginning for all paths
432
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting tabs info...')
433
- tabs_info = await self.browser_session.get_tabs()
434
- self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got {len(tabs_info)} tabs')
435
- self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Tabs info: {tabs_info}')
436
-
437
- # Get viewport / scroll position info, remember changing scroll position should invalidate selector_map cache because it only includes visible elements
438
- # cdp_session = await self.browser_session.get_or_create_cdp_session(focus=True)
439
- # scroll_info = await cdp_session.cdp_client.send.Runtime.evaluate(
440
- # params={'expression': 'JSON.stringify({y: document.body.scrollTop, x: document.body.scrollLeft, width: document.documentElement.clientWidth, height: document.documentElement.clientHeight})'},
441
- # session_id=cdp_session.session_id,
442
- # )
443
- # self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got scroll info: {scroll_info["result"]}')
444
-
445
- try:
446
- # Fast path for empty pages
447
- if not_a_meaningful_website:
448
- self.logger.debug(f'⚡ Skipping BuildDOMTree for empty target: {page_url}')
449
- self.logger.info(f'📸 Not taking screenshot for empty page: {page_url} (non-http/https URL)')
450
-
451
- # Create minimal DOM state
452
- content = SerializedDOMState(_root=None, selector_map={})
453
-
454
- # Skip screenshot for empty pages
455
- screenshot_b64 = None
456
-
457
- # Try to get page info from CDP, fall back to defaults if unavailable
458
- try:
459
- page_info = await self._get_page_info()
460
- except Exception as e:
461
- self.logger.debug(f'Failed to get page info from CDP for empty page: {e}, using fallback')
462
- # Use default viewport dimensions
463
- viewport = self.browser_session.browser_profile.viewport or {'width': 1280, 'height': 720}
464
- page_info = PageInfo(
465
- viewport_width=viewport['width'],
466
- viewport_height=viewport['height'],
467
- page_width=viewport['width'],
468
- page_height=viewport['height'],
469
- scroll_x=0,
470
- scroll_y=0,
471
- pixels_above=0,
472
- pixels_below=0,
473
- pixels_left=0,
474
- pixels_right=0,
475
- )
476
-
477
- return BrowserStateSummary(
478
- dom_state=content,
479
- url=page_url,
480
- title='Empty Tab',
481
- tabs=tabs_info,
482
- screenshot=screenshot_b64,
483
- page_info=page_info,
484
- pixels_above=0,
485
- pixels_below=0,
486
- browser_errors=[],
487
- is_pdf_viewer=False,
488
- recent_events=self._get_recent_events_str() if event.include_recent_events else None,
489
- )
490
-
491
- # Execute DOM building and screenshot capture in parallel
492
- dom_task = None
493
- screenshot_task = None
494
-
495
- # Start DOM building task if requested
496
- if event.include_dom:
497
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 🌳 Starting DOM tree build task...')
498
-
499
- previous_state = (
500
- self.browser_session._cached_browser_state_summary.dom_state
501
- if self.browser_session._cached_browser_state_summary
502
- else None
503
- )
504
-
505
- dom_task = asyncio.create_task(self._build_dom_tree_without_highlights(previous_state))
506
-
507
- # Start clean screenshot task if requested (without JS highlights)
508
- if event.include_screenshot:
509
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Starting clean screenshot task...')
510
- screenshot_task = asyncio.create_task(self._capture_clean_screenshot())
511
-
512
- # Wait for both tasks to complete
513
- content = None
514
- screenshot_b64 = None
515
-
516
- if dom_task:
517
- try:
518
- content = await dom_task
519
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ DOM tree build completed')
520
- except Exception as e:
521
- self.logger.warning(
522
- f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: DOM build failed: {e}, using minimal state')
523
- content = SerializedDOMState(_root=None, selector_map={})
524
- else:
525
- content = SerializedDOMState(_root=None, selector_map={})
526
-
527
- if screenshot_task:
528
- try:
529
- screenshot_b64 = await screenshot_task
530
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Clean screenshot captured')
531
- except Exception as e:
532
- self.logger.warning(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Clean screenshot failed: {e}')
533
- screenshot_b64 = None
534
-
535
- # Apply Python-based highlighting if both DOM and screenshot are available
536
- if screenshot_b64 and content and content.selector_map and self.browser_session.browser_profile.highlight_elements:
537
- try:
538
- self.logger.debug(
539
- '🔍 DOMWatchdog.on_BrowserStateRequestEvent: 🎨 Applying Python-based highlighting...')
540
- from vibe_surf.browser.utils import create_highlighted_screenshot_async
541
-
542
- # Get CDP session for viewport info
543
- cdp_session = await self.browser_session.get_or_create_cdp_session()
544
-
545
- screenshot_b64 = await create_highlighted_screenshot_async(screenshot_b64, content.selector_map,
546
- cdp_session)
547
- self.logger.debug(
548
- f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Applied highlights to {len(content.selector_map)} elements'
549
- )
550
- except Exception as e:
551
- self.logger.warning(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Python highlighting failed: {e}')
552
-
553
- # Ensure we have valid content
554
- if not content:
555
- content = SerializedDOMState(_root=None, selector_map={})
556
-
557
- # Tabs info already fetched at the beginning
558
-
559
- # Get target title safely
560
- try:
561
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting page title...')
562
- title = await asyncio.wait_for(self.browser_session.get_current_page_title(), timeout=2.0)
563
- self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got title: {title}')
564
- except Exception as e:
565
- self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Failed to get title: {e}')
566
- title = 'Page'
567
-
568
- # Get comprehensive page info from CDP
569
- try:
570
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: Getting page info from CDP...')
571
- page_info = await self._get_page_info()
572
- self.logger.debug(f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Got page info from CDP: {page_info}')
573
- except Exception as e:
574
- self.logger.debug(
575
- f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Failed to get page info from CDP: {e}, using fallback'
576
- )
577
- # Fallback to default viewport dimensions
578
- viewport = self.browser_session.browser_profile.viewport or {'width': 1280, 'height': 720}
579
- page_info = PageInfo(
580
- viewport_width=viewport['width'],
581
- viewport_height=viewport['height'],
582
- page_width=viewport['width'],
583
- page_height=viewport['height'],
584
- scroll_x=0,
585
- scroll_y=0,
586
- pixels_above=0,
587
- pixels_below=0,
588
- pixels_left=0,
589
- pixels_right=0,
590
- )
591
-
592
- # Check for PDF viewer
593
- is_pdf_viewer = page_url.endswith('.pdf') or '/pdf/' in page_url
594
-
595
- # Build and cache the browser state summary
596
- if screenshot_b64:
597
- self.logger.debug(
598
- f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Creating BrowserStateSummary with screenshot, length: {len(screenshot_b64)}'
599
- )
600
- else:
601
- self.logger.debug(
602
- '🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Creating BrowserStateSummary WITHOUT screenshot'
603
- )
604
-
605
- browser_state = BrowserStateSummary(
606
- dom_state=content,
607
- url=page_url,
608
- title=title,
609
- tabs=tabs_info,
610
- screenshot=screenshot_b64,
611
- page_info=page_info,
612
- pixels_above=0,
613
- pixels_below=0,
614
- browser_errors=[],
615
- is_pdf_viewer=is_pdf_viewer,
616
- recent_events=self._get_recent_events_str() if event.include_recent_events else None,
617
- )
618
-
619
- # Cache the state
620
- self.browser_session._cached_browser_state_summary = browser_state
621
-
622
- self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ COMPLETED - Returning browser state')
623
- return browser_state
624
-
625
- except Exception as e:
626
- self.logger.error(f'Failed to get browser state: {e}')
627
-
628
- # Return minimal recovery state
629
- return BrowserStateSummary(
630
- dom_state=SerializedDOMState(_root=None, selector_map={}),
631
- url=page_url if 'page_url' in locals() else '',
632
- title='Error',
633
- tabs=[],
634
- screenshot=None,
635
- page_info=PageInfo(
636
- viewport_width=1280,
637
- viewport_height=720,
638
- page_width=1280,
639
- page_height=720,
640
- scroll_x=0,
641
- scroll_y=0,
642
- pixels_above=0,
643
- pixels_below=0,
644
- pixels_left=0,
645
- pixels_right=0,
646
- ),
647
- pixels_above=0,
648
- pixels_below=0,
649
- browser_errors=[str(e)],
650
- is_pdf_viewer=False,
651
- recent_events=None,
652
- )
653
-
654
- async def _build_dom_tree(self, previous_state: SerializedDOMState | None = None) -> SerializedDOMState:
655
- """Internal method to build and serialize DOM tree.
656
-
657
- This is the actual implementation that does the work, called by both
658
- on_BrowserStateRequestEvent.
659
-
660
- Returns:
661
- SerializedDOMState with serialized DOM and selector map
662
- """
663
- try:
664
- self.logger.debug('🔍 DOMWatchdog._build_dom_tree: STARTING DOM tree build')
665
- # Remove any existing highlights before building new DOM
666
- try:
667
- self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Removing existing highlights...')
668
- await self.browser_session.remove_highlights()
669
- # self.logger.debug('🔍 DOMWatchdog._build_dom_tree: ✅ Highlights removed')
670
- except Exception as e:
671
- self.logger.debug(f'🔍 DOMWatchdog._build_dom_tree: Failed to remove existing highlights: {e}')
672
-
673
- # Create or reuse DOM service
674
- if self._dom_service is None:
675
- # self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Creating DomService...')
676
- self._dom_service = DomService(browser_session=self.browser_session, logger=self.logger)
677
- # self.logger.debug('🔍 DOMWatchdog._build_dom_tree: ✅ DomService created')
678
- # else:
679
- # self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Reusing existing DomService')
680
-
681
- # Get serialized DOM tree using the service
682
- self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Calling DomService.get_serialized_dom_tree...')
683
- start = time.time()
684
- self.current_dom_state, self.enhanced_dom_tree, timing_info = await self._dom_service.get_serialized_dom_tree(
685
- previous_cached_state=previous_state,
686
- )
687
- end = time.time()
688
- self.logger.debug('🔍 DOMWatchdog._build_dom_tree: ✅ DomService.get_serialized_dom_tree completed')
689
-
690
- self.logger.debug(f'Time taken to get DOM tree: {end - start} seconds')
691
- self.logger.debug(f'Timing breakdown: {timing_info}')
692
-
693
- # Update selector map for other watchdogs
694
- self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Updating selector maps...')
695
- self.selector_map = self.current_dom_state.selector_map
696
- # Update BrowserSession's cached selector map
697
- if self.browser_session:
698
- self.browser_session.update_cached_selector_map(self.selector_map)
699
- self.logger.debug(
700
- f'🔍 DOMWatchdog._build_dom_tree: ✅ Selector maps updated, {len(self.selector_map)} elements')
701
-
702
- # Inject highlighting for visual feedback if we have elements
703
- if self.selector_map and self._dom_service:
704
- try:
705
- self.logger.debug('🔍 DOMWatchdog._build_dom_tree: Injecting highlighting script...')
706
- from browser_use.dom.debug.highlights import inject_highlighting_script
707
-
708
- await inject_highlighting_script(self._dom_service, self.selector_map)
709
- self.logger.debug(
710
- f'🔍 DOMWatchdog._build_dom_tree: ✅ Injected highlighting for {len(self.selector_map)} elements'
711
- )
712
- except Exception as e:
713
- self.logger.debug(f'🔍 DOMWatchdog._build_dom_tree: Failed to inject highlighting: {e}')
714
-
715
- self.logger.debug('🔍 DOMWatchdog._build_dom_tree: ✅ COMPLETED DOM tree build')
716
- return self.current_dom_state
717
-
718
- except Exception as e:
719
- self.logger.error(f'Failed to build DOM tree: {e}')
720
- self.event_bus.dispatch(
721
- BrowserErrorEvent(
722
- error_type='DOMBuildFailed',
723
- message=str(e),
724
- )
725
- )
726
- raise
727
-
728
- async def _build_dom_tree_without_highlights(self,
729
- previous_state: SerializedDOMState | None = None) -> SerializedDOMState:
730
- """Build DOM tree without injecting JavaScript highlights (for parallel execution)."""
731
- try:
732
- self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: STARTING DOM tree build')
733
-
734
- # Create or reuse DOM service
735
- if self._dom_service is None:
736
- self._dom_service = DomService(browser_session=self.browser_session, logger=self.logger)
737
-
738
- # Get serialized DOM tree using the service
739
- self.logger.debug(
740
- '🔍 DOMWatchdog._build_dom_tree_without_highlights: Calling DomService.get_serialized_dom_tree...')
741
- start = time.time()
742
- self.current_dom_state, self.enhanced_dom_tree, timing_info = await self._dom_service.get_serialized_dom_tree(
743
- previous_cached_state=previous_state,
744
- )
745
- end = time.time()
746
- self.logger.debug(
747
- '🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ DomService.get_serialized_dom_tree completed'
748
- )
749
-
750
- self.logger.debug(f'Time taken to get DOM tree: {end - start} seconds')
751
- self.logger.debug(f'Timing breakdown: {timing_info}')
752
-
753
- # Update selector map for other watchdogs
754
- self.logger.debug('🔍 DOMWatchdog._build_dom_tree_without_highlights: Updating selector maps...')
755
- self.selector_map = self.current_dom_state.selector_map
756
- # Update BrowserSession's cached selector map
757
- if self.browser_session:
758
- self.browser_session.update_cached_selector_map(self.selector_map)
759
- self.logger.debug(
760
- f'🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ Selector maps updated, {len(self.selector_map)} elements'
761
- )
762
-
763
- # Skip JavaScript highlighting injection - Python highlighting will be applied later
764
- self.logger.debug(
765
- '🔍 DOMWatchdog._build_dom_tree_without_highlights: ✅ COMPLETED DOM tree build (no JS highlights)')
766
- return self.current_dom_state
767
-
768
- except Exception as e:
769
- self.logger.error(f'Failed to build DOM tree without highlights: {e}')
770
- self.event_bus.dispatch(
771
- BrowserErrorEvent(
772
- error_type='DOMBuildFailed',
773
- message=str(e),
774
- )
775
- )
776
- raise
777
-
778
- async def _capture_clean_screenshot(self) -> str:
779
- """Capture a clean screenshot without JavaScript highlights."""
780
- try:
781
- self.logger.debug('🔍 DOMWatchdog._capture_clean_screenshot: Capturing clean screenshot...')
782
-
783
- # Ensure we have a focused CDP session
784
- assert self.browser_session.agent_focus is not None, 'No current target ID'
785
- await self.browser_session.get_or_create_cdp_session(target_id=self.browser_session.agent_focus.target_id,
786
- focus=True)
787
-
788
- # Check if handler is registered
789
- handlers = self.event_bus.handlers.get('ScreenshotEvent', [])
790
- handler_names = [getattr(h, '__name__', str(h)) for h in handlers]
791
- self.logger.debug(f'📸 ScreenshotEvent handlers registered: {len(handlers)} - {handler_names}')
792
-
793
- screenshot_event = self.event_bus.dispatch(ScreenshotEvent(full_page=False))
794
- self.logger.debug('📸 Dispatched ScreenshotEvent, waiting for event to complete...')
795
-
796
- # Wait for the event itself to complete (this waits for all handlers)
797
- await screenshot_event
798
-
799
- # Get the single handler result
800
- screenshot_b64 = await screenshot_event.event_result(raise_if_any=True, raise_if_none=True)
801
- if screenshot_b64 is None:
802
- raise RuntimeError('Screenshot handler returned None')
803
- self.logger.debug('🔍 DOMWatchdog._capture_clean_screenshot: ✅ Clean screenshot captured successfully')
804
- return str(screenshot_b64)
805
-
806
- except TimeoutError:
807
- self.logger.warning('📸 Clean screenshot timed out after 6 seconds - no handler registered or slow page?')
808
- raise
809
- except Exception as e:
810
- self.logger.warning(f'📸 Clean screenshot failed: {type(e).__name__}: {e}')
811
- raise
812
-
813
- async def _wait_for_stable_network(self):
814
- """Wait for page stability - simplified for CDP-only branch."""
815
- start_time = time.time()
816
-
817
- # Apply minimum wait time first (let page settle)
818
- min_wait = self.browser_session.browser_profile.minimum_wait_page_load_time
819
- if min_wait > 0:
820
- self.logger.debug(f'⏳ Minimum wait: {min_wait}s')
821
- await asyncio.sleep(min_wait)
822
-
823
- # Apply network idle wait time (for dynamic content like iframes)
824
- network_idle_wait = self.browser_session.browser_profile.wait_for_network_idle_page_load_time
825
- if network_idle_wait > 0:
826
- self.logger.debug(f'⏳ Network idle wait: {network_idle_wait}s')
827
- await asyncio.sleep(network_idle_wait)
828
-
829
- elapsed = time.time() - start_time
830
- self.logger.debug(f'✅ Page stability wait completed in {elapsed:.2f}s')
831
-
832
- async def _get_page_info(self) -> 'PageInfo':
833
- """Get comprehensive page information using a single CDP call.
834
-
835
- TODO: should we make this an event as well?
836
-
837
- Returns:
838
- PageInfo with all viewport, page dimensions, and scroll information
839
- """
840
-
841
- from browser_use.browser.views import PageInfo
842
-
843
- # Get CDP session for the current target
844
- if not self.browser_session.agent_focus:
845
- raise RuntimeError('No active CDP session - browser may not be connected yet')
846
-
847
- cdp_session = await self.browser_session.get_or_create_cdp_session(
848
- target_id=self.browser_session.agent_focus.target_id, focus=True
849
- )
850
-
851
- # Get layout metrics which includes all the information we need
852
- metrics = await asyncio.wait_for(
853
- cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id), timeout=10.0
854
- )
855
-
856
- # Extract different viewport types
857
- layout_viewport = metrics.get('layoutViewport', {})
858
- visual_viewport = metrics.get('visualViewport', {})
859
- css_visual_viewport = metrics.get('cssVisualViewport', {})
860
- css_layout_viewport = metrics.get('cssLayoutViewport', {})
861
- content_size = metrics.get('contentSize', {})
862
-
863
- # Calculate device pixel ratio to convert between device pixels and CSS pixels
864
- # This matches the approach in dom/service.py _get_viewport_ratio method
865
- css_width = css_visual_viewport.get('clientWidth', css_layout_viewport.get('clientWidth', 1280.0))
866
- device_width = visual_viewport.get('clientWidth', css_width)
867
- device_pixel_ratio = device_width / css_width if css_width > 0 else 1.0
868
-
869
- # For viewport dimensions, use CSS pixels (what JavaScript sees)
870
- # Prioritize CSS layout viewport, then fall back to layout viewport
871
- viewport_width = int(css_layout_viewport.get('clientWidth') or layout_viewport.get('clientWidth', 1280))
872
- viewport_height = int(css_layout_viewport.get('clientHeight') or layout_viewport.get('clientHeight', 720))
873
-
874
- # For total page dimensions, content size is typically in device pixels, so convert to CSS pixels
875
- # by dividing by device pixel ratio
876
- raw_page_width = content_size.get('width', viewport_width * device_pixel_ratio)
877
- raw_page_height = content_size.get('height', viewport_height * device_pixel_ratio)
878
- page_width = int(raw_page_width / device_pixel_ratio)
879
- page_height = int(raw_page_height / device_pixel_ratio)
880
-
881
- # For scroll position, use CSS visual viewport if available, otherwise CSS layout viewport
882
- # These should already be in CSS pixels
883
- scroll_x = int(css_visual_viewport.get('pageX') or css_layout_viewport.get('pageX', 0))
884
- scroll_y = int(css_visual_viewport.get('pageY') or css_layout_viewport.get('pageY', 0))
885
-
886
- # Calculate scroll information - pixels that are above/below/left/right of current viewport
887
- pixels_above = scroll_y
888
- pixels_below = max(0, page_height - viewport_height - scroll_y)
889
- pixels_left = scroll_x
890
- pixels_right = max(0, page_width - viewport_width - scroll_x)
891
-
892
- page_info = PageInfo(
893
- viewport_width=viewport_width,
894
- viewport_height=viewport_height,
895
- page_width=page_width,
896
- page_height=page_height,
897
- scroll_x=scroll_x,
898
- scroll_y=scroll_y,
899
- pixels_above=pixels_above,
900
- pixels_below=pixels_below,
901
- pixels_left=pixels_left,
902
- pixels_right=pixels_right,
903
- )
904
-
905
- return page_info
906
-
907
- # ========== Public Helper Methods ==========
908
-
909
- async def get_element_by_index(self, index: int) -> EnhancedDOMTreeNode | None:
910
- """Get DOM element by index from cached selector map.
911
-
912
- Builds DOM if not cached.
913
-
914
- Returns:
915
- EnhancedDOMTreeNode or None if index not found
916
- """
917
- if not self.selector_map:
918
- # Build DOM if not cached
919
- await self._build_dom_tree()
920
-
921
- return self.selector_map.get(index) if self.selector_map else None
922
-
923
- def clear_cache(self) -> None:
924
- """Clear cached DOM state to force rebuild on next access."""
925
- self.selector_map = None
926
- self.current_dom_state = None
927
- self.enhanced_dom_tree = None
928
-
929
- # Keep the DOM service instance to reuse its CDP client connection
930
-
931
- def is_file_input(self, element: EnhancedDOMTreeNode) -> bool:
932
- """Check if element is a file input."""
933
- return element.node_name.upper() == 'INPUT' and element.attributes.get('type', '').lower() == 'file'
934
-
935
- @staticmethod
936
- def is_element_visible_according_to_all_parents(node: EnhancedDOMTreeNode,
937
- html_frames: list[EnhancedDOMTreeNode]) -> bool:
938
- """Check if the element is visible according to all its parent HTML frames.
939
-
940
- Delegates to the DomService static method.
941
- """
942
- return DomService.is_element_visible_according_to_all_parents(node, html_frames)
943
-
944
- async def __aexit__(self, exc_type, exc_value, traceback):
945
- """Clean up DOM service on exit."""
946
- if self._dom_service:
947
- await self._dom_service.__aexit__(exc_type, exc_value, traceback)
948
- self._dom_service = None
949
-
950
- def __del__(self):
951
- """Clean up DOM service on deletion."""
952
- super().__del__()
953
- # DOM service will clean up its own CDP client
954
- self._dom_service = None