vibesurf 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vibesurf might be problematic. Click here for more details.

@@ -2,8 +2,9 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import os
5
+ import pdb
5
6
  from pathlib import Path
6
- from typing import List, Optional
7
+ from typing import Any, List, Optional
7
8
 
8
9
  from browser_use.browser.session import BrowserSession, CDPSession
9
10
  from pydantic import Field
@@ -16,27 +17,120 @@ from browser_use.browser.views import BrowserStateSummary
16
17
  from browser_use.dom.views import TargetInfo
17
18
  from vibe_surf.browser.agen_browser_profile import AgentBrowserProfile
18
19
  from typing import Self
20
+ from uuid_extensions import uuid7str
19
21
 
20
22
  DEFAULT_BROWSER_PROFILE = AgentBrowserProfile()
21
23
 
22
24
  class AgentBrowserSession(BrowserSession):
23
25
  """Isolated browser session for a specific agent."""
26
+
27
+ def __init__(
28
+ self,
29
+ # Core configuration
30
+ id: str | None = None,
31
+ cdp_url: str | None = None,
32
+ is_local: bool = False,
33
+ browser_profile: AgentBrowserProfile | None = None,
34
+ # Custom AgentBrowserSession fields
35
+ main_browser_session: BrowserSession | None = None,
36
+ # BrowserProfile fields that can be passed directly
37
+ # From BrowserConnectArgs
38
+ headers: dict[str, str] | None = None,
39
+ # From BrowserLaunchArgs
40
+ env: dict[str, str | float | bool] | None = None,
41
+ executable_path: str | Path | None = None,
42
+ headless: bool | None = None,
43
+ args: list[str] | None = None,
44
+ ignore_default_args: list[str] | list[bool] | None = None,
45
+ channel: str | None = None,
46
+ chromium_sandbox: bool | None = None,
47
+ devtools: bool | None = None,
48
+ downloads_path: str | Path | None = None,
49
+ traces_dir: str | Path | None = None,
50
+ # From BrowserContextArgs
51
+ accept_downloads: bool | None = None,
52
+ permissions: list[str] | None = None,
53
+ user_agent: str | None = None,
54
+ screen: dict | None = None,
55
+ viewport: dict | None = None,
56
+ no_viewport: bool | None = None,
57
+ device_scale_factor: float | None = None,
58
+ record_har_content: str | None = None,
59
+ record_har_mode: str | None = None,
60
+ record_har_path: str | Path | None = None,
61
+ record_video_dir: str | Path | None = None,
62
+ # From BrowserLaunchPersistentContextArgs
63
+ user_data_dir: str | Path | None = None,
64
+ # From BrowserNewContextArgs
65
+ storage_state: str | Path | dict[str, Any] | None = None,
66
+ # BrowserProfile specific fields
67
+ disable_security: bool | None = None,
68
+ deterministic_rendering: bool | None = None,
69
+ allowed_domains: list[str] | None = None,
70
+ keep_alive: bool | None = None,
71
+ proxy: any | None = None,
72
+ enable_default_extensions: bool | None = None,
73
+ window_size: dict | None = None,
74
+ window_position: dict | None = None,
75
+ cross_origin_iframes: bool | None = None,
76
+ minimum_wait_page_load_time: float | None = None,
77
+ wait_for_network_idle_page_load_time: float | None = None,
78
+ wait_between_actions: float | None = None,
79
+ highlight_elements: bool | None = None,
80
+ filter_highlight_ids: bool | None = None,
81
+ auto_download_pdfs: bool | None = None,
82
+ profile_directory: str | None = None,
83
+ cookie_whitelist_domains: list[str] | None = None,
84
+ # AgentBrowserProfile specific fields
85
+ custom_extensions: list[str] | None = None,
86
+ ):
87
+ # Filter out AgentBrowserSession specific parameters
88
+ agent_session_params = {
89
+ 'main_browser_session': main_browser_session,
90
+ }
91
+
92
+ # Get all browser profile parameters
93
+ profile_kwargs = {k: v for k, v in locals().items()
94
+ if k not in ['self', 'browser_profile', 'id', 'main_browser_session']
95
+ and v is not None}
96
+
97
+ # Apply BrowserSession's is_local logic first
98
+ effective_is_local = is_local
99
+ if is_local is False and executable_path is not None:
100
+ effective_is_local = True
101
+ if not cdp_url:
102
+ effective_is_local = True
103
+
104
+ # Always include is_local in profile_kwargs to ensure it's properly set
105
+ profile_kwargs['is_local'] = effective_is_local
106
+
107
+ # Create AgentBrowserProfile from direct parameters or use provided one
108
+ if browser_profile is not None:
109
+ # Always merge to ensure is_local logic is applied
110
+ merged_kwargs = {**browser_profile.model_dump(), **profile_kwargs}
111
+ resolved_browser_profile = AgentBrowserProfile(**merged_kwargs)
112
+ else:
113
+ resolved_browser_profile = AgentBrowserProfile(**profile_kwargs)
114
+
115
+ # Initialize the Pydantic model directly (like BrowserSession does)
116
+ # Don't call BrowserSession.__init__ as it would recreate BrowserProfile and lose custom_extensions
117
+ from pydantic import BaseModel
118
+ BaseModel.__init__(
119
+ self,
120
+ id=id or str(uuid7str()),
121
+ browser_profile=resolved_browser_profile,
122
+ )
123
+
124
+ # Set AgentBrowserSession specific fields
125
+ self.main_browser_session = main_browser_session
126
+
127
+ # Override browser_profile field to ensure it's always AgentBrowserProfile
24
128
  browser_profile: AgentBrowserProfile = Field(
25
129
  default_factory=lambda: DEFAULT_BROWSER_PROFILE,
26
- description='BrowserProfile() options to use for the session, otherwise a default profile will be used',
130
+ description='AgentBrowserProfile() options to use for the session',
27
131
  )
28
132
  main_browser_session: BrowserSession | None = Field(default=None)
29
- connected_agent: bool = False
30
- # Add a flag to control DVD animation (for future extensibility)
31
- disable_dvd_animation: bool = Field(
32
- default=True,
33
- description="Disable the DVD screensaver animation on about:blank pages"
34
- )
35
- # Custom extensions to load
36
- custom_extension_paths: List[str] = Field(
37
- default_factory=list,
38
- description="List of paths to custom Chrome extensions to load"
39
- )
133
+
40
134
 
41
135
  async def connect_agent(self, target_id: str) -> Self:
42
136
  """Register agent to browser with optional target assignment."""
@@ -54,7 +148,6 @@ class AgentBrowserSession(BrowserSession):
54
148
  await self.agent_focus.cdp_client.send.Runtime.runIfWaitingForDebugger(
55
149
  session_id=self.agent_focus.session_id)
56
150
  self._cdp_session_pool[target_id] = self.agent_focus
57
- self.connected_agent = True
58
151
  return self
59
152
 
60
153
  async def disconnect_agent(self) -> None:
@@ -62,7 +155,7 @@ class AgentBrowserSession(BrowserSession):
62
155
  for session in self._cdp_session_pool.values():
63
156
  await session.disconnect()
64
157
  self._cdp_session_pool.clear()
65
- self.connected_agent = False
158
+ self.main_browser_session = None
66
159
 
67
160
  async def _cdp_get_all_pages(
68
161
  self,
@@ -80,7 +173,7 @@ class AgentBrowserSession(BrowserSession):
80
173
  if not self._cdp_client_root:
81
174
  return []
82
175
  targets = await self.cdp_client.send.Target.getTargets()
83
- if self.connected_agent:
176
+ if self.main_browser_session is not None:
84
177
  assigned_target_ids = self._cdp_session_pool.keys()
85
178
  return [
86
179
  t
@@ -126,12 +219,12 @@ class AgentBrowserSession(BrowserSession):
126
219
  from vibe_surf.browser.watchdogs.action_watchdog import CustomActionWatchdog
127
220
  from vibe_surf.browser.watchdogs.dom_watchdog import CustomDOMWatchdog
128
221
 
129
- from browser_use.browser.downloads_watchdog import DownloadsWatchdog
130
- from browser_use.browser.local_browser_watchdog import LocalBrowserWatchdog
131
- from browser_use.browser.permissions_watchdog import PermissionsWatchdog
132
- from browser_use.browser.popups_watchdog import PopupsWatchdog
133
- from browser_use.browser.screenshot_watchdog import ScreenshotWatchdog
134
- from browser_use.browser.security_watchdog import SecurityWatchdog
222
+ from browser_use.browser.watchdogs.downloads_watchdog import DownloadsWatchdog
223
+ from browser_use.browser.watchdogs.local_browser_watchdog import LocalBrowserWatchdog
224
+ from browser_use.browser.watchdogs.permissions_watchdog import PermissionsWatchdog
225
+ from browser_use.browser.watchdogs.popups_watchdog import PopupsWatchdog
226
+ from browser_use.browser.watchdogs.screenshot_watchdog import ScreenshotWatchdog
227
+ from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
135
228
 
136
229
  # NOTE: AboutBlankWatchdog is deliberately excluded to disable DVD animation
137
230
 
@@ -184,36 +277,6 @@ class AgentBrowserSession(BrowserSession):
184
277
 
185
278
  self.logger.info('✅ VibeSurfBrowserSession: All watchdogs attached (AboutBlankWatchdog excluded)')
186
279
 
187
- async def _ensure_minimal_about_blank_tab(self) -> None:
188
- """
189
- Ensure there's at least one about:blank tab without any animation.
190
- This replaces AboutBlankWatchdog's functionality but without the DVD animation.
191
- """
192
- try:
193
- # Get all page targets using CDP
194
- page_targets = await self._cdp_get_all_pages()
195
-
196
- # If no tabs exist at all, create one to keep browser alive
197
- if len(page_targets) == 0:
198
- self.logger.info('[VibeSurfBrowserSession] No tabs exist, creating new about:blank tab (no animation)')
199
- from browser_use.browser.events import NavigateToUrlEvent
200
- navigate_event = self.event_bus.dispatch(NavigateToUrlEvent(url='about:blank', new_tab=True))
201
- await navigate_event
202
- # Note: NO DVD screensaver injection here!
203
-
204
- except Exception as e:
205
- self.logger.error(f'[VibeSurfBrowserSession] Error ensuring about:blank tab: {e}')
206
-
207
- async def on_BrowserStartEvent(self, event) -> dict[str, str]:
208
- """Override to ensure minimal about:blank handling without animation."""
209
- # Call parent implementation first
210
- result = await super().on_BrowserStartEvent(event)
211
-
212
- # Ensure we have at least one tab without animation
213
- await self._ensure_minimal_about_blank_tab()
214
-
215
- return result
216
-
217
280
  def get_cdp_session_pool(self):
218
281
  return self._cdp_session_pool
219
282
 
@@ -89,7 +89,7 @@ class BrowserManager:
89
89
  # Get or create available target
90
90
  if target_id is None:
91
91
  new_target = await self.main_browser_session.cdp_client.send.Target.createTarget(
92
- params={'url': 'about:blank'})
92
+ params={'url': ''})
93
93
  target_id = new_target["targetId"]
94
94
 
95
95
  await agent_session.connect_agent(target_id=target_id)
@@ -257,7 +257,7 @@ class BrowserManager:
257
257
  if page_targets:
258
258
  target_id = page_targets[-1]["targetId"]
259
259
  else:
260
- new_target = await client.send.Target.createTarget(params={'url': 'about:blank'})
260
+ new_target = await client.send.Target.createTarget(params={'url': ''})
261
261
  target_id = new_target["targetId"]
262
262
  await self.main_browser_session.get_or_create_cdp_session(target_id, focus=False)
263
263
  return target_id
@@ -689,21 +689,14 @@ def create_highlighted_screenshot(
689
689
  for element_id, element in selector_map.items():
690
690
  try:
691
691
  # Use snapshot bounds (document coordinates) if available, otherwise absolute_position
692
- bounds = None
693
- if element.snapshot_node and element.snapshot_node.bounds:
694
- bounds = element.snapshot_node.bounds
695
- elif element.absolute_position:
696
- bounds = element.absolute_position
697
-
698
- if not bounds:
699
- continue
700
-
701
- # Convert from CSS pixels to device pixels for screenshot coordinates
702
- # Note: bounds are already in CSS pixels, screenshot is in device pixels
703
- x1 = int((bounds.x - viewport_offset_x) * device_pixel_ratio)
704
- y1 = int((bounds.y - viewport_offset_y) * device_pixel_ratio)
705
- x2 = int((bounds.x + bounds.width - viewport_offset_x) * device_pixel_ratio)
706
- y2 = int((bounds.y + bounds.height - viewport_offset_y) * device_pixel_ratio)
692
+ bounds = element.absolute_position
693
+
694
+ # Scale coordinates from CSS pixels to device pixels for screenshot
695
+ # The screenshot is captured at device pixel resolution, but coordinates are in CSS pixels
696
+ x1 = int(bounds.x * device_pixel_ratio)
697
+ y1 = int(bounds.y * device_pixel_ratio)
698
+ x2 = int((bounds.x + bounds.width) * device_pixel_ratio)
699
+ y2 = int((bounds.y + bounds.height) * device_pixel_ratio)
707
700
 
708
701
  # Ensure coordinates are within image bounds
709
702
  img_width, img_height = image.size
@@ -1,6 +1,6 @@
1
1
  import asyncio
2
2
 
3
- from browser_use.browser.default_action_watchdog import DefaultActionWatchdog
3
+ from browser_use.browser.watchdogs.default_action_watchdog import DefaultActionWatchdog
4
4
  from browser_use.browser.events import (
5
5
  ClickElementEvent,
6
6
  GetDropdownOptionsEvent,
@@ -45,11 +45,14 @@ class CustomActionWatchdog(DefaultActionWatchdog):
45
45
  msg = f'Index {index_for_logging} - has an element which opens file upload dialog. To upload files please use a specific function to upload files'
46
46
  self.logger.info(msg)
47
47
  raise BrowserError(
48
- 'Click triggered a file input element which could not be handled, use the dedicated file upload function instead'
48
+ message=msg,
49
+ long_term_memory=msg,
49
50
  )
50
51
 
51
52
  # Perform the actual click using internal implementation
52
- await self._click_element_node_impl(element_node, while_holding_ctrl=event.while_holding_ctrl)
53
+ click_metadata = None
54
+ click_metadata = await self._click_element_node_impl(element_node,
55
+ while_holding_ctrl=event.while_holding_ctrl)
53
56
  download_path = None # moved to downloads_watchdog.py
54
57
 
55
58
  # Build success message
@@ -63,15 +66,9 @@ class CustomActionWatchdog(DefaultActionWatchdog):
63
66
 
64
67
  # Wait a bit for potential new tab to be created
65
68
  # This is necessary because tab creation is async and might not be immediate
66
- await asyncio.sleep(1)
69
+ await asyncio.sleep(0.5)
67
70
 
68
71
  # Clear cached state after click action since DOM might have changed
69
- self.logger.debug('🔄 Click action completed, clearing cached browser state')
70
- self.browser_session._cached_browser_state_summary = None
71
- self.browser_session._cached_selector_map.clear()
72
- if self.browser_session._dom_watchdog:
73
- self.browser_session._dom_watchdog.clear_cache()
74
- # Successfully clicked, always reset session back to parent page session context
75
72
  self.browser_session.agent_focus = await self.browser_session.get_or_create_cdp_session(
76
73
  target_id=starting_target_id, focus=True
77
74
  )
@@ -105,187 +102,4 @@ class CustomActionWatchdog(DefaultActionWatchdog):
105
102
 
106
103
  return None
107
104
  except Exception as e:
108
- raise
109
-
110
- async def _input_text_element_node_impl(self, element_node, text: str, clear_existing: bool = True) -> dict | None:
111
- """
112
- Input text into an element using pure CDP with improved focus fallbacks.
113
- """
114
-
115
- try:
116
- # Get CDP client
117
- cdp_session = await self.browser_session.cdp_client_for_node(element_node)
118
-
119
- # Get element info
120
- backend_node_id = element_node.backend_node_id
121
-
122
- # Track coordinates for metadata
123
- input_coordinates = None
124
-
125
- # Scroll element into view
126
- try:
127
- await cdp_session.cdp_client.send.DOM.scrollIntoViewIfNeeded(
128
- params={'backendNodeId': backend_node_id}, session_id=cdp_session.session_id
129
- )
130
- await asyncio.sleep(0.1)
131
- except Exception as e:
132
- self.logger.warning(
133
- f'⚠️ Failed to focus the page {cdp_session} and scroll element {element_node} into view before typing in text: {type(e).__name__}: {e}'
134
- )
135
-
136
- # Get object ID for the element
137
- result = await cdp_session.cdp_client.send.DOM.resolveNode(
138
- params={'backendNodeId': backend_node_id},
139
- session_id=cdp_session.session_id,
140
- )
141
- assert 'object' in result and 'objectId' in result['object'], (
142
- 'Failed to find DOM element based on backendNodeId, maybe page content changed?'
143
- )
144
- object_id = result['object']['objectId']
145
-
146
- # Check element focusability before attempting focus
147
- element_info = await self._check_element_focusability(element_node, object_id, cdp_session.session_id)
148
- self.logger.debug(f'Element focusability check: {element_info}')
149
-
150
- # Extract coordinates from element bounds for metadata
151
- bounds = element_info.get('bounds', {})
152
- if bounds.get('width', 0) > 0 and bounds.get('height', 0) > 0:
153
- center_x = bounds['x'] + bounds['width'] / 2
154
- center_y = bounds['y'] + bounds['height'] / 2
155
- input_coordinates = {"input_x": center_x, "input_y": center_y}
156
- self.logger.debug(f'📍 Input coordinates: x={center_x:.1f}, y={center_y:.1f}')
157
-
158
- # Provide helpful warnings for common issues
159
- if not element_info.get('visible', False):
160
- self.logger.warning('⚠️ Target element appears to be invisible or has zero dimensions')
161
- if element_info.get('disabled', False):
162
- self.logger.warning('⚠️ Target element appears to be disabled')
163
- if not element_info.get('focusable', False):
164
- self.logger.warning('⚠️ Target element may not be focusable by standard criteria')
165
-
166
- # Clear existing text if requested
167
- if clear_existing:
168
- await cdp_session.cdp_client.send.Runtime.callFunctionOn(
169
- params={
170
- 'functionDeclaration': 'function() { if (this.value !== undefined) this.value = ""; if (this.textContent !== undefined) this.textContent = ""; }',
171
- 'objectId': object_id,
172
- },
173
- session_id=cdp_session.session_id,
174
- )
175
-
176
- # Try multiple focus strategies
177
- focused_successfully = False
178
-
179
- # Strategy 1: Try CDP DOM.focus (original method)
180
- try:
181
- await cdp_session.cdp_client.send.DOM.focus(
182
- params={'backendNodeId': backend_node_id},
183
- session_id=cdp_session.session_id,
184
- )
185
- focused_successfully = True
186
- self.logger.debug('✅ Element focused using CDP DOM.focus')
187
- except Exception as e:
188
- self.logger.debug(f'CDP DOM.focus failed: {e}')
189
-
190
- # Strategy 2: Try JavaScript focus as fallback
191
- try:
192
- await cdp_session.cdp_client.send.Runtime.callFunctionOn(
193
- params={
194
- 'functionDeclaration': 'function() { this.focus(); }',
195
- 'objectId': object_id,
196
- },
197
- session_id=cdp_session.session_id,
198
- )
199
- focused_successfully = True
200
- self.logger.debug('✅ Element focused using JavaScript focus()')
201
- except Exception as js_e:
202
- self.logger.debug(f'JavaScript focus failed: {js_e}')
203
-
204
- # Strategy 3: Try click-to-focus for stubborn elements
205
- try:
206
- await cdp_session.cdp_client.send.Runtime.callFunctionOn(
207
- params={
208
- 'functionDeclaration': 'function() { this.click(); this.focus(); }',
209
- 'objectId': object_id,
210
- },
211
- session_id=cdp_session.session_id,
212
- )
213
- focused_successfully = True
214
- self.logger.debug('✅ Element focused using click + focus combination')
215
- except Exception as click_e:
216
- self.logger.debug(f'Click + focus failed: {click_e}')
217
-
218
- # Strategy 4: Try simulated mouse click for maximum compatibility
219
- try:
220
- # Use coordinates already calculated from element bounds
221
- if input_coordinates and 'input_x' in input_coordinates and 'input_y' in input_coordinates:
222
- click_x = input_coordinates['input_x']
223
- click_y = input_coordinates['input_y']
224
-
225
- await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
226
- params={
227
- 'type': 'mousePressed',
228
- 'x': click_x,
229
- 'y': click_y,
230
- 'button': 'left',
231
- 'clickCount': 1,
232
- },
233
- session_id=cdp_session.session_id,
234
- )
235
- await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
236
- params={
237
- 'type': 'mouseReleased',
238
- 'x': click_x,
239
- 'y': click_y,
240
- 'button': 'left',
241
- 'clickCount': 1,
242
- },
243
- session_id=cdp_session.session_id,
244
- )
245
- focused_successfully = True
246
- self.logger.debug('✅ Element focused using simulated mouse click')
247
- else:
248
- self.logger.debug('Element bounds not available for mouse click')
249
- except Exception as mouse_e:
250
- self.logger.debug(f'Simulated mouse click failed: {mouse_e}')
251
-
252
- # Log focus result
253
- if not focused_successfully:
254
- self.logger.warning('⚠️ All focus strategies failed, typing without explicit focus')
255
-
256
- # Type the text character by character
257
- for char in text:
258
- # Send keydown (without text to avoid duplication)
259
- await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
260
- params={
261
- 'type': 'keyDown',
262
- 'key': char,
263
- },
264
- session_id=cdp_session.session_id,
265
- )
266
- # Send char (for actual text input)
267
- await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
268
- params={
269
- 'type': 'char',
270
- 'text': char,
271
- 'key': char,
272
- },
273
- session_id=cdp_session.session_id,
274
- )
275
- # Send keyup (without text to avoid duplication)
276
- await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
277
- params={
278
- 'type': 'keyUp',
279
- 'key': char,
280
- },
281
- session_id=cdp_session.session_id,
282
- )
283
- # Small delay between characters
284
- await asyncio.sleep(0.01)
285
-
286
- # Return coordinates metadata if available
287
- return input_coordinates
288
-
289
- except Exception as e:
290
- self.logger.error(f'Failed to input text via CDP: {type(e).__name__}: {e}')
291
- raise BrowserError(f'Failed to input text into element: {repr(element_node)}')
105
+ raise