PyPI - vibesurf - Versions diffs - 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl - Mend

vibesurf 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of vibesurf might be problematic. Click here for more details.

Files changed (19) hide show

vibe_surf/_version.py +2 -2
vibe_surf/agents/browser_use_agent.py +48 -154
vibe_surf/agents/vibe_surf_agent.py +10 -9
vibe_surf/backend/shared_state.py +1 -1
vibe_surf/backend/utils/encryption.py +5 -35
vibe_surf/browser/agen_browser_profile.py +3 -4
vibe_surf/browser/agent_browser_session.py +115 -52
vibe_surf/browser/browser_manager.py +2 -2
vibe_surf/browser/utils.py +8 -15
vibe_surf/browser/watchdogs/action_watchdog.py +8 -194
vibe_surf/browser/watchdogs/dom_watchdog.py +3 -675
vibe_surf/controller/mcp_client.py +0 -4
vibe_surf/controller/{vibesurf_controller.py → vibesurf_tools.py} +13 -48
{vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/METADATA +8 -3
{vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/RECORD +19 -19
{vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/WHEEL +0 -0
{vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/entry_points.txt +0 -0
{vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/licenses/LICENSE +0 -0
{vibesurf-0.1.7.dist-info → vibesurf-0.1.8.dist-info}/top_level.txt +0 -0

vibe_surf/browser/agent_browser_session.py CHANGED Viewed

@@ -2,8 +2,9 @@ from __future__ import annotations
 import asyncio
 import os
+import pdb
 from pathlib import Path
-from typing import List, Optional
+from typing import Any, List, Optional
 from browser_use.browser.session import BrowserSession, CDPSession
 from pydantic import Field
@@ -16,27 +17,120 @@ from browser_use.browser.views import BrowserStateSummary
 from browser_use.dom.views import TargetInfo
 from vibe_surf.browser.agen_browser_profile import AgentBrowserProfile
 from typing import Self
+from uuid_extensions import uuid7str
 DEFAULT_BROWSER_PROFILE = AgentBrowserProfile()
 class AgentBrowserSession(BrowserSession):
     """Isolated browser session for a specific agent."""
+    def __init__(
+        self,
+        # Core configuration
+        id: str | None = None,
+        cdp_url: str | None = None,
+        is_local: bool = False,
+        browser_profile: AgentBrowserProfile | None = None,
+        # Custom AgentBrowserSession fields
+        main_browser_session: BrowserSession | None = None,
+        # BrowserProfile fields that can be passed directly
+        # From BrowserConnectArgs
+        headers: dict[str, str] | None = None,
+        # From BrowserLaunchArgs
+        env: dict[str, str | float | bool] | None = None,
+        executable_path: str | Path | None = None,
+        headless: bool | None = None,
+        args: list[str] | None = None,
+        ignore_default_args: list[str] | list[bool] | None = None,
+        channel: str | None = None,
+        chromium_sandbox: bool | None = None,
+        devtools: bool | None = None,
+        downloads_path: str | Path | None = None,
+        traces_dir: str | Path | None = None,
+        # From BrowserContextArgs
+        accept_downloads: bool | None = None,
+        permissions: list[str] | None = None,
+        user_agent: str | None = None,
+        screen: dict | None = None,
+        viewport: dict | None = None,
+        no_viewport: bool | None = None,
+        device_scale_factor: float | None = None,
+        record_har_content: str | None = None,
+        record_har_mode: str | None = None,
+        record_har_path: str | Path | None = None,
+        record_video_dir: str | Path | None = None,
+        # From BrowserLaunchPersistentContextArgs
+        user_data_dir: str | Path | None = None,
+        # From BrowserNewContextArgs
+        storage_state: str | Path | dict[str, Any] | None = None,
+        # BrowserProfile specific fields
+        disable_security: bool | None = None,
+        deterministic_rendering: bool | None = None,
+        allowed_domains: list[str] | None = None,
+        keep_alive: bool | None = None,
+        proxy: any | None = None,
+        enable_default_extensions: bool | None = None,
+        window_size: dict | None = None,
+        window_position: dict | None = None,
+        cross_origin_iframes: bool | None = None,
+        minimum_wait_page_load_time: float | None = None,
+        wait_for_network_idle_page_load_time: float | None = None,
+        wait_between_actions: float | None = None,
+        highlight_elements: bool | None = None,
+        filter_highlight_ids: bool | None = None,
+        auto_download_pdfs: bool | None = None,
+        profile_directory: str | None = None,
+        cookie_whitelist_domains: list[str] | None = None,
+        # AgentBrowserProfile specific fields
+        custom_extensions: list[str] | None = None,
+    ):
+        # Filter out AgentBrowserSession specific parameters
+        agent_session_params = {
+            'main_browser_session': main_browser_session,
+        }
+        # Get all browser profile parameters
+        profile_kwargs = {k: v for k, v in locals().items()
+                         if k not in ['self', 'browser_profile', 'id', 'main_browser_session']
+                         and v is not None}
+        # Apply BrowserSession's is_local logic first
+        effective_is_local = is_local
+        if is_local is False and executable_path is not None:
+            effective_is_local = True
+        if not cdp_url:
+            effective_is_local = True
+        # Always include is_local in profile_kwargs to ensure it's properly set
+        profile_kwargs['is_local'] = effective_is_local
+        # Create AgentBrowserProfile from direct parameters or use provided one
+        if browser_profile is not None:
+            # Always merge to ensure is_local logic is applied
+            merged_kwargs = {**browser_profile.model_dump(), **profile_kwargs}
+            resolved_browser_profile = AgentBrowserProfile(**merged_kwargs)
+        else:
+            resolved_browser_profile = AgentBrowserProfile(**profile_kwargs)
+        # Initialize the Pydantic model directly (like BrowserSession does)
+        # Don't call BrowserSession.__init__ as it would recreate BrowserProfile and lose custom_extensions
+        from pydantic import BaseModel
+        BaseModel.__init__(
+            self,
+            id=id or str(uuid7str()),
+            browser_profile=resolved_browser_profile,
+        )
+        # Set AgentBrowserSession specific fields
+        self.main_browser_session = main_browser_session
+    # Override browser_profile field to ensure it's always AgentBrowserProfile
     browser_profile: AgentBrowserProfile = Field(
         default_factory=lambda: DEFAULT_BROWSER_PROFILE,
-        description='BrowserProfile() options to use for the session, otherwise a default profile will be used',
+        description='AgentBrowserProfile() options to use for the session',
     )
     main_browser_session: BrowserSession | None = Field(default=None)
-    connected_agent: bool = False
-    # Add a flag to control DVD animation (for future extensibility)
-    disable_dvd_animation: bool = Field(
-        default=True,
-        description="Disable the DVD screensaver animation on about:blank pages"
-    )
-    # Custom extensions to load
-    custom_extension_paths: List[str] = Field(
-        default_factory=list,
-        description="List of paths to custom Chrome extensions to load"
-    )
     async def connect_agent(self, target_id: str) -> Self:
         """Register agent to browser with optional target assignment."""
@@ -54,7 +148,6 @@ class AgentBrowserSession(BrowserSession):
             await self.agent_focus.cdp_client.send.Runtime.runIfWaitingForDebugger(
                 session_id=self.agent_focus.session_id)
             self._cdp_session_pool[target_id] = self.agent_focus
-        self.connected_agent = True
         return self
     async def disconnect_agent(self) -> None:
@@ -62,7 +155,7 @@ class AgentBrowserSession(BrowserSession):
         for session in self._cdp_session_pool.values():
             await session.disconnect()
         self._cdp_session_pool.clear()
-        self.connected_agent = False
+        self.main_browser_session = None
     async def _cdp_get_all_pages(
             self,
@@ -80,7 +173,7 @@ class AgentBrowserSession(BrowserSession):
         if not self._cdp_client_root:
             return []
         targets = await self.cdp_client.send.Target.getTargets()
-        if self.connected_agent:
+        if self.main_browser_session is not None:
             assigned_target_ids = self._cdp_session_pool.keys()
             return [
                 t
@@ -126,12 +219,12 @@ class AgentBrowserSession(BrowserSession):
         from vibe_surf.browser.watchdogs.action_watchdog import CustomActionWatchdog
         from vibe_surf.browser.watchdogs.dom_watchdog import CustomDOMWatchdog
-        from browser_use.browser.downloads_watchdog import DownloadsWatchdog
-        from browser_use.browser.local_browser_watchdog import LocalBrowserWatchdog
-        from browser_use.browser.permissions_watchdog import PermissionsWatchdog
-        from browser_use.browser.popups_watchdog import PopupsWatchdog
-        from browser_use.browser.screenshot_watchdog import ScreenshotWatchdog
-        from browser_use.browser.security_watchdog import SecurityWatchdog
+        from browser_use.browser.watchdogs.downloads_watchdog import DownloadsWatchdog
+        from browser_use.browser.watchdogs.local_browser_watchdog import LocalBrowserWatchdog
+        from browser_use.browser.watchdogs.permissions_watchdog import PermissionsWatchdog
+        from browser_use.browser.watchdogs.popups_watchdog import PopupsWatchdog
+        from browser_use.browser.watchdogs.screenshot_watchdog import ScreenshotWatchdog
+        from browser_use.browser.watchdogs.security_watchdog import SecurityWatchdog
         # NOTE: AboutBlankWatchdog is deliberately excluded to disable DVD animation
@@ -184,36 +277,6 @@ class AgentBrowserSession(BrowserSession):
         self.logger.info('✅ VibeSurfBrowserSession: All watchdogs attached (AboutBlankWatchdog excluded)')
-    async def _ensure_minimal_about_blank_tab(self) -> None:
-        """
-        Ensure there's at least one about:blank tab without any animation.
-        This replaces AboutBlankWatchdog's functionality but without the DVD animation.
-        """
-        try:
-            # Get all page targets using CDP
-            page_targets = await self._cdp_get_all_pages()
-            # If no tabs exist at all, create one to keep browser alive
-            if len(page_targets) == 0:
-                self.logger.info('[VibeSurfBrowserSession] No tabs exist, creating new about:blank tab (no animation)')
-                from browser_use.browser.events import NavigateToUrlEvent
-                navigate_event = self.event_bus.dispatch(NavigateToUrlEvent(url='about:blank', new_tab=True))
-                await navigate_event
-                # Note: NO DVD screensaver injection here!
-        except Exception as e:
-            self.logger.error(f'[VibeSurfBrowserSession] Error ensuring about:blank tab: {e}')
-    async def on_BrowserStartEvent(self, event) -> dict[str, str]:
-        """Override to ensure minimal about:blank handling without animation."""
-        # Call parent implementation first
-        result = await super().on_BrowserStartEvent(event)
-        # Ensure we have at least one tab without animation
-        await self._ensure_minimal_about_blank_tab()
-        return result
     def get_cdp_session_pool(self):
         return self._cdp_session_pool

vibe_surf/browser/browser_manager.py CHANGED Viewed

@@ -89,7 +89,7 @@ class BrowserManager:
         # Get or create available target
         if target_id is None:
             new_target = await self.main_browser_session.cdp_client.send.Target.createTarget(
-                params={'url': 'about:blank'})
+                params={'url': ''})
             target_id = new_target["targetId"]
         await agent_session.connect_agent(target_id=target_id)
@@ -257,7 +257,7 @@ class BrowserManager:
         if page_targets:
             target_id = page_targets[-1]["targetId"]
         else:
-            new_target = await client.send.Target.createTarget(params={'url': 'about:blank'})
+            new_target = await client.send.Target.createTarget(params={'url': ''})
             target_id = new_target["targetId"]
         await self.main_browser_session.get_or_create_cdp_session(target_id, focus=False)
         return target_id

vibe_surf/browser/utils.py CHANGED Viewed

@@ -689,21 +689,14 @@ def create_highlighted_screenshot(
         for element_id, element in selector_map.items():
             try:
                 # Use snapshot bounds (document coordinates) if available, otherwise absolute_position
-                bounds = None
-                if element.snapshot_node and element.snapshot_node.bounds:
-                    bounds = element.snapshot_node.bounds
-                elif element.absolute_position:
-                    bounds = element.absolute_position
-                if not bounds:
-                    continue
-                # Convert from CSS pixels to device pixels for screenshot coordinates
-                # Note: bounds are already in CSS pixels, screenshot is in device pixels
-                x1 = int((bounds.x - viewport_offset_x) * device_pixel_ratio)
-                y1 = int((bounds.y - viewport_offset_y) * device_pixel_ratio)
-                x2 = int((bounds.x + bounds.width - viewport_offset_x) * device_pixel_ratio)
-                y2 = int((bounds.y + bounds.height - viewport_offset_y) * device_pixel_ratio)
+                bounds = element.absolute_position
+                # Scale coordinates from CSS pixels to device pixels for screenshot
+                # The screenshot is captured at device pixel resolution, but coordinates are in CSS pixels
+                x1 = int(bounds.x * device_pixel_ratio)
+                y1 = int(bounds.y * device_pixel_ratio)
+                x2 = int((bounds.x + bounds.width) * device_pixel_ratio)
+                y2 = int((bounds.y + bounds.height) * device_pixel_ratio)
                 # Ensure coordinates are within image bounds
                 img_width, img_height = image.size

vibe_surf/browser/watchdogs/action_watchdog.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import asyncio
-from browser_use.browser.default_action_watchdog import DefaultActionWatchdog
+from browser_use.browser.watchdogs.default_action_watchdog import DefaultActionWatchdog
 from browser_use.browser.events import (
 	ClickElementEvent,
 	GetDropdownOptionsEvent,
@@ -45,11 +45,14 @@ class CustomActionWatchdog(DefaultActionWatchdog):
                 msg = f'Index {index_for_logging} - has an element which opens file upload dialog. To upload files please use a specific function to upload files'
                 self.logger.info(msg)
                 raise BrowserError(
-                    'Click triggered a file input element which could not be handled, use the dedicated file upload function instead'
+                    message=msg,
+                    long_term_memory=msg,
                 )
             # Perform the actual click using internal implementation
-            await self._click_element_node_impl(element_node, while_holding_ctrl=event.while_holding_ctrl)
+            click_metadata = None
+            click_metadata = await self._click_element_node_impl(element_node,
+                                                                 while_holding_ctrl=event.while_holding_ctrl)
             download_path = None  # moved to downloads_watchdog.py
             # Build success message
@@ -63,15 +66,9 @@ class CustomActionWatchdog(DefaultActionWatchdog):
             # Wait a bit for potential new tab to be created
             # This is necessary because tab creation is async and might not be immediate
-            await asyncio.sleep(1)
+            await asyncio.sleep(0.5)
             # Clear cached state after click action since DOM might have changed
-            self.logger.debug('🔄 Click action completed, clearing cached browser state')
-            self.browser_session._cached_browser_state_summary = None
-            self.browser_session._cached_selector_map.clear()
-            if self.browser_session._dom_watchdog:
-                self.browser_session._dom_watchdog.clear_cache()
-            # Successfully clicked, always reset session back to parent page session context
             self.browser_session.agent_focus = await self.browser_session.get_or_create_cdp_session(
                 target_id=starting_target_id, focus=True
             )
@@ -105,187 +102,4 @@ class CustomActionWatchdog(DefaultActionWatchdog):
             return None
         except Exception as e:
-            raise
-    async def _input_text_element_node_impl(self, element_node, text: str, clear_existing: bool = True) -> dict | None:
-        """
-        Input text into an element using pure CDP with improved focus fallbacks.
-        """
-        try:
-            # Get CDP client
-            cdp_session = await self.browser_session.cdp_client_for_node(element_node)
-            # Get element info
-            backend_node_id = element_node.backend_node_id
-            # Track coordinates for metadata
-            input_coordinates = None
-            # Scroll element into view
-            try:
-                await cdp_session.cdp_client.send.DOM.scrollIntoViewIfNeeded(
-                    params={'backendNodeId': backend_node_id}, session_id=cdp_session.session_id
-                )
-                await asyncio.sleep(0.1)
-            except Exception as e:
-                self.logger.warning(
-                    f'⚠️ Failed to focus the page {cdp_session} and scroll element {element_node} into view before typing in text: {type(e).__name__}: {e}'
-                )
-            # Get object ID for the element
-            result = await cdp_session.cdp_client.send.DOM.resolveNode(
-                params={'backendNodeId': backend_node_id},
-                session_id=cdp_session.session_id,
-            )
-            assert 'object' in result and 'objectId' in result['object'], (
-                'Failed to find DOM element based on backendNodeId, maybe page content changed?'
-            )
-            object_id = result['object']['objectId']
-            # Check element focusability before attempting focus
-            element_info = await self._check_element_focusability(element_node, object_id, cdp_session.session_id)
-            self.logger.debug(f'Element focusability check: {element_info}')
-            # Extract coordinates from element bounds for metadata
-            bounds = element_info.get('bounds', {})
-            if bounds.get('width', 0) > 0 and bounds.get('height', 0) > 0:
-                center_x = bounds['x'] + bounds['width'] / 2
-                center_y = bounds['y'] + bounds['height'] / 2
-                input_coordinates = {"input_x": center_x, "input_y": center_y}
-                self.logger.debug(f'📍 Input coordinates: x={center_x:.1f}, y={center_y:.1f}')
-            # Provide helpful warnings for common issues
-            if not element_info.get('visible', False):
-                self.logger.warning('⚠️ Target element appears to be invisible or has zero dimensions')
-            if element_info.get('disabled', False):
-                self.logger.warning('⚠️ Target element appears to be disabled')
-            if not element_info.get('focusable', False):
-                self.logger.warning('⚠️ Target element may not be focusable by standard criteria')
-            # Clear existing text if requested
-            if clear_existing:
-                await cdp_session.cdp_client.send.Runtime.callFunctionOn(
-                    params={
-                        'functionDeclaration': 'function() { if (this.value !== undefined) this.value = ""; if (this.textContent !== undefined) this.textContent = ""; }',
-                        'objectId': object_id,
-                    },
-                    session_id=cdp_session.session_id,
-                )
-            # Try multiple focus strategies
-            focused_successfully = False
-            # Strategy 1: Try CDP DOM.focus (original method)
-            try:
-                await cdp_session.cdp_client.send.DOM.focus(
-                    params={'backendNodeId': backend_node_id},
-                    session_id=cdp_session.session_id,
-                )
-                focused_successfully = True
-                self.logger.debug('✅ Element focused using CDP DOM.focus')
-            except Exception as e:
-                self.logger.debug(f'CDP DOM.focus failed: {e}')
-                # Strategy 2: Try JavaScript focus as fallback
-                try:
-                    await cdp_session.cdp_client.send.Runtime.callFunctionOn(
-                        params={
-                            'functionDeclaration': 'function() { this.focus(); }',
-                            'objectId': object_id,
-                        },
-                        session_id=cdp_session.session_id,
-                    )
-                    focused_successfully = True
-                    self.logger.debug('✅ Element focused using JavaScript focus()')
-                except Exception as js_e:
-                    self.logger.debug(f'JavaScript focus failed: {js_e}')
-                    # Strategy 3: Try click-to-focus for stubborn elements
-                    try:
-                        await cdp_session.cdp_client.send.Runtime.callFunctionOn(
-                            params={
-                                'functionDeclaration': 'function() { this.click(); this.focus(); }',
-                                'objectId': object_id,
-                            },
-                            session_id=cdp_session.session_id,
-                        )
-                        focused_successfully = True
-                        self.logger.debug('✅ Element focused using click + focus combination')
-                    except Exception as click_e:
-                        self.logger.debug(f'Click + focus failed: {click_e}')
-                        # Strategy 4: Try simulated mouse click for maximum compatibility
-                        try:
-                            # Use coordinates already calculated from element bounds
-                            if input_coordinates and 'input_x' in input_coordinates and 'input_y' in input_coordinates:
-                                click_x = input_coordinates['input_x']
-                                click_y = input_coordinates['input_y']
-                                await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
-                                    params={
-                                        'type': 'mousePressed',
-                                        'x': click_x,
-                                        'y': click_y,
-                                        'button': 'left',
-                                        'clickCount': 1,
-                                    },
-                                    session_id=cdp_session.session_id,
-                                )
-                                await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
-                                    params={
-                                        'type': 'mouseReleased',
-                                        'x': click_x,
-                                        'y': click_y,
-                                        'button': 'left',
-                                        'clickCount': 1,
-                                    },
-                                    session_id=cdp_session.session_id,
-                                )
-                                focused_successfully = True
-                                self.logger.debug('✅ Element focused using simulated mouse click')
-                            else:
-                                self.logger.debug('Element bounds not available for mouse click')
-                        except Exception as mouse_e:
-                            self.logger.debug(f'Simulated mouse click failed: {mouse_e}')
-            # Log focus result
-            if not focused_successfully:
-                self.logger.warning('⚠️ All focus strategies failed, typing without explicit focus')
-            # Type the text character by character
-            for char in text:
-                # Send keydown (without text to avoid duplication)
-                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
-                    params={
-                        'type': 'keyDown',
-                        'key': char,
-                    },
-                    session_id=cdp_session.session_id,
-                )
-                # Send char (for actual text input)
-                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
-                    params={
-                        'type': 'char',
-                        'text': char,
-                        'key': char,
-                    },
-                    session_id=cdp_session.session_id,
-                )
-                # Send keyup (without text to avoid duplication)
-                await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
-                    params={
-                        'type': 'keyUp',
-                        'key': char,
-                    },
-                    session_id=cdp_session.session_id,
-                )
-                # Small delay between characters
-                await asyncio.sleep(0.01)
-            # Return coordinates metadata if available
-            return input_coordinates
-        except Exception as e:
-            self.logger.error(f'Failed to input text via CDP: {type(e).__name__}: {e}')
-            raise BrowserError(f'Failed to input text into element: {repr(element_node)}')
+            raise

vibesurf 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

Potentially problematic release.

vibesurf 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl