vibesurf 0.1.9a6__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vibesurf might be problematic. (The source page links to more details here.)

Files changed (69):
  1. vibe_surf/_version.py +2 -2
  2. vibe_surf/agents/browser_use_agent.py +68 -45
  3. vibe_surf/agents/prompts/report_writer_prompt.py +73 -0
  4. vibe_surf/agents/prompts/vibe_surf_prompt.py +85 -172
  5. vibe_surf/agents/report_writer_agent.py +380 -226
  6. vibe_surf/agents/vibe_surf_agent.py +878 -814
  7. vibe_surf/agents/views.py +130 -0
  8. vibe_surf/backend/api/activity.py +3 -1
  9. vibe_surf/backend/api/browser.py +70 -0
  10. vibe_surf/backend/api/config.py +8 -5
  11. vibe_surf/backend/api/files.py +59 -50
  12. vibe_surf/backend/api/models.py +2 -2
  13. vibe_surf/backend/api/task.py +47 -13
  14. vibe_surf/backend/database/manager.py +24 -18
  15. vibe_surf/backend/database/queries.py +199 -192
  16. vibe_surf/backend/database/schemas.py +1 -1
  17. vibe_surf/backend/main.py +80 -3
  18. vibe_surf/backend/shared_state.py +30 -35
  19. vibe_surf/backend/utils/encryption.py +3 -1
  20. vibe_surf/backend/utils/llm_factory.py +41 -36
  21. vibe_surf/browser/agent_browser_session.py +308 -62
  22. vibe_surf/browser/browser_manager.py +71 -100
  23. vibe_surf/browser/utils.py +5 -3
  24. vibe_surf/browser/watchdogs/dom_watchdog.py +0 -45
  25. vibe_surf/chrome_extension/background.js +88 -0
  26. vibe_surf/chrome_extension/manifest.json +3 -1
  27. vibe_surf/chrome_extension/scripts/api-client.js +13 -0
  28. vibe_surf/chrome_extension/scripts/file-manager.js +482 -0
  29. vibe_surf/chrome_extension/scripts/history-manager.js +658 -0
  30. vibe_surf/chrome_extension/scripts/modal-manager.js +487 -0
  31. vibe_surf/chrome_extension/scripts/session-manager.js +52 -11
  32. vibe_surf/chrome_extension/scripts/settings-manager.js +1214 -0
  33. vibe_surf/chrome_extension/scripts/ui-manager.js +1530 -3163
  34. vibe_surf/chrome_extension/sidepanel.html +47 -7
  35. vibe_surf/chrome_extension/styles/activity.css +934 -0
  36. vibe_surf/chrome_extension/styles/base.css +76 -0
  37. vibe_surf/chrome_extension/styles/history-modal.css +791 -0
  38. vibe_surf/chrome_extension/styles/input.css +568 -0
  39. vibe_surf/chrome_extension/styles/layout.css +186 -0
  40. vibe_surf/chrome_extension/styles/responsive.css +454 -0
  41. vibe_surf/chrome_extension/styles/settings-environment.css +165 -0
  42. vibe_surf/chrome_extension/styles/settings-forms.css +389 -0
  43. vibe_surf/chrome_extension/styles/settings-modal.css +141 -0
  44. vibe_surf/chrome_extension/styles/settings-profiles.css +244 -0
  45. vibe_surf/chrome_extension/styles/settings-responsive.css +144 -0
  46. vibe_surf/chrome_extension/styles/settings-utilities.css +25 -0
  47. vibe_surf/chrome_extension/styles/variables.css +54 -0
  48. vibe_surf/cli.py +5 -22
  49. vibe_surf/common.py +35 -0
  50. vibe_surf/llm/openai_compatible.py +148 -93
  51. vibe_surf/logger.py +99 -0
  52. vibe_surf/{controller/vibesurf_tools.py → tools/browser_use_tools.py} +233 -221
  53. vibe_surf/tools/file_system.py +415 -0
  54. vibe_surf/{controller → tools}/mcp_client.py +4 -3
  55. vibe_surf/tools/report_writer_tools.py +21 -0
  56. vibe_surf/tools/vibesurf_tools.py +657 -0
  57. vibe_surf/tools/views.py +120 -0
  58. {vibesurf-0.1.9a6.dist-info → vibesurf-0.1.11.dist-info}/METADATA +23 -3
  59. vibesurf-0.1.11.dist-info/RECORD +93 -0
  60. vibe_surf/chrome_extension/styles/main.css +0 -2338
  61. vibe_surf/chrome_extension/styles/settings.css +0 -1100
  62. vibe_surf/controller/file_system.py +0 -53
  63. vibe_surf/controller/views.py +0 -37
  64. vibesurf-0.1.9a6.dist-info/RECORD +0 -71
  65. /vibe_surf/{controller → tools}/__init__.py +0 -0
  66. {vibesurf-0.1.9a6.dist-info → vibesurf-0.1.11.dist-info}/WHEEL +0 -0
  67. {vibesurf-0.1.9a6.dist-info → vibesurf-0.1.11.dist-info}/entry_points.txt +0 -0
  68. {vibesurf-0.1.9a6.dist-info → vibesurf-0.1.11.dist-info}/licenses/LICENSE +0 -0
  69. {vibesurf-0.1.9a6.dist-info → vibesurf-0.1.11.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,7 @@ from pydantic import Field
11
11
  from browser_use.browser.events import (
12
12
  NavigationCompleteEvent,
13
13
  )
14
+ from browser_use.utils import _log_pretty_url, is_new_tab_page, time_execution_async
14
15
  import time
15
16
  from browser_use.browser.profile import BrowserProfile
16
17
  from browser_use.browser.views import BrowserStateSummary
@@ -18,71 +19,96 @@ from browser_use.dom.views import TargetInfo
18
19
  from vibe_surf.browser.agen_browser_profile import AgentBrowserProfile
19
20
  from typing import Self
20
21
  from uuid_extensions import uuid7str
22
+ import httpx
23
+ from browser_use.browser.views import BrowserStateSummary, TabInfo
24
+ from browser_use.dom.views import EnhancedDOMTreeNode, TargetInfo
25
+ from browser_use.observability import observe_debug
26
+ from cdp_use import CDPClient
27
+ from browser_use.browser.events import (
28
+ AgentFocusChangedEvent,
29
+ BrowserConnectedEvent,
30
+ BrowserErrorEvent,
31
+ BrowserLaunchEvent,
32
+ BrowserLaunchResult,
33
+ BrowserStartEvent,
34
+ BrowserStateRequestEvent,
35
+ BrowserStopEvent,
36
+ BrowserStoppedEvent,
37
+ CloseTabEvent,
38
+ FileDownloadedEvent,
39
+ NavigateToUrlEvent,
40
+ NavigationCompleteEvent,
41
+ NavigationStartedEvent,
42
+ SwitchTabEvent,
43
+ TabClosedEvent,
44
+ TabCreatedEvent,
45
+ )
21
46
 
22
47
  DEFAULT_BROWSER_PROFILE = AgentBrowserProfile()
23
48
 
49
+
24
50
  class AgentBrowserSession(BrowserSession):
25
51
  """Isolated browser session for a specific agent."""
26
52
 
27
53
  def __init__(
28
- self,
29
- # Core configuration
30
- id: str | None = None,
31
- cdp_url: str | None = None,
32
- is_local: bool = False,
33
- browser_profile: AgentBrowserProfile | None = None,
34
- # Custom AgentBrowserSession fields
35
- main_browser_session: BrowserSession | None = None,
36
- # BrowserProfile fields that can be passed directly
37
- # From BrowserConnectArgs
38
- headers: dict[str, str] | None = None,
39
- # From BrowserLaunchArgs
40
- env: dict[str, str | float | bool] | None = None,
41
- executable_path: str | Path | None = None,
42
- headless: bool | None = None,
43
- args: list[str] | None = None,
44
- ignore_default_args: list[str] | list[bool] | None = None,
45
- channel: str | None = None,
46
- chromium_sandbox: bool | None = None,
47
- devtools: bool | None = None,
48
- downloads_path: str | Path | None = None,
49
- traces_dir: str | Path | None = None,
50
- # From BrowserContextArgs
51
- accept_downloads: bool | None = None,
52
- permissions: list[str] | None = None,
53
- user_agent: str | None = None,
54
- screen: dict | None = None,
55
- viewport: dict | None = None,
56
- no_viewport: bool | None = None,
57
- device_scale_factor: float | None = None,
58
- record_har_content: str | None = None,
59
- record_har_mode: str | None = None,
60
- record_har_path: str | Path | None = None,
61
- record_video_dir: str | Path | None = None,
62
- # From BrowserLaunchPersistentContextArgs
63
- user_data_dir: str | Path | None = None,
64
- # From BrowserNewContextArgs
65
- storage_state: str | Path | dict[str, Any] | None = None,
66
- # BrowserProfile specific fields
67
- disable_security: bool | None = None,
68
- deterministic_rendering: bool | None = None,
69
- allowed_domains: list[str] | None = None,
70
- keep_alive: bool | None = None,
71
- proxy: any | None = None,
72
- enable_default_extensions: bool | None = None,
73
- window_size: dict | None = None,
74
- window_position: dict | None = None,
75
- cross_origin_iframes: bool | None = None,
76
- minimum_wait_page_load_time: float | None = None,
77
- wait_for_network_idle_page_load_time: float | None = None,
78
- wait_between_actions: float | None = None,
79
- highlight_elements: bool | None = None,
80
- filter_highlight_ids: bool | None = None,
81
- auto_download_pdfs: bool | None = None,
82
- profile_directory: str | None = None,
83
- cookie_whitelist_domains: list[str] | None = None,
84
- # AgentBrowserProfile specific fields
85
- custom_extensions: list[str] | None = None,
54
+ self,
55
+ # Core configuration
56
+ id: str | None = None,
57
+ cdp_url: str | None = None,
58
+ is_local: bool = False,
59
+ browser_profile: AgentBrowserProfile | None = None,
60
+ # Custom AgentBrowserSession fields
61
+ main_browser_session: BrowserSession | None = None,
62
+ # BrowserProfile fields that can be passed directly
63
+ # From BrowserConnectArgs
64
+ headers: dict[str, str] | None = None,
65
+ # From BrowserLaunchArgs
66
+ env: dict[str, str | float | bool] | None = None,
67
+ executable_path: str | Path | None = None,
68
+ headless: bool | None = None,
69
+ args: list[str] | None = None,
70
+ ignore_default_args: list[str] | list[bool] | None = None,
71
+ channel: str | None = None,
72
+ chromium_sandbox: bool | None = None,
73
+ devtools: bool | None = None,
74
+ downloads_path: str | Path | None = None,
75
+ traces_dir: str | Path | None = None,
76
+ # From BrowserContextArgs
77
+ accept_downloads: bool | None = None,
78
+ permissions: list[str] | None = None,
79
+ user_agent: str | None = None,
80
+ screen: dict | None = None,
81
+ viewport: dict | None = None,
82
+ no_viewport: bool | None = None,
83
+ device_scale_factor: float | None = None,
84
+ record_har_content: str | None = None,
85
+ record_har_mode: str | None = None,
86
+ record_har_path: str | Path | None = None,
87
+ record_video_dir: str | Path | None = None,
88
+ # From BrowserLaunchPersistentContextArgs
89
+ user_data_dir: str | Path | None = None,
90
+ # From BrowserNewContextArgs
91
+ storage_state: str | Path | dict[str, Any] | None = None,
92
+ # BrowserProfile specific fields
93
+ disable_security: bool | None = None,
94
+ deterministic_rendering: bool | None = None,
95
+ allowed_domains: list[str] | None = None,
96
+ keep_alive: bool | None = None,
97
+ proxy: any | None = None,
98
+ enable_default_extensions: bool | None = None,
99
+ window_size: dict | None = None,
100
+ window_position: dict | None = None,
101
+ cross_origin_iframes: bool | None = None,
102
+ minimum_wait_page_load_time: float | None = None,
103
+ wait_for_network_idle_page_load_time: float | None = None,
104
+ wait_between_actions: float | None = None,
105
+ highlight_elements: bool | None = None,
106
+ filter_highlight_ids: bool | None = None,
107
+ auto_download_pdfs: bool | None = None,
108
+ profile_directory: str | None = None,
109
+ cookie_whitelist_domains: list[str] | None = None,
110
+ # AgentBrowserProfile specific fields
111
+ custom_extensions: list[str] | None = None,
86
112
  ):
87
113
  # Filter out AgentBrowserSession specific parameters
88
114
  agent_session_params = {
@@ -91,8 +117,8 @@ class AgentBrowserSession(BrowserSession):
91
117
 
92
118
  # Get all browser profile parameters
93
119
  profile_kwargs = {k: v for k, v in locals().items()
94
- if k not in ['self', 'browser_profile', 'id', 'main_browser_session']
95
- and v is not None}
120
+ if k not in ['self', 'browser_profile', 'id', 'main_browser_session']
121
+ and v is not None}
96
122
 
97
123
  # Apply BrowserSession's is_local logic first
98
124
  effective_is_local = is_local
@@ -131,6 +157,155 @@ class AgentBrowserSession(BrowserSession):
131
157
  )
132
158
  main_browser_session: BrowserSession | None = Field(default=None)
133
159
 
160
+ async def connect(self, cdp_url: str | None = None) -> Self:
161
+ """Connect to a remote chromium-based browser via CDP using cdp-use.
162
+
163
+ This MUST succeed or the browser is unusable. Fails hard on any error.
164
+ """
165
+
166
+ self.browser_profile.cdp_url = cdp_url or self.cdp_url
167
+ if not self.cdp_url:
168
+ raise RuntimeError('Cannot setup CDP connection without CDP URL')
169
+
170
+ if not self.cdp_url.startswith('ws'):
171
+ # If it's an HTTP URL, fetch the WebSocket URL from /json/version endpoint
172
+ url = self.cdp_url.rstrip('/')
173
+ if not url.endswith('/json/version'):
174
+ url = url + '/json/version'
175
+
176
+ # Run a tiny HTTP client to query for the WebSocket URL from the /json/version endpoint
177
+ async with httpx.AsyncClient() as client:
178
+ headers = self.browser_profile.headers or {}
179
+ version_info = await client.get(url, headers=headers)
180
+ self.browser_profile.cdp_url = version_info.json()['webSocketDebuggerUrl']
181
+
182
+ assert self.cdp_url is not None
183
+
184
+ browser_location = 'local browser' if self.is_local else 'remote browser'
185
+ self.logger.debug(
186
+ f'🌎 Connecting to existing chromium-based browser via CDP: {self.cdp_url} -> ({browser_location})')
187
+
188
+ try:
189
+ # Import cdp-use client
190
+
191
+ # Convert HTTP URL to WebSocket URL if needed
192
+
193
+ # Create and store the CDP client for direct CDP communication
194
+ self._cdp_client_root = CDPClient(self.cdp_url)
195
+ assert self._cdp_client_root is not None
196
+ await self._cdp_client_root.start()
197
+ await self._cdp_client_root.send.Target.setAutoAttach(
198
+ params={'autoAttach': True, 'waitForDebuggerOnStart': False, 'flatten': True}
199
+ )
200
+ self.logger.debug('CDP client connected successfully')
201
+
202
+ # Get browser targets to find available contexts/pages
203
+ targets = await self._cdp_client_root.send.Target.getTargets()
204
+
205
+ # Find main browser pages (avoiding iframes, workers, extensions, etc.)
206
+ page_targets: list[TargetInfo] = [
207
+ t
208
+ for t in targets['targetInfos']
209
+ if self._is_valid_target(
210
+ t, include_http=True, include_about=True, include_pages=True, include_iframes=False,
211
+ include_workers=False
212
+ )
213
+ ]
214
+
215
+ # Check for chrome://newtab pages and immediately redirect them
216
+ # to about:blank to avoid JS issues from CDP on chrome://* urls
217
+ from browser_use.utils import is_new_tab_page
218
+
219
+ # Collect all targets that need redirection
220
+ redirected_targets = []
221
+ redirect_sessions = {} # Store sessions created for redirection to potentially reuse
222
+ for target in page_targets:
223
+ target_url = target.get('url', '')
224
+ if is_new_tab_page(target_url) and target_url != '':
225
+ # Redirect chrome://newtab to about:blank to avoid JS issues preventing driving chrome://newtab
226
+ target_id = target['targetId']
227
+ self.logger.debug(f'🔄 Redirecting {target_url} to about:blank for target {target_id}')
228
+ try:
229
+ # Create a CDP session for redirection (minimal domains to avoid duplicate event handlers)
230
+ # Only enable Page domain for navigation, avoid duplicate event handlers
231
+ redirect_session = await CDPSession.for_target(self._cdp_client_root, target_id,
232
+ domains=['Page'])
233
+ # Navigate to about:blank
234
+ await redirect_session.cdp_client.send.Page.navigate(
235
+ params={'url': ''}, session_id=redirect_session.session_id
236
+ )
237
+ redirected_targets.append(target_id)
238
+ redirect_sessions[target_id] = redirect_session # Store for potential reuse
239
+ # Update the target's URL in our list for later use
240
+ target['url'] = ''
241
+ # Small delay to ensure navigation completes
242
+ await asyncio.sleep(0.1)
243
+ except Exception as e:
244
+ self.logger.warning(f'Failed to redirect {target_url} to about:blank: {e}')
245
+
246
+ # Log summary of redirections
247
+ if redirected_targets:
248
+ self.logger.debug(f'Redirected {len(redirected_targets)} chrome://newtab pages to about:blank')
249
+
250
+ if not page_targets:
251
+ # No pages found, create a new one
252
+ new_target = await self._cdp_client_root.send.Target.createTarget(params={'url': ''})
253
+ target_id = new_target['targetId']
254
+ self.logger.debug(f'📄 Created new blank page with target ID: {target_id}')
255
+ else:
256
+ # Use the first available page
257
+ target_id = [page for page in page_targets if page.get('type') == 'page'][0]['targetId']
258
+ self.logger.debug(f'📄 Using existing page with target ID: {target_id}')
259
+
260
+ # Store the current page target ID and add to pool
261
+ # Reuse redirect session if available, otherwise create new one
262
+ if target_id in redirect_sessions:
263
+ self.logger.debug(f'Reusing redirect session for target {target_id}')
264
+ self.agent_focus = redirect_sessions[target_id]
265
+ else:
266
+ # For the initial connection, we'll use the shared root WebSocket
267
+ self.agent_focus = await CDPSession.for_target(self._cdp_client_root, target_id, new_socket=False)
268
+ if self.agent_focus:
269
+ self._cdp_session_pool[target_id] = self.agent_focus
270
+
271
+ # Enable proxy authentication handling if configured
272
+ await self._setup_proxy_auth()
273
+
274
+ # Verify the session is working
275
+ try:
276
+ if self.agent_focus:
277
+ assert self.agent_focus.title != 'Unknown title'
278
+ else:
279
+ raise RuntimeError('Failed to create CDP session')
280
+ except Exception as e:
281
+ self.logger.warning(f'Failed to create CDP session: {e}')
282
+ raise
283
+
284
+ # Dispatch TabCreatedEvent for all initial tabs (so watchdogs can initialize)
285
+ # This replaces the duplicated logic from navigation_watchdog's _initialize_agent_focus
286
+ for idx, target in enumerate(page_targets):
287
+ target_url = target.get('url', '')
288
+ self.logger.debug(f'Dispatching TabCreatedEvent for initial tab {idx}: {target_url}')
289
+ await self.event_bus.dispatch(TabCreatedEvent(url=target_url, target_id=target['targetId']))
290
+
291
+ # Dispatch initial focus event
292
+ if page_targets:
293
+ initial_url = page_targets[0].get('url', '')
294
+ await self.event_bus.dispatch(
295
+ AgentFocusChangedEvent(target_id=page_targets[0]['targetId'], url=initial_url))
296
+ self.logger.debug(f'Initial agent focus set to tab 0: {initial_url}')
297
+
298
+ except Exception as e:
299
+ # Fatal error - browser is not usable without CDP connection
300
+ self.logger.error(f'❌ FATAL: Failed to setup CDP connection: {e}')
301
+ self.logger.error('❌ Browser cannot continue without CDP connection')
302
+ # Clean up any partial state
303
+ self._cdp_client_root = None
304
+ self.agent_focus = None
305
+ # Re-raise as a fatal error
306
+ raise RuntimeError(f'Failed to establish CDP connection to browser: {e}') from e
307
+
308
+ return self
134
309
 
135
310
  async def connect_agent(self, target_id: str) -> Self:
136
311
  """Register agent to browser with optional target assignment."""
@@ -286,7 +461,7 @@ class AgentBrowserSession(BrowserSession):
286
461
  return
287
462
  await self.get_or_create_cdp_session(self.agent_focus.target_id, focus=True)
288
463
 
289
- async def navigate_to_url(self, url: str, new_tab: bool = False) -> None:
464
+ async def navigate_to_url(self, url: str, new_tab: bool = False) -> Optional[str]:
290
465
  """
291
466
  Concurrent navigation method that bypasses serial bottlenecks in on_NavigateToUrlEvent.
292
467
 
@@ -294,7 +469,7 @@ class AgentBrowserSession(BrowserSession):
294
469
  """
295
470
  if not self.agent_focus:
296
471
  self.logger.warning('Cannot navigate - browser not connected')
297
- return
472
+ return None
298
473
 
299
474
  target_id = None
300
475
 
@@ -351,6 +526,8 @@ class AgentBrowserSession(BrowserSession):
351
526
  )
352
527
  )
353
528
  raise
529
+ finally:
530
+ return target_id
354
531
 
355
532
  async def _wait_for_stable_network(self):
356
533
  """Wait for page stability - simplified for CDP-only branch."""
@@ -477,3 +654,72 @@ class AgentBrowserSession(BrowserSession):
477
654
  include_recent_events=include_recent_events
478
655
  )
479
656
  return browser_state
657
+
658
+ @observe_debug(ignore_input=True, ignore_output=True, name='get_tabs')
659
+ async def get_tabs(self) -> list[TabInfo]:
660
+ """Get information about all open tabs using CDP Target.getTargetInfo for speed."""
661
+ tabs = []
662
+
663
+ # Safety check - return empty list if browser not connected yet
664
+ if not self._cdp_client_root:
665
+ return tabs
666
+
667
+ # Get all page targets using CDP
668
+ pages = await self._cdp_get_all_pages()
669
+
670
+ for i, page_target in enumerate(pages):
671
+ target_id = page_target['targetId']
672
+ url = page_target['url']
673
+
674
+ # Try to get the title directly from Target.getTargetInfo - much faster!
675
+ # The initial getTargets() doesn't include title, but getTargetInfo does
676
+ try:
677
+ target_info = await self.cdp_client.send.Target.getTargetInfo(params={'targetId': target_id})
678
+ # The title is directly available in targetInfo
679
+ title = target_info.get('targetInfo', {}).get('title', '')
680
+
681
+ # Skip JS execution for chrome:// pages and new tab pages
682
+ if not title:
683
+ # For chrome:// pages without a title, use the URL itself
684
+ title = url
685
+
686
+ # Special handling for PDF pages without titles
687
+ if (not title or title == '') and (url.endswith('.pdf') or 'pdf' in url):
688
+ # PDF pages might not have a title, use URL filename
689
+ try:
690
+ from urllib.parse import urlparse
691
+
692
+ filename = urlparse(url).path.split('/')[-1]
693
+ if filename:
694
+ title = filename
695
+ except Exception:
696
+ pass
697
+
698
+ except Exception as e:
699
+ # Fallback to basic title handling
700
+ self.logger.debug(
701
+ f'⚠️ Failed to get target info for tab #{i}: {_log_pretty_url(url)} - {type(e).__name__}')
702
+ title = ''
703
+
704
+ tab_info = TabInfo(
705
+ target_id=target_id,
706
+ url=url,
707
+ title=title,
708
+ parent_target_id=None,
709
+ )
710
+ tabs.append(tab_info)
711
+
712
+ return tabs
713
+
714
+ async def refresh_page(self):
715
+ cdp_session = await self.browser_session.get_or_create_cdp_session()
716
+ try:
717
+ # Reload the target
718
+ await cdp_session.cdp_client.send.Page.reload(session_id=cdp_session.session_id)
719
+
720
+ # Wait for reload
721
+ await asyncio.sleep(1.0)
722
+
723
+ self.logger.info('🔄 Target refreshed')
724
+ except Exception as e:
725
+ raise
@@ -16,7 +16,9 @@ from vibe_surf.browser.agent_browser_session import AgentBrowserSession
16
16
  if TYPE_CHECKING:
17
17
  from browser_use.browser.session import BrowserSession
18
18
 
19
- logger = logging.getLogger(__name__)
19
+ from vibe_surf.logger import get_logger
20
+
21
+ logger = get_logger(__name__)
20
22
 
21
23
 
22
24
  class BrowserManager:
@@ -62,7 +64,7 @@ class BrowserManager:
62
64
  agent_session._cdp_client_root = await self._get_root_cdp_client()
63
65
  logger.info(f"🚀 Starting agent session for {agent_id} to initialize watchdogs...")
64
66
  await agent_session.start()
65
-
67
+
66
68
  self._agent_sessions[agent_id] = agent_session
67
69
  await self.assign_target_to_agent(agent_id, target_id)
68
70
  return agent_session
@@ -80,16 +82,22 @@ class BrowserManager:
80
82
 
81
83
  # Validate target assignment
82
84
  if target_id:
83
- target_id_owner = self.get_target_owner(target_id)
84
- if target_id_owner and target_id_owner != agent_id:
85
- logger.warning(
86
- f"Target id: {target_id} belongs to {target_id_owner}. You cannot assign it to {target_id_owner}.")
87
- return False
85
+ try:
86
+ target_id = await self.main_browser_session.get_target_id_from_tab_id(target_id)
87
+ except Exception:
88
+ logger.warning(f"Target ID '{target_id}' not found.")
89
+ target_id = None
90
+ if target_id:
91
+ target_id_owner = self.get_target_owner(target_id)
92
+ if target_id_owner and target_id_owner != agent_id:
93
+ logger.warning(
94
+ f"Target id: {target_id} belongs to {target_id_owner}. You cannot assign it to {target_id_owner}.")
95
+ return False
88
96
 
89
97
  # Get or create available target
90
98
  if target_id is None:
91
99
  new_target = await self.main_browser_session.cdp_client.send.Target.createTarget(
92
- params={'url': ''})
100
+ params={'url': 'about:blank'})
93
101
  target_id = new_target["targetId"]
94
102
 
95
103
  await agent_session.connect_agent(target_id=target_id)
@@ -166,8 +174,6 @@ class BrowserManager:
166
174
  except Exception as e:
167
175
  logger.warning(f"Error during agent {agent_id} cleanup: {e}")
168
176
 
169
- # Note: We don't close the root browser session here as it's managed externally
170
-
171
177
  async def __aenter__(self) -> "BrowserManager":
172
178
  """Async context manager entry."""
173
179
  return self
@@ -176,121 +182,86 @@ class BrowserManager:
176
182
  """Async context manager exit."""
177
183
  await self.close()
178
184
 
179
- async def _is_target_focused(self, target_id: str) -> bool:
180
- """Check if a given target has focus using multiple detection methods."""
181
- client = self.main_browser_session.cdp_client
182
- session_id = None
183
-
184
- try:
185
- # Use document.visibilityState and document.hasFocus()
186
- attach_result = await client.send.Target.attachToTarget(
187
- params={"targetId": target_id, "flatten": True}
188
- )
189
- session_id = attach_result["sessionId"]
190
-
191
- # Check both visibility and focus
192
- combined_script = """
193
- ({
194
- hasFocus: document.hasFocus(),
195
- visibilityState: document.visibilityState,
196
- hidden: document.hidden,
197
- activeElement: document.activeElement ? document.activeElement.tagName : null,
198
- timestamp: Date.now()
199
- })
200
- """
201
-
202
- eval_result = await client.send.Runtime.evaluate(
203
- params={
204
- "expression": combined_script,
205
- "returnByValue": True
206
- },
207
- session_id=session_id
208
- )
209
-
210
- # Detach immediately after checking
211
- await client.send.Target.detachFromTarget(
212
- params={"sessionId": session_id}
213
- )
214
- session_id = None
215
-
216
- if "result" in eval_result and "value" in eval_result["result"]:
217
- focus_data = eval_result["result"]["value"]
218
- has_focus = focus_data.get("hasFocus", False)
219
- visibility_state = focus_data.get("visibilityState", "")
220
- is_hidden = focus_data.get("hidden", True)
221
-
222
- # A target is considered focused if:
223
- # 1. Document has focus OR
224
- # 2. Document is visible (not hidden)
225
- is_focused = has_focus or (visibility_state == "visible" and not is_hidden)
226
- return is_focused
227
- else:
228
- return False
185
+ async def check_browser_connected(self):
186
+ import aiohttp
229
187
 
230
- except Exception:
231
- if session_id:
232
- try:
233
- await client.send.Target.detachFromTarget(
234
- params={"sessionId": session_id}
235
- )
236
- except Exception:
237
- pass # Ignore cleanup errors
188
+ if not self.main_browser_session:
189
+ logger.info("No Main browser session available.")
238
190
  return False
239
191
 
192
+ for _ in range(5):
193
+ try:
194
+ targets = await self.main_browser_session.cdp_client.send.Target.getTargets()
195
+ await asyncio.sleep(1)
196
+ return len(targets) > 0
197
+ except Exception as e:
198
+ logger.error(f"Connect failed: {e}")
199
+ return False
200
+
240
201
  async def _get_active_target(self) -> str:
241
202
  """Get current focused target, or an available target, or create a new one."""
242
- client = self.main_browser_session.cdp_client
243
- targets_info = await client.send.Target.getTargets()
244
- page_targets = [t for t in targets_info["targetInfos"] if t["type"] == "page"]
245
-
203
+ tab_infos = await self.get_all_tabs()
246
204
  # 1. Check for a focused page among ALL pages (not just unassigned)
247
- for target in page_targets:
248
- target_id = target["targetId"]
205
+ for tab_info in tab_infos:
206
+ target_id = tab_info.target_id
249
207
  try:
250
- is_focused = await self._is_target_focused(target_id)
251
- if is_focused:
252
- return target_id
208
+ simple_check = """
209
+ ({
210
+ hasFocus: document.hasFocus(),
211
+ isVisible: document.visibilityState === 'visible',
212
+ notHidden: !document.hidden
213
+ })
214
+ """
215
+ cdb_session = await self.main_browser_session.get_or_create_cdp_session(target_id, focus=False,
216
+ new_socket=None)
217
+ eval_result = await cdb_session.cdp_client.send.Runtime.evaluate(
218
+ params={
219
+ "expression": simple_check,
220
+ "returnByValue": True
221
+ },
222
+ session_id=cdb_session.session_id
223
+ )
224
+ if "result" in eval_result and "value" in eval_result["result"]:
225
+ data = eval_result["result"]["value"]
226
+ is_visible = data.get("isVisible", False)
227
+ not_hidden = data.get("notHidden", False)
228
+ if is_visible and not_hidden:
229
+ return target_id
253
230
  except Exception as e:
231
+ logger.warning(f"Get active target {e}")
254
232
  continue # Skip invalid targets
255
233
 
256
234
  # 2. If no pages are available, create a new one
257
- if page_targets:
258
- target_id = page_targets[-1]["targetId"]
235
+ if tab_infos:
236
+ target_id = tab_infos[0].target_id
259
237
  else:
260
- new_target = await client.send.Target.createTarget(params={'url': ''})
261
- target_id = new_target["targetId"]
262
- await self.main_browser_session.get_or_create_cdp_session(target_id, focus=False)
238
+ target_id = await self.main_browser_session.navigate_to_url(url="about:blank", new_tab=True)
263
239
  return target_id
264
240
 
265
- async def _get_activate_tab_info(self) -> Optional[TabInfo]:
241
+ async def get_activate_tab(self) -> Optional[TabInfo]:
266
242
  """Get tab information for the currently active target."""
267
243
  try:
268
244
  # Get the active target ID
269
245
  active_target_id = await self._get_active_target()
270
-
246
+ if active_target_id is None:
247
+ return None
271
248
  # Get target information from CDP
272
- client = self.main_browser_session.cdp_client
273
- targets_info = await client.send.Target.getTargets()
249
+ tab_infos = await self.get_all_tabs()
274
250
 
275
251
  # Find the active target in the targets list
276
- for target in targets_info["targetInfos"]:
277
- if target["targetId"] == active_target_id and target["type"] == "page":
278
- # Get additional target info for title if needed
279
- try:
280
- target_info = await client.send.Target.getTargetInfo(
281
- params={'targetId': active_target_id}
282
- )
283
- target_details = target_info.get('targetInfo', target)
284
- except Exception:
285
- target_details = target
286
-
252
+ for tab_info in tab_infos:
253
+ if tab_info.target_id == active_target_id:
254
+ await self.main_browser_session.get_or_create_cdp_session(active_target_id, focus=True)
287
255
  # Create TabInfo object
288
256
  return TabInfo(
289
- url=target_details.get('url', ''),
290
- title=target_details.get('title', ''),
257
+ url=tab_info.url,
258
+ title=tab_info.title,
291
259
  target_id=active_target_id
292
260
  )
293
-
294
261
  return None
295
262
  except Exception:
296
263
  return None
264
+
265
+ async def get_all_tabs(self) -> list[TabInfo]:
266
+ tabs = await self.main_browser_session.get_tabs()
267
+ return tabs