vibesurf 0.1.36__py3-none-any.whl → 0.1.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -177,6 +177,22 @@ def create_llm_from_profile(llm_profile) -> BaseChatModel:
177
177
  params["region_name"] = provider_config["region_name"]
178
178
  return ChatAnthropicBedrock(**params)
179
179
 
180
+ elif provider == "qwen":
181
+ return ChatOpenAICompatible(
182
+ model=model,
183
+ base_url=base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1",  # caller-supplied base_url wins; DashScope endpoint is only the fallback ("literal" or x always yields the literal)
184
+ api_key=api_key,
185
+ **common_params
186
+ )
187
+
188
+ elif provider == "kimi":
189
+ return ChatOpenAICompatible(
190
+ model=model,
191
+ base_url=base_url or "https://api.moonshot.cn/v1",  # caller-supplied base_url wins; Moonshot endpoint is only the fallback ("literal" or x always yields the literal)
192
+ api_key=api_key,
193
+ **common_params
194
+ )
195
+
180
196
  elif provider == "openai_compatible":
181
197
  if not base_url:
182
198
  raise ValueError("OpenAI Compatible provider requires base_url")
@@ -45,6 +45,11 @@ class AgentBrowserProfile(BrowserProfile):
45
45
  'id': 'edibdbjcniadpccecjdfdjjppcpchdlm',
46
46
  'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=130&acceptformat=crx3&x=id%3Dedibdbjcniadpccecjdfdjjppcpchdlm%26uc',
47
47
  },
48
+ {
49
+ 'name': 'Force Background Tab',
50
+ 'id': 'gidlfommnbibbmegmgajdbikelkdcmcl',
51
+ 'url': 'https://clients2.google.com/service/update2/crx?response=redirect&prodversion=133&acceptformat=crx3&x=id%3Dgidlfommnbibbmegmgajdbikelkdcmcl%26uc',
52
+ },
48
53
  # {
49
54
  # 'name': 'ClearURLs',
50
55
  # 'id': 'lckanjgmijmafbedllaakclkaicjfmnk',
@@ -4,7 +4,7 @@ import asyncio
4
4
  import os
5
5
  import pdb
6
6
  from pathlib import Path
7
- from typing import Any, List, Optional
7
+ from typing import TYPE_CHECKING, Any, Literal, Self, Union, cast, Optional
8
8
 
9
9
  from browser_use.browser.session import BrowserSession, CDPSession
10
10
  from pydantic import Field
@@ -68,7 +68,7 @@ class AgentBrowserSession(BrowserSession):
68
68
  executable_path: str | Path | None = None,
69
69
  headless: bool | None = None,
70
70
  args: list[str] | None = None,
71
- ignore_default_args: list[str] | list[bool] | None = None,
71
+ ignore_default_args: list[str] | Literal[True] | None = None,
72
72
  channel: str | None = None,
73
73
  chromium_sandbox: bool | None = None,
74
74
  devtools: bool | None = None,
@@ -86,11 +86,15 @@ class AgentBrowserSession(BrowserSession):
86
86
  record_har_mode: str | None = None,
87
87
  record_har_path: str | Path | None = None,
88
88
  record_video_dir: str | Path | None = None,
89
+ record_video_framerate: int | None = None,
90
+ record_video_size: dict | None = None,
89
91
  # From BrowserLaunchPersistentContextArgs
90
92
  user_data_dir: str | Path | None = None,
91
93
  # From BrowserNewContextArgs
92
94
  storage_state: str | Path | dict[str, Any] | None = None,
93
95
  # BrowserProfile specific fields
96
+ use_cloud: bool | None = None,
97
+ cloud_browser: bool | None = None, # Backward compatibility alias
94
98
  disable_security: bool | None = None,
95
99
  deterministic_rendering: bool | None = None,
96
100
  allowed_domains: list[str] | None = None,
@@ -99,15 +103,21 @@ class AgentBrowserSession(BrowserSession):
99
103
  enable_default_extensions: bool | None = None,
100
104
  window_size: dict | None = None,
101
105
  window_position: dict | None = None,
102
- cross_origin_iframes: bool | None = None,
103
106
  minimum_wait_page_load_time: float | None = None,
104
107
  wait_for_network_idle_page_load_time: float | None = None,
105
108
  wait_between_actions: float | None = None,
106
- highlight_elements: bool | None = None,
107
109
  filter_highlight_ids: bool | None = None,
108
110
  auto_download_pdfs: bool | None = None,
109
111
  profile_directory: str | None = None,
110
112
  cookie_whitelist_domains: list[str] | None = None,
113
+ # DOM extraction layer configuration
114
+ cross_origin_iframes: bool | None = None,
115
+ highlight_elements: bool | None = None,
116
+ dom_highlight_elements: bool | None = None,
117
+ paint_order_filtering: bool | None = None,
118
+ # Iframe processing limits
119
+ max_iframes: int | None = None,
120
+ max_iframe_depth: int | None = None,
111
121
  # AgentBrowserProfile specific fields
112
122
  custom_extensions: list[str] | None = None,
113
123
  ):
@@ -585,17 +595,19 @@ class AgentBrowserSession(BrowserSession):
585
595
  f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: Network waiting failed: {e}, continuing anyway...'
586
596
  )
587
597
 
588
- async def take_screenshot(self, target_id: Optional[str] = None, format: str = 'png') -> str:
598
+ async def take_screenshot(self, target_id: Optional[str] = None,
599
+ path: str | None = None,
600
+ full_page: bool = False,
601
+ format: str = 'png',
602
+ quality: int | None = None,
603
+ clip: dict | None = None,
604
+ ) -> bytes:
589
605
  """
590
606
  Concurrent screenshot method that bypasses serial bottlenecks in ScreenshotWatchdog.
591
607
 
592
608
  This method performs direct CDP calls for maximum concurrency.
593
609
  """
594
- if target_id is None:
595
- if not self.agent_focus:
596
- self.logger.warning('No page focus to get html, please specify a target id.')
597
- return ''
598
- target_id = self.agent_focus.target_id
610
+
599
611
  cdp_session = await self.get_or_create_cdp_session(target_id, focus=False)
600
612
  await self._wait_for_stable_network()
601
613
 
@@ -607,13 +619,98 @@ class AgentBrowserSession(BrowserSession):
607
619
  pass
608
620
 
609
621
  try:
622
+ import base64
610
623
  from cdp_use.cdp.page import CaptureScreenshotParameters
611
- # Direct CDP screenshot - bypasses all event system overhead
612
- params = CaptureScreenshotParameters(format=format, captureBeyondViewport=False, quality=90)
613
- result = await cdp_session.cdp_client.send.Page.captureScreenshot(
614
- params=params,
615
- session_id=cdp_session.session_id,
624
+
625
+ # Build parameters dict explicitly to satisfy TypedDict expectations
626
+ params: CaptureScreenshotParameters = {
627
+ 'format': format,
628
+ 'captureBeyondViewport': full_page,
629
+ }
630
+
631
+ if quality is not None and format == 'jpeg':
632
+ params['quality'] = quality
633
+
634
+ if clip:
635
+ params['clip'] = {
636
+ 'x': clip['x'],
637
+ 'y': clip['y'],
638
+ 'width': clip['width'],
639
+ 'height': clip['height'],
640
+ 'scale': 1,
641
+ }
642
+
643
+ params = CaptureScreenshotParameters(**params)
644
+
645
+ result = await cdp_session.cdp_client.send.Page.captureScreenshot(params=params,
646
+ session_id=cdp_session.session_id)
647
+
648
+ if not result or 'data' not in result:
649
+ raise Exception('Screenshot failed - no data returned')
650
+
651
+ screenshot_data = base64.b64decode(result['data'])
652
+
653
+ if path:
654
+ Path(path).write_bytes(screenshot_data)
655
+
656
+ return screenshot_data
657
+
658
+ except Exception as e:
659
+ self.logger.error(f'Concurrent screenshot failed: {type(e).__name__}: {e}')
660
+ raise
661
+
662
+ async def take_screenshot_base64(self, target_id: Optional[str] = None,
663
+ full_page: bool = False,
664
+ format: str = 'png',
665
+ quality: int | None = None,
666
+ clip: dict | None = None,
667
+ ) -> str:
668
+ """
669
+ Concurrent screenshot method that bypasses serial bottlenecks in ScreenshotWatchdog.
670
+
671
+ This method performs direct CDP calls for maximum concurrency.
672
+ """
673
+
674
+ cdp_session = await self.get_or_create_cdp_session(target_id, focus=False)
675
+ await self._wait_for_stable_network()
676
+
677
+ try:
678
+ ready_state = await cdp_session.cdp_client.send.Runtime.evaluate(
679
+ params={'expression': 'document.readyState'}, session_id=cdp_session.session_id
616
680
  )
681
+ except Exception:
682
+ pass
683
+
684
+ try:
685
+ import base64
686
+ from cdp_use.cdp.page import CaptureScreenshotParameters
687
+
688
+ # Build parameters dict explicitly to satisfy TypedDict expectations
689
+ params: CaptureScreenshotParameters = {
690
+ 'format': format,
691
+ 'captureBeyondViewport': full_page,
692
+ }
693
+
694
+ if quality is not None and format == 'jpeg':
695
+ params['quality'] = quality
696
+
697
+ if clip:
698
+ params['clip'] = {
699
+ 'x': clip['x'],
700
+ 'y': clip['y'],
701
+ 'width': clip['width'],
702
+ 'height': clip['height'],
703
+ 'scale': 1,
704
+ }
705
+
706
+ params = CaptureScreenshotParameters(**params)
707
+
708
+ result = await cdp_session.cdp_client.send.Page.captureScreenshot(params=params,
709
+ session_id=cdp_session.session_id)
710
+
711
+ if not result or 'data' not in result:
712
+ raise Exception('Screenshot failed - no data returned')
713
+
617
714
  return result['data']
618
715
 
619
716
  except Exception as e:
@@ -625,12 +722,8 @@ class AgentBrowserSession(BrowserSession):
625
722
  Get html content of current page
626
723
  :return:
627
724
  """
628
- if target_id is None:
629
- if not self.agent_focus:
630
- self.logger.warning('No page focus to get html, please specify a target id.')
631
- return ''
632
- target_id = self.agent_focus.target_id
633
- cdp_session = await self.get_or_create_cdp_session(target_id, focus=True)
725
+
726
+ cdp_session = await self.get_or_create_cdp_session(target_id, focus=False)
634
727
  await self._wait_for_stable_network()
635
728
 
636
729
  try:
@@ -654,7 +747,6 @@ class AgentBrowserSession(BrowserSession):
654
747
 
655
748
  async def get_browser_state_summary(
656
749
  self,
657
- cache_clickable_elements_hashes: bool = True,
658
750
  include_screenshot: bool = True,
659
751
  cached: bool = False,
660
752
  include_recent_events: bool = False,
@@ -677,7 +769,6 @@ class AgentBrowserSession(BrowserSession):
677
769
  browser_state = await self._dom_watchdog.get_browser_state_no_event_bus(
678
770
  include_dom=True,
679
771
  include_screenshot=include_screenshot,
680
- cache_clickable_elements_hashes=cache_clickable_elements_hashes,
681
772
  include_recent_events=include_recent_events
682
773
  )
683
774
  return browser_state
@@ -738,9 +829,9 @@ class AgentBrowserSession(BrowserSession):
738
829
 
739
830
  return tabs
740
831
 
741
- async def refresh_page(self):
742
- cdp_session = await self.browser_session.get_or_create_cdp_session()
832
+ async def refresh_page(self, target_id: Optional[str] = None, ):
743
833
  try:
834
+ cdp_session = await self.browser_session.get_or_create_cdp_session(target_id)
744
835
  # Reload the target
745
836
  await cdp_session.cdp_client.send.Page.reload(session_id=cdp_session.session_id)
746
837
 
@@ -20,86 +20,4 @@ from browser_use.browser.watchdog_base import BaseWatchdog
20
20
  from browser_use.dom.service import EnhancedDOMTreeNode
21
21
 
22
22
  class CustomActionWatchdog(DefaultActionWatchdog):
23
- async def on_ClickElementEvent(self, event: ClickElementEvent) -> None:
24
- """Handle click request with CDP."""
25
- try:
26
- # Check if session is alive before attempting any operations
27
- if not self.browser_session.agent_focus or not self.browser_session.agent_focus.target_id:
28
- error_msg = 'Cannot execute click: browser session is corrupted (target_id=None). Session may have crashed.'
29
- self.logger.error(f'⚠️ {error_msg}')
30
- raise BrowserError(error_msg)
31
-
32
- # Use the provided node
33
- element_node = event.node
34
- index_for_logging = element_node.element_index or 'unknown'
35
- starting_target_id = self.browser_session.agent_focus.target_id
36
-
37
- # Track initial number of tabs to detect new tab opening
38
- if hasattr(self.browser_session, "main_browser_session") and self.browser_session.main_browser_session:
39
- initial_target_ids = await self.browser_session.main_browser_session._cdp_get_all_pages()
40
- else:
41
- initial_target_ids = await self.browser_session._cdp_get_all_pages()
42
-
43
- # Check if element is a file input (should not be clicked)
44
- if self.browser_session.is_file_input(element_node):
45
- msg = f'Index {index_for_logging} - has an element which opens file upload dialog. To upload files please use a specific function to upload files'
46
- self.logger.info(msg)
47
- raise BrowserError(
48
- message=msg,
49
- long_term_memory=msg,
50
- )
51
-
52
- # Perform the actual click using internal implementation
53
- click_metadata = None
54
- click_metadata = await self._click_element_node_impl(element_node,
55
- while_holding_ctrl=event.while_holding_ctrl)
56
- download_path = None # moved to downloads_watchdog.py
57
-
58
- # Build success message
59
- if download_path:
60
- msg = f'Downloaded file to {download_path}'
61
- self.logger.info(f'💾 {msg}')
62
- else:
63
- msg = f'Clicked button with index {index_for_logging}: {element_node.get_all_children_text(max_depth=2)}'
64
- self.logger.debug(f'🖱️ {msg}')
65
- self.logger.debug(f'Element xpath: {element_node.xpath}')
66
-
67
- # Wait a bit for potential new tab to be created
68
- # This is necessary because tab creation is async and might not be immediate
69
- await asyncio.sleep(0.5)
70
-
71
- # Clear cached state after click action since DOM might have changed
72
- self.browser_session.agent_focus = await self.browser_session.get_or_create_cdp_session(
73
- target_id=starting_target_id, focus=True
74
- )
75
-
76
- # Check if a new tab was opened
77
- if hasattr(self.browser_session, "main_browser_session") and self.browser_session.main_browser_session:
78
- after_target_ids = await self.browser_session.main_browser_session._cdp_get_all_pages()
79
- else:
80
- after_target_ids = await self.browser_session._cdp_get_all_pages()
81
- new_target_ids = {t['targetId'] for t in after_target_ids} - {t['targetId'] for t in initial_target_ids}
82
- if new_target_ids:
83
- new_tab_msg = 'New tab opened - switching to it'
84
- msg += f' - {new_tab_msg}'
85
- self.logger.info(f'🔗 {new_tab_msg}')
86
- new_target_id = new_target_ids.pop()
87
- if not event.while_holding_ctrl:
88
- # if while_holding_ctrl=False it means agent was not expecting a new tab to be opened
89
- # so we need to switch to the new tab to make the agent aware of the surprise new tab that was opened.
90
- # when while_holding_ctrl=True we dont actually want to switch to it,
91
- # we should match human expectations of ctrl+click which opens in the background,
92
- # so in multi_act it usually already sends [click_element_by_index(123, while_holding_ctrl=True), switch_tab(tab_id=None)] anyway
93
- from browser_use.browser.events import SwitchTabEvent
94
-
95
- await self.browser_session.get_or_create_cdp_session(
96
- target_id=new_target_id, focus=True
97
- )
98
- else:
99
- await self.browser_session.get_or_create_cdp_session(
100
- target_id=new_target_id, focus=False
101
- )
102
-
103
- return None
104
- except Exception as e:
105
- raise
23
+ pass
@@ -27,7 +27,6 @@ class CustomDOMWatchdog(DOMWatchdog):
27
27
 
28
28
  async def get_browser_state_no_event_bus(self, include_dom: bool = True,
29
29
  include_screenshot: bool = True,
30
- cache_clickable_elements_hashes: bool = True,
31
30
  include_recent_events: bool = False) -> 'BrowserStateSummary':
32
31
  """Handle browser state request by coordinating DOM building and screenshot capture.
33
32
 
@@ -91,7 +90,7 @@ class CustomDOMWatchdog(DOMWatchdog):
91
90
  # Start clean screenshot task if requested (without JS highlights)
92
91
  if include_screenshot:
93
92
  self.logger.debug('🔍 DOMWatchdog.on_BrowserStateRequestEvent: 📸 Starting clean screenshot task...')
94
- screenshot_task = asyncio.create_task(self.browser_session.take_screenshot())
93
+ screenshot_task = asyncio.create_task(self.browser_session.take_screenshot_base64())
95
94
 
96
95
  # Wait for both tasks to complete
97
96
  content = None
@@ -121,13 +120,18 @@ class CustomDOMWatchdog(DOMWatchdog):
121
120
  try:
122
121
  self.logger.debug(
123
122
  '🔍 DOMWatchdog.on_BrowserStateRequestEvent: 🎨 Applying Python-based highlighting...')
124
- from vibe_surf.browser.utils import create_highlighted_screenshot_async
123
+ from browser_use.browser.python_highlights import create_highlighted_screenshot_async
125
124
 
126
125
  # Get CDP session for viewport info
127
126
  cdp_session = await self.browser_session.get_or_create_cdp_session()
128
127
 
129
- screenshot_b64 = await create_highlighted_screenshot_async(screenshot_b64, content.selector_map,
130
- cdp_session)
128
+ screenshot_b64 = await create_highlighted_screenshot_async(
129
+ screenshot_b64,
130
+ content.selector_map,
131
+ cdp_session,
132
+ self.browser_session.browser_profile.filter_highlight_ids,
133
+ )
134
+
131
135
  self.logger.debug(
132
136
  f'🔍 DOMWatchdog.on_BrowserStateRequestEvent: ✅ Applied highlights to {len(content.selector_map)} elements'
133
137
  )
@@ -234,4 +238,3 @@ class CustomDOMWatchdog(DOMWatchdog):
234
238
  is_pdf_viewer=False,
235
239
  recent_events=None,
236
240
  )
237
-
vibe_surf/cli.py CHANGED
@@ -455,6 +455,8 @@ def main():
455
455
  import vibe_surf
456
456
  console.print(f"[dim]Version: {vibe_surf.__version__}[/dim]\n")
457
457
  console.print(f"[dim]Author: WarmShao and Community Contributors [/dim]\n")
458
+ console.print("[dim]VibeSurf collects anonymous usage data by default to improve user experience.[/dim]")
459
+ console.print("[dim]To opt out, set environment variable: VIBESURF_ANONYMIZED_TELEMETRY=false[/dim]\n")
458
460
 
459
461
  # Capture telemetry start event
460
462
  start_event = CLITelemetryEvent(
@@ -209,14 +209,6 @@ class ChatOpenAICompatible(ChatOpenAI):
209
209
 
210
210
  return clean_schema(schema)
211
211
 
212
- @overload
213
- async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]:
214
- ...
215
-
216
- @overload
217
- async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]:
218
- ...
219
-
220
212
  async def ainvoke(
221
213
  self, messages: list[BaseMessage], output_format: type[T] | None = None
222
214
  ) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
@@ -299,7 +291,8 @@ class ChatOpenAICompatible(ChatOpenAI):
299
291
 
300
292
  # Add JSON schema to system prompt if requested
301
293
  if self.add_schema_to_system_prompt and openai_messages and openai_messages[0]['role'] == 'system':
302
- schema_text = f'\n<json_schema>\n{response_format}\n</json_schema>'
294
+ schema_text = "Your response must return JSON with followed format:\n"
295
+ schema_text += f'\n<json_schema>\n{response_format}\n</json_schema>'
303
296
  if isinstance(openai_messages[0]['content'], str):
304
297
  openai_messages[0]['content'] += schema_text
305
298
  elif isinstance(openai_messages[0]['content'], Iterable):
@@ -154,3 +154,35 @@ class BackendTelemetryEvent(BaseTelemetryEvent):
154
154
  error_message: str | None = None
155
155
 
156
156
  name: str = 'backend_event'
157
+
158
+
159
+ @dataclass
160
+ class VibeSurfAgentParsedOutputEvent(BaseTelemetryEvent):
161
+ """Telemetry event for VibeSurf Agent parsed output; only `version` is required, every other field defaults to None."""
162
+
163
+ version: str  # required: the only field without a default
164
+ parsed_output: str | None = None  # presumably the serialized agent output - TODO confirm at the emitting call site
165
+ action_count: int | None = None  # presumably the number of actions in the parsed output - TODO confirm
166
+ action_types: list[str] | None = None  # presumably the type names of those actions - TODO confirm
167
+ model: str | None = None  # presumably the LLM model identifier - TODO confirm
168
+ model_provider: str | None = None  # presumably the LLM provider name - TODO confirm
169
+ session_id: str | None = None
170
+ thinking: str | None = None  # NOTE(review): free-form text in a telemetry payload - confirm it cannot carry user content
171
+
172
+ name: str = 'vibesurf_agent_parsed_output'  # default identifier string for this event class
173
+
174
+
175
+ @dataclass
176
+ class VibeSurfAgentExceptionEvent(BaseTelemetryEvent):
177
+ """Telemetry event for VibeSurf Agent exceptions; `version` and `error_message` are required, every other field defaults to None."""
178
+
179
+ version: str  # required
180
+ error_message: str  # required
181
+ error_type: str | None = None  # presumably the exception class name - TODO confirm at the emitting call site
182
+ traceback: str | None = None  # NOTE(review): tracebacks can embed local paths/values - confirm scrubbing before upload
183
+ model: str | None = None  # presumably the LLM model identifier - TODO confirm
184
+ model_provider: str | None = None  # presumably the LLM provider name - TODO confirm
185
+ session_id: str | None = None
186
+ function_name: str | None = None  # presumably the function in which the exception was caught - TODO confirm
187
+
188
+ name: str = 'vibesurf_agent_exception'  # default identifier string for this event class