vibesurf 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vibesurf might be problematic. Click here for more details.
- vibe_surf/__init__.py +12 -0
- vibe_surf/_version.py +34 -0
- vibe_surf/agents/__init__.py +0 -0
- vibe_surf/agents/browser_use_agent.py +1106 -0
- vibe_surf/agents/prompts/__init__.py +1 -0
- vibe_surf/agents/prompts/vibe_surf_prompt.py +176 -0
- vibe_surf/agents/report_writer_agent.py +360 -0
- vibe_surf/agents/vibe_surf_agent.py +1632 -0
- vibe_surf/backend/__init__.py +0 -0
- vibe_surf/backend/api/__init__.py +3 -0
- vibe_surf/backend/api/activity.py +243 -0
- vibe_surf/backend/api/config.py +740 -0
- vibe_surf/backend/api/files.py +322 -0
- vibe_surf/backend/api/models.py +257 -0
- vibe_surf/backend/api/task.py +300 -0
- vibe_surf/backend/database/__init__.py +13 -0
- vibe_surf/backend/database/manager.py +129 -0
- vibe_surf/backend/database/models.py +164 -0
- vibe_surf/backend/database/queries.py +922 -0
- vibe_surf/backend/database/schemas.py +100 -0
- vibe_surf/backend/llm_config.py +182 -0
- vibe_surf/backend/main.py +137 -0
- vibe_surf/backend/migrations/__init__.py +16 -0
- vibe_surf/backend/migrations/init_db.py +303 -0
- vibe_surf/backend/migrations/seed_data.py +236 -0
- vibe_surf/backend/shared_state.py +601 -0
- vibe_surf/backend/utils/__init__.py +7 -0
- vibe_surf/backend/utils/encryption.py +164 -0
- vibe_surf/backend/utils/llm_factory.py +225 -0
- vibe_surf/browser/__init__.py +8 -0
- vibe_surf/browser/agen_browser_profile.py +130 -0
- vibe_surf/browser/agent_browser_session.py +416 -0
- vibe_surf/browser/browser_manager.py +296 -0
- vibe_surf/browser/utils.py +790 -0
- vibe_surf/browser/watchdogs/__init__.py +0 -0
- vibe_surf/browser/watchdogs/action_watchdog.py +291 -0
- vibe_surf/browser/watchdogs/dom_watchdog.py +954 -0
- vibe_surf/chrome_extension/background.js +558 -0
- vibe_surf/chrome_extension/config.js +48 -0
- vibe_surf/chrome_extension/content.js +284 -0
- vibe_surf/chrome_extension/dev-reload.js +47 -0
- vibe_surf/chrome_extension/icons/convert-svg.js +33 -0
- vibe_surf/chrome_extension/icons/logo-preview.html +187 -0
- vibe_surf/chrome_extension/icons/logo.png +0 -0
- vibe_surf/chrome_extension/manifest.json +53 -0
- vibe_surf/chrome_extension/popup.html +134 -0
- vibe_surf/chrome_extension/scripts/api-client.js +473 -0
- vibe_surf/chrome_extension/scripts/main.js +491 -0
- vibe_surf/chrome_extension/scripts/markdown-it.min.js +3 -0
- vibe_surf/chrome_extension/scripts/session-manager.js +599 -0
- vibe_surf/chrome_extension/scripts/ui-manager.js +3687 -0
- vibe_surf/chrome_extension/sidepanel.html +347 -0
- vibe_surf/chrome_extension/styles/animations.css +471 -0
- vibe_surf/chrome_extension/styles/components.css +670 -0
- vibe_surf/chrome_extension/styles/main.css +2307 -0
- vibe_surf/chrome_extension/styles/settings.css +1100 -0
- vibe_surf/cli.py +357 -0
- vibe_surf/controller/__init__.py +0 -0
- vibe_surf/controller/file_system.py +53 -0
- vibe_surf/controller/mcp_client.py +68 -0
- vibe_surf/controller/vibesurf_controller.py +616 -0
- vibe_surf/controller/views.py +37 -0
- vibe_surf/llm/__init__.py +21 -0
- vibe_surf/llm/openai_compatible.py +237 -0
- vibesurf-0.1.0.dist-info/METADATA +97 -0
- vibesurf-0.1.0.dist-info/RECORD +70 -0
- vibesurf-0.1.0.dist-info/WHEEL +5 -0
- vibesurf-0.1.0.dist-info/entry_points.txt +2 -0
- vibesurf-0.1.0.dist-info/licenses/LICENSE +201 -0
- vibesurf-0.1.0.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
|
|
3
|
+
from browser_use.browser.default_action_watchdog import DefaultActionWatchdog
|
|
4
|
+
from browser_use.browser.events import (
|
|
5
|
+
ClickElementEvent,
|
|
6
|
+
GetDropdownOptionsEvent,
|
|
7
|
+
GoBackEvent,
|
|
8
|
+
GoForwardEvent,
|
|
9
|
+
RefreshEvent,
|
|
10
|
+
ScrollEvent,
|
|
11
|
+
ScrollToTextEvent,
|
|
12
|
+
SelectDropdownOptionEvent,
|
|
13
|
+
SendKeysEvent,
|
|
14
|
+
TypeTextEvent,
|
|
15
|
+
UploadFileEvent,
|
|
16
|
+
WaitEvent,
|
|
17
|
+
)
|
|
18
|
+
from browser_use.browser.views import BrowserError, URLNotAllowedError
|
|
19
|
+
from browser_use.browser.watchdog_base import BaseWatchdog
|
|
20
|
+
from browser_use.dom.service import EnhancedDOMTreeNode
|
|
21
|
+
|
|
22
|
+
class CustomActionWatchdog(DefaultActionWatchdog):
|
|
23
|
+
async def on_ClickElementEvent(self, event: ClickElementEvent) -> None:
    """Click ``event.node`` via CDP and reconcile session focus afterwards.

    The handler:

    1. Refuses to run when the session has no ``agent_focus`` / ``target_id``.
    2. Snapshots the open page targets so a newly opened tab can be detected.
    3. Refuses to click file inputs (a dedicated upload action handles those).
    4. Performs the click, waits one second for a possible new tab, and drops
       the cached browser state, selector map and DOM watchdog cache.
    5. Re-focuses the target that was active before the click, then, if a new
       page target appeared, attaches to it (focused unless the click was a
       ctrl+click).

    Args:
        event: Click request; ``event.node`` is the element to click and
            ``event.while_holding_ctrl`` tells whether ctrl was held.

    Raises:
        BrowserError: If the session has no usable focus target, or the
            element is a file input.
    """
    session = self.browser_session

    async def _list_page_targets():
        # Prefer the main browser session's page list when this session exposes one.
        owner = getattr(session, 'main_browser_session', None) or session
        return await owner._cdp_get_all_pages()

    # Check that the session is alive before attempting any operation.
    if not session.agent_focus or not session.agent_focus.target_id:
        error_msg = 'Cannot execute click: browser session is corrupted (target_id=None). Session may have crashed.'
        self.logger.error(f'⚠️ {error_msg}')
        raise BrowserError(error_msg)

    element_node = event.node
    # Compare against None so that a legitimate index of 0 is not reported as 'unknown'.
    element_index = element_node.element_index
    index_for_logging = element_index if element_index is not None else 'unknown'
    starting_target_id = session.agent_focus.target_id

    # Snapshot the open pages so a tab opened by the click can be detected.
    initial_target_ids = await _list_page_targets()

    # File inputs must not be clicked: they open a native upload dialog.
    if session.is_file_input(element_node):
        msg = f'Index {index_for_logging} - has an element which opens file upload dialog. To upload files please use a specific function to upload files'
        self.logger.info(msg)
        raise BrowserError(
            'Click triggered a file input element which could not be handled, use the dedicated file upload function instead'
        )

    # Perform the actual click using the internal implementation.
    await self._click_element_node_impl(element_node, while_holding_ctrl=event.while_holding_ctrl)

    msg = f'Clicked button with index {index_for_logging}: {element_node.get_all_children_text(max_depth=2)}'
    self.logger.debug(f'🖱️ {msg}')
    self.logger.debug(f'Element xpath: {element_node.xpath}')

    # Tab creation is asynchronous and may not be immediate, so give it a moment.
    await asyncio.sleep(1)

    # The DOM may have changed, so every cached view of it is now stale.
    self.logger.debug('🔄 Click action completed, clearing cached browser state')
    session._cached_browser_state_summary = None
    session._cached_selector_map.clear()
    if session._dom_watchdog:
        session._dom_watchdog.clear_cache()

    # Successfully clicked: always reset back to the parent page session context.
    session.agent_focus = await session.get_or_create_cdp_session(
        target_id=starting_target_id, focus=True
    )

    # Any target id that was not present before the click is a newly opened tab.
    after_target_ids = await _list_page_targets()
    new_target_ids = {t['targetId'] for t in after_target_ids} - {t['targetId'] for t in initial_target_ids}
    if new_target_ids:
        new_tab_msg = 'New tab opened - switching to it'
        self.logger.info(f'🔗 {new_tab_msg}')
        new_target_id = new_target_ids.pop()
        # Without ctrl the agent was not expecting a new tab, so focus it to make
        # the agent aware of the surprise. With ctrl held, match the human
        # expectation of ctrl+click: attach to the tab but leave it in the background.
        await session.get_or_create_cdp_session(
            target_id=new_target_id, focus=not event.while_holding_ctrl
        )

    return None
|
|
109
|
+
|
|
110
|
+
async def _input_text_element_node_impl(self, element_node, text: str, clear_existing: bool = True) -> dict | None:
|
|
111
|
+
"""
|
|
112
|
+
Input text into an element using pure CDP with improved focus fallbacks.
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
try:
|
|
116
|
+
# Get CDP client
|
|
117
|
+
cdp_session = await self.browser_session.cdp_client_for_node(element_node)
|
|
118
|
+
|
|
119
|
+
# Get element info
|
|
120
|
+
backend_node_id = element_node.backend_node_id
|
|
121
|
+
|
|
122
|
+
# Track coordinates for metadata
|
|
123
|
+
input_coordinates = None
|
|
124
|
+
|
|
125
|
+
# Scroll element into view
|
|
126
|
+
try:
|
|
127
|
+
await cdp_session.cdp_client.send.DOM.scrollIntoViewIfNeeded(
|
|
128
|
+
params={'backendNodeId': backend_node_id}, session_id=cdp_session.session_id
|
|
129
|
+
)
|
|
130
|
+
await asyncio.sleep(0.1)
|
|
131
|
+
except Exception as e:
|
|
132
|
+
self.logger.warning(
|
|
133
|
+
f'⚠️ Failed to focus the page {cdp_session} and scroll element {element_node} into view before typing in text: {type(e).__name__}: {e}'
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
# Get object ID for the element
|
|
137
|
+
result = await cdp_session.cdp_client.send.DOM.resolveNode(
|
|
138
|
+
params={'backendNodeId': backend_node_id},
|
|
139
|
+
session_id=cdp_session.session_id,
|
|
140
|
+
)
|
|
141
|
+
assert 'object' in result and 'objectId' in result['object'], (
|
|
142
|
+
'Failed to find DOM element based on backendNodeId, maybe page content changed?'
|
|
143
|
+
)
|
|
144
|
+
object_id = result['object']['objectId']
|
|
145
|
+
|
|
146
|
+
# Check element focusability before attempting focus
|
|
147
|
+
element_info = await self._check_element_focusability(element_node, object_id, cdp_session.session_id)
|
|
148
|
+
self.logger.debug(f'Element focusability check: {element_info}')
|
|
149
|
+
|
|
150
|
+
# Extract coordinates from element bounds for metadata
|
|
151
|
+
bounds = element_info.get('bounds', {})
|
|
152
|
+
if bounds.get('width', 0) > 0 and bounds.get('height', 0) > 0:
|
|
153
|
+
center_x = bounds['x'] + bounds['width'] / 2
|
|
154
|
+
center_y = bounds['y'] + bounds['height'] / 2
|
|
155
|
+
input_coordinates = {"input_x": center_x, "input_y": center_y}
|
|
156
|
+
self.logger.debug(f'📍 Input coordinates: x={center_x:.1f}, y={center_y:.1f}')
|
|
157
|
+
|
|
158
|
+
# Provide helpful warnings for common issues
|
|
159
|
+
if not element_info.get('visible', False):
|
|
160
|
+
self.logger.warning('⚠️ Target element appears to be invisible or has zero dimensions')
|
|
161
|
+
if element_info.get('disabled', False):
|
|
162
|
+
self.logger.warning('⚠️ Target element appears to be disabled')
|
|
163
|
+
if not element_info.get('focusable', False):
|
|
164
|
+
self.logger.warning('⚠️ Target element may not be focusable by standard criteria')
|
|
165
|
+
|
|
166
|
+
# Clear existing text if requested
|
|
167
|
+
if clear_existing:
|
|
168
|
+
await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
|
169
|
+
params={
|
|
170
|
+
'functionDeclaration': 'function() { if (this.value !== undefined) this.value = ""; if (this.textContent !== undefined) this.textContent = ""; }',
|
|
171
|
+
'objectId': object_id,
|
|
172
|
+
},
|
|
173
|
+
session_id=cdp_session.session_id,
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
# Try multiple focus strategies
|
|
177
|
+
focused_successfully = False
|
|
178
|
+
|
|
179
|
+
# Strategy 1: Try CDP DOM.focus (original method)
|
|
180
|
+
try:
|
|
181
|
+
await cdp_session.cdp_client.send.DOM.focus(
|
|
182
|
+
params={'backendNodeId': backend_node_id},
|
|
183
|
+
session_id=cdp_session.session_id,
|
|
184
|
+
)
|
|
185
|
+
focused_successfully = True
|
|
186
|
+
self.logger.debug('✅ Element focused using CDP DOM.focus')
|
|
187
|
+
except Exception as e:
|
|
188
|
+
self.logger.debug(f'CDP DOM.focus failed: {e}')
|
|
189
|
+
|
|
190
|
+
# Strategy 2: Try JavaScript focus as fallback
|
|
191
|
+
try:
|
|
192
|
+
await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
|
193
|
+
params={
|
|
194
|
+
'functionDeclaration': 'function() { this.focus(); }',
|
|
195
|
+
'objectId': object_id,
|
|
196
|
+
},
|
|
197
|
+
session_id=cdp_session.session_id,
|
|
198
|
+
)
|
|
199
|
+
focused_successfully = True
|
|
200
|
+
self.logger.debug('✅ Element focused using JavaScript focus()')
|
|
201
|
+
except Exception as js_e:
|
|
202
|
+
self.logger.debug(f'JavaScript focus failed: {js_e}')
|
|
203
|
+
|
|
204
|
+
# Strategy 3: Try click-to-focus for stubborn elements
|
|
205
|
+
try:
|
|
206
|
+
await cdp_session.cdp_client.send.Runtime.callFunctionOn(
|
|
207
|
+
params={
|
|
208
|
+
'functionDeclaration': 'function() { this.click(); this.focus(); }',
|
|
209
|
+
'objectId': object_id,
|
|
210
|
+
},
|
|
211
|
+
session_id=cdp_session.session_id,
|
|
212
|
+
)
|
|
213
|
+
focused_successfully = True
|
|
214
|
+
self.logger.debug('✅ Element focused using click + focus combination')
|
|
215
|
+
except Exception as click_e:
|
|
216
|
+
self.logger.debug(f'Click + focus failed: {click_e}')
|
|
217
|
+
|
|
218
|
+
# Strategy 4: Try simulated mouse click for maximum compatibility
|
|
219
|
+
try:
|
|
220
|
+
# Use coordinates already calculated from element bounds
|
|
221
|
+
if input_coordinates and 'input_x' in input_coordinates and 'input_y' in input_coordinates:
|
|
222
|
+
click_x = input_coordinates['input_x']
|
|
223
|
+
click_y = input_coordinates['input_y']
|
|
224
|
+
|
|
225
|
+
await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
|
|
226
|
+
params={
|
|
227
|
+
'type': 'mousePressed',
|
|
228
|
+
'x': click_x,
|
|
229
|
+
'y': click_y,
|
|
230
|
+
'button': 'left',
|
|
231
|
+
'clickCount': 1,
|
|
232
|
+
},
|
|
233
|
+
session_id=cdp_session.session_id,
|
|
234
|
+
)
|
|
235
|
+
await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
|
|
236
|
+
params={
|
|
237
|
+
'type': 'mouseReleased',
|
|
238
|
+
'x': click_x,
|
|
239
|
+
'y': click_y,
|
|
240
|
+
'button': 'left',
|
|
241
|
+
'clickCount': 1,
|
|
242
|
+
},
|
|
243
|
+
session_id=cdp_session.session_id,
|
|
244
|
+
)
|
|
245
|
+
focused_successfully = True
|
|
246
|
+
self.logger.debug('✅ Element focused using simulated mouse click')
|
|
247
|
+
else:
|
|
248
|
+
self.logger.debug('Element bounds not available for mouse click')
|
|
249
|
+
except Exception as mouse_e:
|
|
250
|
+
self.logger.debug(f'Simulated mouse click failed: {mouse_e}')
|
|
251
|
+
|
|
252
|
+
# Log focus result
|
|
253
|
+
if not focused_successfully:
|
|
254
|
+
self.logger.warning('⚠️ All focus strategies failed, typing without explicit focus')
|
|
255
|
+
|
|
256
|
+
# Type the text character by character
|
|
257
|
+
for char in text:
|
|
258
|
+
# Send keydown (without text to avoid duplication)
|
|
259
|
+
await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
|
|
260
|
+
params={
|
|
261
|
+
'type': 'keyDown',
|
|
262
|
+
'key': char,
|
|
263
|
+
},
|
|
264
|
+
session_id=cdp_session.session_id,
|
|
265
|
+
)
|
|
266
|
+
# Send char (for actual text input)
|
|
267
|
+
await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
|
|
268
|
+
params={
|
|
269
|
+
'type': 'char',
|
|
270
|
+
'text': char,
|
|
271
|
+
'key': char,
|
|
272
|
+
},
|
|
273
|
+
session_id=cdp_session.session_id,
|
|
274
|
+
)
|
|
275
|
+
# Send keyup (without text to avoid duplication)
|
|
276
|
+
await cdp_session.cdp_client.send.Input.dispatchKeyEvent(
|
|
277
|
+
params={
|
|
278
|
+
'type': 'keyUp',
|
|
279
|
+
'key': char,
|
|
280
|
+
},
|
|
281
|
+
session_id=cdp_session.session_id,
|
|
282
|
+
)
|
|
283
|
+
# Small delay between characters
|
|
284
|
+
await asyncio.sleep(0.01)
|
|
285
|
+
|
|
286
|
+
# Return coordinates metadata if available
|
|
287
|
+
return input_coordinates
|
|
288
|
+
|
|
289
|
+
except Exception as e:
|
|
290
|
+
self.logger.error(f'Failed to input text via CDP: {type(e).__name__}: {e}')
|
|
291
|
+
raise BrowserError(f'Failed to input text into element: {repr(element_node)}')
|