vibesurf-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vibesurf might be problematic.

Files changed (70)
  1. vibe_surf/__init__.py +12 -0
  2. vibe_surf/_version.py +34 -0
  3. vibe_surf/agents/__init__.py +0 -0
  4. vibe_surf/agents/browser_use_agent.py +1106 -0
  5. vibe_surf/agents/prompts/__init__.py +1 -0
  6. vibe_surf/agents/prompts/vibe_surf_prompt.py +176 -0
  7. vibe_surf/agents/report_writer_agent.py +360 -0
  8. vibe_surf/agents/vibe_surf_agent.py +1632 -0
  9. vibe_surf/backend/__init__.py +0 -0
  10. vibe_surf/backend/api/__init__.py +3 -0
  11. vibe_surf/backend/api/activity.py +243 -0
  12. vibe_surf/backend/api/config.py +740 -0
  13. vibe_surf/backend/api/files.py +322 -0
  14. vibe_surf/backend/api/models.py +257 -0
  15. vibe_surf/backend/api/task.py +300 -0
  16. vibe_surf/backend/database/__init__.py +13 -0
  17. vibe_surf/backend/database/manager.py +129 -0
  18. vibe_surf/backend/database/models.py +164 -0
  19. vibe_surf/backend/database/queries.py +922 -0
  20. vibe_surf/backend/database/schemas.py +100 -0
  21. vibe_surf/backend/llm_config.py +182 -0
  22. vibe_surf/backend/main.py +137 -0
  23. vibe_surf/backend/migrations/__init__.py +16 -0
  24. vibe_surf/backend/migrations/init_db.py +303 -0
  25. vibe_surf/backend/migrations/seed_data.py +236 -0
  26. vibe_surf/backend/shared_state.py +601 -0
  27. vibe_surf/backend/utils/__init__.py +7 -0
  28. vibe_surf/backend/utils/encryption.py +164 -0
  29. vibe_surf/backend/utils/llm_factory.py +225 -0
  30. vibe_surf/browser/__init__.py +8 -0
  31. vibe_surf/browser/agen_browser_profile.py +130 -0
  32. vibe_surf/browser/agent_browser_session.py +416 -0
  33. vibe_surf/browser/browser_manager.py +296 -0
  34. vibe_surf/browser/utils.py +790 -0
  35. vibe_surf/browser/watchdogs/__init__.py +0 -0
  36. vibe_surf/browser/watchdogs/action_watchdog.py +291 -0
  37. vibe_surf/browser/watchdogs/dom_watchdog.py +954 -0
  38. vibe_surf/chrome_extension/background.js +558 -0
  39. vibe_surf/chrome_extension/config.js +48 -0
  40. vibe_surf/chrome_extension/content.js +284 -0
  41. vibe_surf/chrome_extension/dev-reload.js +47 -0
  42. vibe_surf/chrome_extension/icons/convert-svg.js +33 -0
  43. vibe_surf/chrome_extension/icons/logo-preview.html +187 -0
  44. vibe_surf/chrome_extension/icons/logo.png +0 -0
  45. vibe_surf/chrome_extension/manifest.json +53 -0
  46. vibe_surf/chrome_extension/popup.html +134 -0
  47. vibe_surf/chrome_extension/scripts/api-client.js +473 -0
  48. vibe_surf/chrome_extension/scripts/main.js +491 -0
  49. vibe_surf/chrome_extension/scripts/markdown-it.min.js +3 -0
  50. vibe_surf/chrome_extension/scripts/session-manager.js +599 -0
  51. vibe_surf/chrome_extension/scripts/ui-manager.js +3687 -0
  52. vibe_surf/chrome_extension/sidepanel.html +347 -0
  53. vibe_surf/chrome_extension/styles/animations.css +471 -0
  54. vibe_surf/chrome_extension/styles/components.css +670 -0
  55. vibe_surf/chrome_extension/styles/main.css +2307 -0
  56. vibe_surf/chrome_extension/styles/settings.css +1100 -0
  57. vibe_surf/cli.py +357 -0
  58. vibe_surf/controller/__init__.py +0 -0
  59. vibe_surf/controller/file_system.py +53 -0
  60. vibe_surf/controller/mcp_client.py +68 -0
  61. vibe_surf/controller/vibesurf_controller.py +616 -0
  62. vibe_surf/controller/views.py +37 -0
  63. vibe_surf/llm/__init__.py +21 -0
  64. vibe_surf/llm/openai_compatible.py +237 -0
  65. vibesurf-0.1.0.dist-info/METADATA +97 -0
  66. vibesurf-0.1.0.dist-info/RECORD +70 -0
  67. vibesurf-0.1.0.dist-info/WHEEL +5 -0
  68. vibesurf-0.1.0.dist-info/entry_points.txt +2 -0
  69. vibesurf-0.1.0.dist-info/licenses/LICENSE +201 -0
  70. vibesurf-0.1.0.dist-info/top_level.txt +1 -0
vibe_surf/controller/vibesurf_controller.py
@@ -0,0 +1,616 @@
+ import pdb
+ import os
+ import asyncio
+ import json
+ import enum
+ import base64
+ import mimetypes
+
+ from typing import Optional, Type, Callable, Dict, Any, Union, Awaitable, TypeVar
+ from pydantic import BaseModel
+ from browser_use.controller.service import Controller
+ import logging
+ from browser_use.agent.views import ActionModel, ActionResult
+ from browser_use.utils import time_execution_sync
+ from browser_use.filesystem.file_system import FileSystem
+ from browser_use.browser import BrowserSession
+ from browser_use.browser.events import UploadFileEvent
+ from browser_use.observability import observe_debug
+ from browser_use.controller.views import (
+     ClickElementAction,
+     CloseTabAction,
+     DoneAction,
+     GetDropdownOptionsAction,
+     GoToUrlAction,
+     InputTextAction,
+     NoParamsAction,
+     ScrollAction,
+     SearchGoogleAction,
+     SelectDropdownOptionAction,
+     SendKeysAction,
+     StructuredOutputAction,
+     SwitchTabAction,
+     UploadFileAction,
+ )
+ from browser_use.llm.base import BaseChatModel
+ from browser_use.llm.messages import UserMessage, ContentPartTextParam, ContentPartImageParam, ImageURL
+ from browser_use.dom.service import EnhancedDOMTreeNode
+ from browser_use.browser.views import BrowserError
+ from browser_use.mcp.client import MCPClient
+
+
+ from vibe_surf.browser.agent_browser_session import AgentBrowserSession
+ from vibe_surf.controller.views import HoverAction, ExtractionAction, FileExtractionAction
+ from vibe_surf.controller.mcp_client import VibeSurfMCPClient
+
+ logger = logging.getLogger(__name__)
+
+ Context = TypeVar('Context')
+
+ T = TypeVar('T', bound=BaseModel)
+
+
+ class VibeSurfController(Controller):
+     def __init__(self,
+                  exclude_actions: list[str] = [],
+                  output_model: type[T] | None = None,
+                  display_files_in_done_text: bool = True,
+                  mcp_server_config: Optional[Dict[str, Any]] = None
+                  ):
+         super().__init__(exclude_actions=exclude_actions, output_model=output_model,
+                          display_files_in_done_text=display_files_in_done_text)
+         self._register_browser_actions()
+         self.mcp_server_config = mcp_server_config
+         self.mcp_clients = {}
+
+     def _register_browser_actions(self):
+         """Register custom browser actions"""
+
+         @self.registry.action(
+             'Hover over an element',
+             param_model=HoverAction,
+         )
+         async def hover_element(params: HoverAction, browser_session: AgentBrowserSession):
+             """Hovers over the element specified by its index from the cached selector map or by XPath."""
+             try:
+                 if params.xpath:
+                     # Find element by XPath using CDP
+                     cdp_session = await browser_session.get_or_create_cdp_session()
+                     result = await cdp_session.cdp_client.send.Runtime.evaluate(
+                         params={
+                             'expression': f"""
+                                 (() => {{
+                                     const element = document.evaluate('{params.xpath}', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
+                                     if (element) {{
+                                         const rect = element.getBoundingClientRect();
+                                         return {{found: true, x: rect.x + rect.width/2, y: rect.y + rect.height/2}};
+                                     }}
+                                     return {{found: false}};
+                                 }})()
+                             """,
+                             'returnByValue': True,
+                         },
+                         session_id=cdp_session.session_id,
+                     )
+                     element_info = result.get('result', {}).get('value', {})
+                     if not element_info.get('found'):
+                         raise Exception(f'Failed to locate element with XPath {params.xpath}')
+                     x, y = element_info['x'], element_info['y']
+
+                 elif params.selector:
+                     # Find element by CSS selector using CDP
+                     cdp_session = await browser_session.get_or_create_cdp_session()
+                     result = await cdp_session.cdp_client.send.Runtime.evaluate(
+                         params={
+                             'expression': f"""
+                                 (() => {{
+                                     const element = document.querySelector('{params.selector}');
+                                     if (element) {{
+                                         const rect = element.getBoundingClientRect();
+                                         return {{found: true, x: rect.x + rect.width/2, y: rect.y + rect.height/2}};
+                                     }}
+                                     return {{found: false}};
+                                 }})()
+                             """,
+                             'returnByValue': True,
+                         },
+                         session_id=cdp_session.session_id,
+                     )
+                     element_info = result.get('result', {}).get('value', {})
+                     if not element_info.get('found'):
+                         raise Exception(f'Failed to locate element with CSS Selector {params.selector}')
+                     x, y = element_info['x'], element_info['y']
+
+                 elif params.index is not None:
+                     # Use index to locate the element
+                     selector_map = await browser_session.get_selector_map()
+                     if params.index not in selector_map:
+                         raise Exception(
+                             f'Element index {params.index} does not exist - retry or use alternative actions')
+                     element_node = selector_map[params.index]
+
+                     # Get element position
+                     if not element_node.absolute_position:
+                         raise Exception(f'Element at index {params.index} has no position information')
+
+                     x = element_node.absolute_position.x + element_node.absolute_position.width / 2
+                     y = element_node.absolute_position.y + element_node.absolute_position.height / 2
+
+                 else:
+                     raise Exception('Either index, xpath, or selector must be provided')
+
+                 # Perform hover using CDP mouse events
+                 cdp_session = await browser_session.get_or_create_cdp_session()
+
+                 # Move mouse to the element position
+                 await cdp_session.cdp_client.send.Input.dispatchMouseEvent(
+                     params={
+                         'type': 'mouseMoved',
+                         'x': x,
+                         'y': y,
+                     },
+                     session_id=cdp_session.session_id,
+                 )
+
+                 # Wait a bit for hover state to trigger
+                 await asyncio.sleep(0.1)
+
+                 msg = (
+                     f'🖱️ Hovered over element at index {params.index}'
+                     if params.index is not None
+                     else f'🖱️ Hovered over element with XPath {params.xpath}'
+                     if params.xpath
+                     else f'🖱️ Hovered over element with selector {params.selector}'
+                 )
+                 return ActionResult(extracted_content=msg, include_in_memory=True)
+
+             except Exception as e:
+                 error_msg = f'❌ Failed to hover over element: {str(e)}'
+                 return ActionResult(error=error_msg)
+
+         # =======================
+         # NAVIGATION ACTIONS
+         # =======================
+
+         @self.registry.action(
+             'Search the query in Google, the query should be a search query like humans search in Google, concrete and not vague or super long.',
+             param_model=SearchGoogleAction,
+         )
+         async def search_google(params: SearchGoogleAction, browser_session: AgentBrowserSession):
+             search_url = f'https://www.google.com/search?q={params.query}&udm=14'
+
+             try:
+                 # Use AgentBrowserSession's direct navigation method
+                 await browser_session.navigate_to_url(search_url, new_tab=False)
+                 memory = f"Searched Google for '{params.query}'"
+                 msg = f'🔍 {memory}'
+                 logger.info(msg)
+                 return ActionResult(extracted_content=memory, include_in_memory=True, long_term_memory=memory)
+             except Exception as e:
+                 logger.error(f'Failed to search Google: {e}')
+                 return ActionResult(error=f'Failed to search Google for "{params.query}": {str(e)}')
+
+         @self.registry.action(
+             'Navigate to URL, set new_tab=True to open in new tab, False to navigate in current tab',
+             param_model=GoToUrlAction
+         )
+         async def go_to_url(params: GoToUrlAction, browser_session: AgentBrowserSession):
+             try:
+                 # Use AgentBrowserSession's direct navigation method
+                 await browser_session.navigate_to_url(params.url, new_tab=params.new_tab)
+
+                 if params.new_tab:
+                     memory = f'Opened new tab with URL {params.url}'
+                     msg = f'🔗 Opened new tab with url {params.url}'
+                 else:
+                     memory = f'Navigated to {params.url}'
+                     msg = f'🔗 {memory}'
+
+                 logger.info(msg)
+                 return ActionResult(extracted_content=msg, include_in_memory=True, long_term_memory=memory)
+             except Exception as e:
+                 logger.error(f'❌ Navigation failed: {str(e)}')
+                 return ActionResult(error=f'Navigation failed: {str(e)}')
+
+         @self.registry.action(
+             'Go back',
+         )
+         async def go_back(browser_session: AgentBrowserSession):
+             try:
+                 cdp_session = await browser_session.get_or_create_cdp_session()
+                 history = await cdp_session.cdp_client.send.Page.getNavigationHistory(session_id=cdp_session.session_id)
+                 current_index = history['currentIndex']
+                 entries = history['entries']
+
+                 # Check if we can go back
+                 if current_index <= 0:
+                     memory = msg = '⚠️ Cannot go back - no previous entry in history'
+                     logger.info(msg)
+                     return ActionResult(extracted_content=memory)
+
+                 # Navigate to the previous entry
+                 previous_entry_id = entries[current_index - 1]['id']
+                 await cdp_session.cdp_client.send.Page.navigateToHistoryEntry(
+                     params={'entryId': previous_entry_id}, session_id=cdp_session.session_id
+                 )
+
+                 # Wait for navigation
+                 await asyncio.sleep(0.5)
+                 memory = 'Navigated back'
+                 msg = f'🔙 {memory}'
+                 logger.info(msg)
+                 return ActionResult(extracted_content=memory)
+             except Exception as e:
+                 logger.error(f'Failed to go back: {str(e)}')
+                 return ActionResult(error=f'Failed to go back: {str(e)}')
+
+         @self.registry.action(
+             'Switch tab',
+             param_model=SwitchTabAction
+         )
+         async def switch_tab(params: SwitchTabAction, browser_session: AgentBrowserSession):
+             try:
+
+                 if params.tab_id:
+                     target_id = await browser_session.get_target_id_from_tab_id(params.tab_id)
+                 elif params.url:
+                     target_id = await browser_session.get_target_id_from_url(params.url)
+                 else:
+                     target_id = await browser_session.get_most_recently_opened_target_id()
+
+                 # Switch to target using CDP
+                 await browser_session.get_or_create_cdp_session(target_id, focus=True)
+
+                 memory = f'Switched to Tab with ID {target_id[-4:]}'
+                 logger.info(f'🔄 {memory}')
+                 return ActionResult(extracted_content=memory, include_in_memory=True, long_term_memory=memory)
+             except Exception as e:
+                 logger.error(f'Failed to switch tab: {str(e)}')
+                 return ActionResult(error=f'Failed to switch to tab {params.tab_id or params.url}: {str(e)}')
+
+         @self.registry.action(
+             """Extract structured, semantic data (e.g. product description, price, all information about XYZ) from the current webpage based on a textual query.
+ This tool takes the entire markdown of the page and extracts the query from it.
+ Set extract_links=True ONLY if your query requires extracting links/URLs from the page.
+ Only use this for specific queries for information retrieval from the page. Don't use this to get interactive elements - the tool does not see HTML elements, only the markdown.
+ Note: Extracting from the same page will yield the same results unless more content is loaded (e.g., through scrolling for dynamic content, or new page is loaded) - so one extraction per page state is sufficient. If you want to scrape a listing of many elements always first scroll a lot until the page end to load everything and then call this tool in the end.
+ If you called extract_structured_data in the last step and the result was not good (e.g. because of antispam protection), use the current browser state and scrolling to get the information, dont call extract_structured_data again.
+ """,
+             param_model=ExtractionAction
+         )
+         async def extract_structured_data(
+                 params: ExtractionAction,
+                 browser_session: AgentBrowserSession,
+                 page_extraction_llm: BaseChatModel,
+                 file_system: FileSystem,
+         ):
+             try:
+                 # Use AgentBrowserSession's direct method to get HTML content
+                 target_id = None
+                 if params.tab_id:
+                     target_id = await browser_session.get_target_id_from_tab_id(params.tab_id)
+                 page_html = await browser_session.get_html_content(target_id)
+
+                 # Simple markdown conversion
+                 import re
+                 import markdownify
+
+                 if params.extract_links:
+                     content = markdownify.markdownify(page_html, heading_style='ATX', bullets='-')
+                 else:
+                     content = markdownify.markdownify(page_html, heading_style='ATX', bullets='-', strip=['a'])
+                     # Remove all markdown links and images, keep only the text
+                     content = re.sub(r'!\[.*?\]\([^)]*\)', '', content, flags=re.MULTILINE | re.DOTALL)  # Remove images
+                     content = re.sub(
+                         r'\[([^\]]*)\]\([^)]*\)', r'\1', content, flags=re.MULTILINE | re.DOTALL
+                     )  # Convert [text](url) -> text
+
+                 # Remove weird positioning artifacts
+                 content = re.sub(r'❓\s*\[\d+\]\s*\w+.*?Position:.*?Size:.*?\n?', '', content,
+                                  flags=re.MULTILINE | re.DOTALL)
+                 content = re.sub(r'Primary: UNKNOWN\n\nNo specific evidence found', '', content,
+                                  flags=re.MULTILINE | re.DOTALL)
+                 content = re.sub(r'UNKNOWN CONFIDENCE', '', content, flags=re.MULTILINE | re.DOTALL)
+                 content = re.sub(r'!\[\]\(\)', '', content, flags=re.MULTILINE | re.DOTALL)
+
+                 # Simple truncation to 30k characters
+                 if len(content) > 30000:
+                     content = content[:30000] + '\n\n... [Content truncated at 30k characters] ...'
+
+                 # Simple prompt
+                 prompt = f"""Extract the requested information from this webpage content.
+
+ Query: {params.query}
+
+ Webpage Content:
+ {content}
+
+ Provide the extracted information in a clear, structured format."""
+
+                 from browser_use.llm.messages import UserMessage
+
+                 response = await asyncio.wait_for(
+                     page_extraction_llm.ainvoke([UserMessage(content=prompt)]),
+                     timeout=120.0,
+                 )
+
+                 extracted_content = f'Query: {params.query}\nExtracted Content:\n{response.completion}'
+
+                 # Simple memory handling
+                 if len(extracted_content) < 1000:
+                     memory = extracted_content
+                     include_extracted_content_only_once = False
+                 else:
+                     save_result = await file_system.save_extracted_content(extracted_content)
+                     current_url = await browser_session.get_current_page_url()
+                     memory = (
+                         f'Extracted content from {current_url} for query: {params.query}\nContent saved to file system: {save_result}'
+                     )
+                     include_extracted_content_only_once = True
+
+                 logger.info(f'📄 {memory}')
+                 return ActionResult(
+                     extracted_content=extracted_content,
+                     include_extracted_content_only_once=include_extracted_content_only_once,
+                     long_term_memory=memory,
+                 )
+             except Exception as e:
+                 logger.debug(f'Error extracting content: {e}')
+                 raise RuntimeError(str(e))
+
+         @self.registry.action('Read file_name from file system. If this is a file not in Current workspace dir or with a absolute path, Set external_file=True.')
+         async def read_file(file_name: str, external_file: bool, file_system: FileSystem):
+             result = await file_system.read_file(file_name, external_file=external_file)
+
+             MAX_MEMORY_SIZE = 1000
+             if len(result) > MAX_MEMORY_SIZE:
+                 lines = result.splitlines()
+                 display = ''
+                 lines_count = 0
+                 for line in lines:
+                     if len(display) + len(line) < MAX_MEMORY_SIZE:
+                         display += line + '\n'
+                         lines_count += 1
+                     else:
+                         break
+                 remaining_lines = len(lines) - lines_count
+                 memory = f'{display}{remaining_lines} more lines...' if remaining_lines > 0 else display
+             else:
+                 memory = result
+             logger.info(f'💾 {memory}')
+             return ActionResult(
+                 extracted_content=result,
+                 include_in_memory=True,
+                 long_term_memory=memory,
+                 include_extracted_content_only_once=True,
+             )
+
+         @self.registry.action(
+             'Extract content from a file. Support image files, pdf and more.',
+             param_model=FileExtractionAction,
+         )
+         async def extract_content_from_file(
+                 params: FileExtractionAction,
+                 page_extraction_llm: BaseChatModel,
+                 file_system: FileSystem,
+         ):
+             try:
+                 # Get file path
+                 file_path = params.file_path
+
+                 # Check if file exists
+                 if not os.path.exists(file_path):
+                     raise Exception(f'File not found: {file_path}')
+
+                 # Determine if file is an image based on MIME type
+                 mime_type, _ = mimetypes.guess_type(file_path)
+                 is_image = mime_type and mime_type.startswith('image/')
+
+                 if is_image:
+                     # Handle image files with LLM vision
+                     try:
+                         # Read image file and encode to base64
+                         with open(file_path, 'rb') as image_file:
+                             image_data = image_file.read()
+                             image_base64 = base64.b64encode(image_data).decode('utf-8')
+
+                         # Create content parts similar to the user's example
+                         content_parts: list[ContentPartTextParam | ContentPartImageParam] = [
+                             ContentPartTextParam(text=f"Query: {params.query}")
+                         ]
+
+                         # Add the image
+                         content_parts.append(
+                             ContentPartImageParam(
+                                 image_url=ImageURL(
+                                     url=f'data:{mime_type};base64,{image_base64}',
+                                     media_type=mime_type,
+                                     detail='high',
+                                 ),
+                             )
+                         )
+
+                         # Create user message and invoke LLM
+                         user_message = UserMessage(content=content_parts, cache=True)
+                         response = await asyncio.wait_for(
+                             page_extraction_llm.ainvoke([user_message]),
+                             timeout=120.0,
+                         )
+
+                         extracted_content = f'File: {file_path}\nQuery: {params.query}\nExtracted Content:\n{response.completion}'
+
+                     except Exception as e:
+                         raise Exception(f'Failed to process image file {file_path}: {str(e)}')
+
+                 else:
+                     # Handle non-image files by reading content
+                     try:
+                         file_content = await file_system.read_file(file_path, external_file=True)
+
+                         # Create a simple prompt for text extraction
+                         prompt = f"""Extract the requested information from this file content.
+
+ Query: {params.query}
+
+ File: {file_path}
+ File Content:
+ {file_content}
+
+ Provide the extracted information in a clear, structured format."""
+
+                         response = await asyncio.wait_for(
+                             page_extraction_llm.ainvoke([UserMessage(content=prompt)]),
+                             timeout=120.0,
+                         )
+
+                         extracted_content = f'File: {file_path}\nQuery: {params.query}\nExtracted Content:\n{response.completion}'
+
+                     except Exception as e:
+                         raise Exception(f'Failed to read file {file_path}: {str(e)}')
+
+                 # Handle memory storage
+                 if len(extracted_content) < 1000:
+                     memory = extracted_content
+                     include_extracted_content_only_once = False
+                 else:
+                     save_result = await file_system.save_extracted_content(extracted_content)
+                     memory = (
+                         f'Extracted content from file {file_path} for query: {params.query}\nContent saved to file system: {save_result}'
+                     )
+                     include_extracted_content_only_once = True
+
+                 logger.info(f'📄 Extracted content from file: {file_path}')
+                 return ActionResult(
+                     extracted_content=extracted_content,
+                     include_extracted_content_only_once=include_extracted_content_only_once,
+                     long_term_memory=memory,
+                 )
+
+             except Exception as e:
+                 logger.debug(f'Error extracting content from file: {e}')
+                 raise RuntimeError(str(e))
+
+     async def register_mcp_clients(self, mcp_server_config: Optional[Dict[str, Any]] = None):
+         self.mcp_server_config = mcp_server_config or self.mcp_server_config
+         if self.mcp_server_config:
+             await self.unregister_mcp_clients()
+             await self.register_mcp_tools()
+
+     async def register_mcp_tools(self):
+         """
+         Register the MCP tools used by this controller.
+         """
+         if not self.mcp_server_config:
+             return
+
+         # Handle both formats: with or without "mcpServers" key
+         mcp_servers = self.mcp_server_config.get('mcpServers', self.mcp_server_config)
+
+         if not mcp_servers:
+             return
+
+         for server_name, server_config in mcp_servers.items():
+             try:
+                 logger.info(f'Connecting to MCP server: {server_name}')
+
+                 # Create MCP client
+                 client = VibeSurfMCPClient(
+                     server_name=server_name,
+                     command=server_config['command'],
+                     args=server_config['args'],
+                     env=server_config.get('env', None)
+                 )
+
+                 # Connect to the MCP server
+                 await client.connect(timeout=200)
+
+                 # Register tools to controller with prefix
+                 prefix = f"mcp.{server_name}."
+                 await client.register_to_controller(
+                     controller=self,
+                     prefix=prefix
+                 )
+
+                 # Store client for later cleanup
+                 self.mcp_clients[server_name] = client
+
+                 logger.info(f'Successfully registered MCP server: {server_name} with prefix: {prefix}')
+
+             except Exception as e:
+                 logger.error(f'Failed to register MCP server {server_name}: {str(e)}')
+                 # Continue with other servers even if one fails
+
+     async def unregister_mcp_clients(self):
+         """
+         Unregister and disconnect all MCP clients.
+         """
+         # Disconnect all MCP clients
+         for server_name, client in self.mcp_clients.items():
+             try:
+                 logger.info(f'Disconnecting MCP server: {server_name}')
+                 await client.disconnect()
+             except Exception as e:
+                 logger.error(f'Failed to disconnect MCP server {server_name}: {str(e)}')
+
+         # Remove MCP tools from registry
+         try:
+             # Get all registered actions
+             actions_to_remove = []
+             for action_name in list(self.registry.registry.actions.keys()):
+                 if action_name.startswith('mcp.'):
+                     actions_to_remove.append(action_name)
+
+             # Remove MCP actions from registry
+             for action_name in actions_to_remove:
+                 if action_name in self.registry.registry.actions:
+                     del self.registry.registry.actions[action_name]
+                     logger.info(f'Removed MCP action: {action_name}')
+
+         except Exception as e:
+             logger.error(f'Failed to remove MCP actions from registry: {str(e)}')
+
+         # Clear the clients dictionary
+         self.mcp_clients.clear()
+         logger.info('All MCP clients unregistered and disconnected')
+
+     @observe_debug(ignore_input=True, ignore_output=True, name='act')
+     @time_execution_sync('--act')
+     async def act(
+             self,
+             action: ActionModel,
+             browser_session: BrowserSession | None = None,
+             #
+             page_extraction_llm: BaseChatModel | None = None,
+             sensitive_data: dict[str, str | dict[str, str]] | None = None,
+             available_file_paths: list[str] | None = None,
+             file_system: FileSystem | None = None,
+             #
+             context: Context | None = None,
+     ) -> ActionResult:
+         """Execute an action"""
+
+         for action_name, params in action.model_dump(exclude_unset=True).items():
+             if params is not None:
+                 try:
+                     result = await self.registry.execute_action(
+                         action_name=action_name,
+                         params=params,
+                         browser_session=browser_session,
+                         page_extraction_llm=page_extraction_llm,
+                         file_system=file_system,
+                         sensitive_data=sensitive_data,
+                         available_file_paths=available_file_paths,
+                         context=context,
+                     )
+                 except Exception as e:
+                     result = ActionResult(error=str(e))
+
+                 if isinstance(result, str):
+                     return ActionResult(extracted_content=result)
+                 elif isinstance(result, ActionResult):
+                     return result
+                 elif result is None:
+                     return ActionResult()
+                 else:
+                     raise ValueError(f'Invalid action result type: {type(result)} of {result}')
+         return ActionResult()
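
For orientation, here is a minimal sketch of how this controller might be wired up with an MCP configuration. Only the constructor signature, register_mcp_clients()/unregister_mcp_clients(), and the "mcp.<server_name>." action prefix come from the diff above; the "filesystem" server, its command, and its arguments are hypothetical placeholders.

import asyncio

from vibe_surf.controller.vibesurf_controller import VibeSurfController

# Hypothetical MCP config; register_mcp_tools() accepts either this nested
# "mcpServers" layout or a flat {name: {...}} mapping.
mcp_config = {
    "mcpServers": {
        "filesystem": {  # example server name, not part of the package
            "command": "npx",
            "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
            "env": {},
        }
    }
}


async def main():
    controller = VibeSurfController(mcp_server_config=mcp_config)
    # Connects each configured server and registers its tools as "mcp.filesystem.*" actions.
    await controller.register_mcp_clients()
    try:
        ...  # run agent steps that call controller.act(...)
    finally:
        await controller.unregister_mcp_clients()


asyncio.run(main())
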
vibe_surf/controller/views.py
@@ -0,0 +1,37 @@
+ from typing import Generic, TypeVar
+ from pydantic import BaseModel, ConfigDict, Field
+
+
+ class HoverAction(BaseModel):
+     """Parameters for hover action"""
+     index: int | None = None
+     xpath: str | None = None
+     selector: str | None = None
+
+
+ class ExtractionAction(BaseModel):
+     query: str = Field(
+         default="summary this page",
+         description='Extraction goal',
+     )
+     extract_links: bool | None = Field(
+         default=False,
+         description='Whether to extract links',
+     )
+     tab_id: str | None = Field(
+         default=None,
+         min_length=4,
+         max_length=4,
+         description='exact 4 character Tab ID of the tab for extraction',
+     )  # last 4 chars of TargetID
+
+
+ class FileExtractionAction(BaseModel):
+     """Parameters for file content extraction action"""
+     file_path: str = Field(
+         description='Path to the file to extract content from',
+     )
+     query: str = Field(
+         default="Extract and summarize the content from this file",
+         description='Query or instruction for content extraction',
+     )
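
These Pydantic models are the parameter schemas the controller registers its custom actions with. A quick illustration of constructing them (the values are made up; note tab_id is constrained to exactly 4 characters, matching the last 4 characters of a CDP TargetID):

from vibe_surf.controller.views import ExtractionAction, FileExtractionAction, HoverAction

# Hover by element index from the cached selector map; xpath/selector stay None.
hover = HoverAction(index=12)

# Extraction request scoped to a specific tab; a tab_id longer than 4 chars fails validation.
extract = ExtractionAction(query="product name and price", extract_links=False, tab_id="A1B2")

# File extraction falls back to its default query when none is given.
file_extract = FileExtractionAction(file_path="/tmp/report.pdf")
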
vibe_surf/llm/__init__.py
@@ -0,0 +1,21 @@
+ """
+ Vibe Surf LLM implementations.
+
+ This module provides LLM implementations for vibe_surf, including:
+ - ChatOpenAICompatible: OpenAI-compatible implementation with Gemini schema fix support
+
+ Example usage:
+     from vibe_surf.llm import ChatOpenAICompatible
+
+     # Using with Azure OpenAI for Gemini models
+     llm = ChatOpenAICompatible(
+         model="gemini-2.5-pro",
+         base_url="https://your-endpoint.openai.azure.com/",
+         api_key="your-api-key",
+         temperature=0,
+     )
+ """
+
+ from vibe_surf.llm.openai_compatible import ChatOpenAICompatible
+
+ __all__ = ['ChatOpenAICompatible']
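
Putting the two pieces together, a hedged sketch of feeding this LLM into the controller: the endpoint and key are placeholders, and it assumes ChatOpenAICompatible satisfies the browser_use BaseChatModel interface that act() expects for page_extraction_llm (its base class is not shown in this diff).

from vibe_surf.llm import ChatOpenAICompatible
from vibe_surf.controller.vibesurf_controller import VibeSurfController

llm = ChatOpenAICompatible(
    model="gemini-2.5-pro",
    base_url="https://your-endpoint.openai.azure.com/",
    api_key="your-api-key",
    temperature=0,
)

controller = VibeSurfController()
# Inside an agent step (browser_session, file_system, and action come from the agent):
# result = await controller.act(action, browser_session=browser_session,
#                               page_extraction_llm=llm, file_system=file_system)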