vibesurf 0.1.9a6__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vibesurf might be problematic. Click here for more details.

Files changed (69)
  1. vibe_surf/_version.py +2 -2
  2. vibe_surf/agents/browser_use_agent.py +68 -45
  3. vibe_surf/agents/prompts/report_writer_prompt.py +73 -0
  4. vibe_surf/agents/prompts/vibe_surf_prompt.py +85 -172
  5. vibe_surf/agents/report_writer_agent.py +380 -226
  6. vibe_surf/agents/vibe_surf_agent.py +878 -814
  7. vibe_surf/agents/views.py +130 -0
  8. vibe_surf/backend/api/activity.py +3 -1
  9. vibe_surf/backend/api/browser.py +70 -0
  10. vibe_surf/backend/api/config.py +8 -5
  11. vibe_surf/backend/api/files.py +59 -50
  12. vibe_surf/backend/api/models.py +2 -2
  13. vibe_surf/backend/api/task.py +47 -13
  14. vibe_surf/backend/database/manager.py +24 -18
  15. vibe_surf/backend/database/queries.py +199 -192
  16. vibe_surf/backend/database/schemas.py +1 -1
  17. vibe_surf/backend/main.py +80 -3
  18. vibe_surf/backend/shared_state.py +30 -35
  19. vibe_surf/backend/utils/encryption.py +3 -1
  20. vibe_surf/backend/utils/llm_factory.py +41 -36
  21. vibe_surf/browser/agent_browser_session.py +308 -62
  22. vibe_surf/browser/browser_manager.py +71 -100
  23. vibe_surf/browser/utils.py +5 -3
  24. vibe_surf/browser/watchdogs/dom_watchdog.py +0 -45
  25. vibe_surf/chrome_extension/background.js +88 -0
  26. vibe_surf/chrome_extension/manifest.json +3 -1
  27. vibe_surf/chrome_extension/scripts/api-client.js +13 -0
  28. vibe_surf/chrome_extension/scripts/file-manager.js +482 -0
  29. vibe_surf/chrome_extension/scripts/history-manager.js +658 -0
  30. vibe_surf/chrome_extension/scripts/modal-manager.js +487 -0
  31. vibe_surf/chrome_extension/scripts/session-manager.js +52 -11
  32. vibe_surf/chrome_extension/scripts/settings-manager.js +1214 -0
  33. vibe_surf/chrome_extension/scripts/ui-manager.js +1530 -3163
  34. vibe_surf/chrome_extension/sidepanel.html +47 -7
  35. vibe_surf/chrome_extension/styles/activity.css +934 -0
  36. vibe_surf/chrome_extension/styles/base.css +76 -0
  37. vibe_surf/chrome_extension/styles/history-modal.css +791 -0
  38. vibe_surf/chrome_extension/styles/input.css +568 -0
  39. vibe_surf/chrome_extension/styles/layout.css +186 -0
  40. vibe_surf/chrome_extension/styles/responsive.css +454 -0
  41. vibe_surf/chrome_extension/styles/settings-environment.css +165 -0
  42. vibe_surf/chrome_extension/styles/settings-forms.css +389 -0
  43. vibe_surf/chrome_extension/styles/settings-modal.css +141 -0
  44. vibe_surf/chrome_extension/styles/settings-profiles.css +244 -0
  45. vibe_surf/chrome_extension/styles/settings-responsive.css +144 -0
  46. vibe_surf/chrome_extension/styles/settings-utilities.css +25 -0
  47. vibe_surf/chrome_extension/styles/variables.css +54 -0
  48. vibe_surf/cli.py +5 -22
  49. vibe_surf/common.py +35 -0
  50. vibe_surf/llm/openai_compatible.py +148 -93
  51. vibe_surf/logger.py +99 -0
  52. vibe_surf/{controller/vibesurf_tools.py → tools/browser_use_tools.py} +233 -221
  53. vibe_surf/tools/file_system.py +415 -0
  54. vibe_surf/{controller → tools}/mcp_client.py +4 -3
  55. vibe_surf/tools/report_writer_tools.py +21 -0
  56. vibe_surf/tools/vibesurf_tools.py +657 -0
  57. vibe_surf/tools/views.py +120 -0
  58. {vibesurf-0.1.9a6.dist-info → vibesurf-0.1.11.dist-info}/METADATA +23 -3
  59. vibesurf-0.1.11.dist-info/RECORD +93 -0
  60. vibe_surf/chrome_extension/styles/main.css +0 -2338
  61. vibe_surf/chrome_extension/styles/settings.css +0 -1100
  62. vibe_surf/controller/file_system.py +0 -53
  63. vibe_surf/controller/views.py +0 -37
  64. vibesurf-0.1.9a6.dist-info/RECORD +0 -71
  65. /vibe_surf/{controller → tools}/__init__.py +0 -0
  66. {vibesurf-0.1.9a6.dist-info → vibesurf-0.1.11.dist-info}/WHEEL +0 -0
  67. {vibesurf-0.1.9a6.dist-info → vibesurf-0.1.11.dist-info}/entry_points.txt +0 -0
  68. {vibesurf-0.1.9a6.dist-info → vibesurf-0.1.11.dist-info}/licenses/LICENSE +0 -0
  69. {vibesurf-0.1.9a6.dist-info → vibesurf-0.1.11.dist-info}/top_level.txt +0 -0
vibe_surf/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.9a6'
32
- __version_tuple__ = version_tuple = (0, 1, 9, 'a6')
31
+ __version__ = version = '0.1.11'
32
+ __version_tuple__ = version_tuple = (0, 1, 11)
33
33
 
34
34
  __commit_id__ = commit_id = None
vibe_surf/agents/browser_use_agent.py CHANGED
@@ -3,6 +3,7 @@ import gc
3
3
  import inspect
4
4
  import json
5
5
  import logging
6
+ import os.path
6
7
  import pdb
7
8
  import re
8
9
  import sys
@@ -11,7 +12,7 @@ import time
11
12
  from collections.abc import Awaitable, Callable
12
13
  from datetime import datetime
13
14
  from pathlib import Path
14
- from typing import Any, Generic, Literal, TypeVar
15
+ from typing import Any, Generic, Literal, TypeVar, Optional
15
16
  from urllib.parse import urlparse
16
17
 
17
18
  from dotenv import load_dotenv
@@ -74,12 +75,11 @@ from browser_use.utils import (
74
75
  )
75
76
 
76
77
  from browser_use.agent.service import Agent, AgentHookFunc
77
- from vibe_surf.controller.file_system import CustomFileSystem
78
+ from vibe_surf.tools.file_system import CustomFileSystem
78
79
 
79
80
  Context = TypeVar('Context')
80
81
 
81
82
 
82
-
83
83
  class BrowserUseAgent(Agent):
84
84
  @time_execution_sync('--init')
85
85
  def __init__(
@@ -134,9 +134,11 @@ class BrowserUseAgent(Agent):
134
134
  vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
135
135
  llm_timeout: int = 90,
136
136
  step_timeout: int = 120,
137
- directly_open_url: bool = True,
137
+ directly_open_url: bool = False,
138
138
  include_recent_events: bool = False,
139
139
  allow_parallel_action_types: list[str] = ["extract_structured_data", "extract_content_from_file"],
140
+ _url_shortening_limit: int = 25,
141
+ token_cost_service: Optional[TokenCost] = None,
140
142
  **kwargs,
141
143
  ):
142
144
  if page_extraction_llm is None:
@@ -148,6 +150,7 @@ class BrowserUseAgent(Agent):
148
150
  self.task_id: str = self.id
149
151
  self.session_id: str = uuid7str()
150
152
  self.allow_parallel_action_types = allow_parallel_action_types
153
+ self._url_shortening_limit = _url_shortening_limit
151
154
 
152
155
  browser_profile = browser_profile or DEFAULT_BROWSER_PROFILE
153
156
 
@@ -206,7 +209,10 @@ class BrowserUseAgent(Agent):
206
209
  )
207
210
 
208
211
  # Token cost service
209
- self.token_cost_service = TokenCost(include_cost=calculate_cost)
212
+ if token_cost_service is None:
213
+ self.token_cost_service = TokenCost(include_cost=calculate_cost)
214
+ else:
215
+ self.token_cost_service = token_cost_service
210
216
  self.token_cost_service.register_llm(llm)
211
217
  self.token_cost_service.register_llm(page_extraction_llm)
212
218
 
@@ -253,6 +259,11 @@ class BrowserUseAgent(Agent):
253
259
  '⚠️ DeepSeek models do not support use_vision=True yet. Setting use_vision=False for now...')
254
260
  self.settings.use_vision = False
255
261
 
262
+ if 'kimi-k2' in self.llm.model.lower():
263
+ self.logger.warning(
264
+ '⚠️ Kimi-k2 models do not support use_vision=True yet. Setting use_vision=False for now...')
265
+ self.settings.use_vision = False
266
+
256
267
  # Handle users trying to use use_vision=True with XAI models
257
268
  if 'grok' in self.llm.model.lower():
258
269
  self.logger.warning('⚠️ XAI models do not support use_vision=True yet. Setting use_vision=False for now...')
@@ -468,6 +479,13 @@ class BrowserUseAgent(Agent):
468
479
  # Increment step counter after step is fully completed
469
480
  self.state.n_steps += 1
470
481
 
482
+ def add_new_task(self, new_task: str) -> None:
483
+ """Add a new task to the agent, keeping the same task_id as tasks are continuous"""
484
+ # Simply delegate to message manager - no need for new task_id or events
485
+ # The task continues with new instructions, it doesn't end and start a new one
486
+ self.task = new_task
487
+ self._message_manager.add_new_task(new_task)
488
+
471
489
  @observe(name='agent.run', metadata={'task': '{{task}}', 'debug': '{{debug}}'})
472
490
  @time_execution_async('--run')
473
491
  async def run(
@@ -527,11 +545,13 @@ class BrowserUseAgent(Agent):
527
545
  # Replace the polling with clean pause-wait
528
546
  if self.state.paused:
529
547
  self.logger.debug(f'⏸️ Step {step}: Agent paused, waiting to resume...')
530
- await self.wait_until_resumed()
548
+ await self._external_pause_event.wait()
531
549
  signal_handler.reset()
532
550
 
533
551
  # Check if we should stop due to too many failures
534
- if self.state.consecutive_failures >= self.settings.max_failures:
552
+ if (self.state.consecutive_failures) >= self.settings.max_failures + int(
553
+ self.settings.final_response_after_failure
554
+ ):
535
555
  self.logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
536
556
  agent_run_error = f'Stopped due to {self.settings.max_failures} consecutive failures'
537
557
  break
@@ -630,6 +650,8 @@ class BrowserUseAgent(Agent):
630
650
  # Log token usage summary
631
651
  await self.token_cost_service.log_usage_summary()
632
652
 
653
+ self.save_history(os.path.join(self.file_system_path, 'AgentHistory.json'))
654
+
633
655
  # Unregister signal handlers before cleanup
634
656
  signal_handler.unregister()
635
657
 
@@ -673,7 +695,7 @@ class BrowserUseAgent(Agent):
673
695
  else:
674
696
  # Exact matching
675
697
  return action_type == allowed_pattern
676
-
698
+
677
699
  def _is_action_parallel_allowed(self, action: ActionModel) -> bool:
678
700
  """
679
701
  Check if an action is allowed to be executed in parallel.
@@ -686,16 +708,16 @@ class BrowserUseAgent(Agent):
686
708
  """
687
709
  action_data = action.model_dump(exclude_unset=True)
688
710
  action_type = next(iter(action_data.keys())) if action_data else None
689
-
711
+
690
712
  if not action_type:
691
713
  return False
692
-
714
+
693
715
  for allowed_pattern in self.allow_parallel_action_types:
694
716
  if self._matches_action_type(action_type, allowed_pattern):
695
717
  return True
696
-
718
+
697
719
  return False
698
-
720
+
699
721
  def _group_actions_for_parallel_execution(self, actions: list[ActionModel]) -> list[list[ActionModel]]:
700
722
  """
701
723
  Group consecutive actions that can be executed in parallel.
@@ -708,27 +730,27 @@ class BrowserUseAgent(Agent):
708
730
  """
709
731
  if not actions:
710
732
  return []
711
-
733
+
712
734
  groups = []
713
735
  current_group = [actions[0]]
714
-
736
+
715
737
  for i in range(1, len(actions)):
716
738
  current_action = actions[i]
717
- previous_action = actions[i-1]
718
-
739
+ previous_action = actions[i - 1]
740
+
719
741
  # Check if both current and previous actions can be executed in parallel
720
742
  if (self._is_action_parallel_allowed(current_action) and
721
- self._is_action_parallel_allowed(previous_action)):
743
+ self._is_action_parallel_allowed(previous_action)):
722
744
  # Add to current group
723
745
  current_group.append(current_action)
724
746
  else:
725
747
  # Start a new group
726
748
  groups.append(current_group)
727
749
  current_group = [current_action]
728
-
750
+
729
751
  # Add the last group
730
752
  groups.append(current_group)
731
-
753
+
732
754
  return groups
733
755
 
734
756
  @observe_debug(ignore_input=True, ignore_output=True)
@@ -761,21 +783,22 @@ class BrowserUseAgent(Agent):
761
783
 
762
784
  # Group actions for potential parallel execution
763
785
  action_groups = self._group_actions_for_parallel_execution(actions)
764
-
786
+
765
787
  # Track global action index for logging and DOM checks
766
788
  global_action_index = 0
767
789
 
768
790
  for group_index, action_group in enumerate(action_groups):
769
791
  group_size = len(action_group)
770
-
792
+
771
793
  # Check if this group can be executed in parallel
772
794
  can_execute_in_parallel = (
773
- group_size > 1 and
774
- all(self._is_action_parallel_allowed(action) for action in action_group)
795
+ group_size > 1 and
796
+ all(self._is_action_parallel_allowed(action) for action in action_group)
775
797
  )
776
-
798
+
777
799
  if can_execute_in_parallel:
778
- self.logger.info(f'🚀 Executing {group_size} actions in parallel: group {group_index + 1}/{len(action_groups)}')
800
+ self.logger.info(
801
+ f'🚀 Executing {group_size} actions in parallel: group {group_index + 1}/{len(action_groups)}')
779
802
  # Execute actions in parallel using asyncio.gather
780
803
  parallel_results = await self._execute_actions_in_parallel(
781
804
  action_group, global_action_index, total_actions,
@@ -783,7 +806,7 @@ class BrowserUseAgent(Agent):
783
806
  )
784
807
  results.extend(parallel_results)
785
808
  global_action_index += group_size
786
-
809
+
787
810
  # Check if any result indicates completion or error
788
811
  if any(result.is_done or result.error for result in parallel_results):
789
812
  break
@@ -791,7 +814,7 @@ class BrowserUseAgent(Agent):
791
814
  # Execute actions sequentially
792
815
  for local_index, action in enumerate(action_group):
793
816
  i = global_action_index + local_index
794
-
817
+
795
818
  # Original sequential execution logic continues here...
796
819
  if i > 0:
797
820
  # ONLY ALLOW TO CALL `done` IF IT IS A SINGLE ACTION
@@ -825,7 +848,7 @@ class BrowserUseAgent(Agent):
825
848
  except Exception as e:
826
849
  self.logger.error(f'❌ Executing action {i + 1} failed: {type(e).__name__}: {e}')
827
850
  raise e
828
-
851
+
829
852
  global_action_index += len(action_group)
830
853
 
831
854
  return results
@@ -840,11 +863,11 @@ class BrowserUseAgent(Agent):
840
863
  check_for_new_elements: bool
841
864
  ) -> list[ActionResult]:
842
865
  """Execute a group of actions in parallel using asyncio.gather"""
843
-
866
+
844
867
  async def execute_single_parallel_action(action: ActionModel, action_index: int) -> ActionResult:
845
868
  """Execute a single action for parallel execution"""
846
869
  await self._raise_if_stopped_or_paused()
847
-
870
+
848
871
  # Get action info for logging
849
872
  action_data = action.model_dump(exclude_unset=True)
850
873
  action_name = next(iter(action_data.keys())) if action_data else 'unknown'
@@ -853,12 +876,12 @@ class BrowserUseAgent(Agent):
853
876
  ).replace('{', '').replace('}', '').replace("'", '').strip().strip(',')
854
877
  action_params = str(action_params)
855
878
  action_params = f'{action_params[:122]}...' if len(action_params) > 128 else action_params
856
-
879
+
857
880
  time_start = time.time()
858
881
  blue = '\033[34m'
859
882
  reset = '\033[0m'
860
883
  self.logger.info(f' 🦾 {blue}[PARALLEL ACTION {action_index + 1}/{total_actions}]{reset} {action_params}')
861
-
884
+
862
885
  # Execute the action
863
886
  result = await self.tools.act(
864
887
  action=action,
@@ -868,26 +891,26 @@ class BrowserUseAgent(Agent):
868
891
  sensitive_data=self.sensitive_data,
869
892
  available_file_paths=self.available_file_paths,
870
893
  )
871
-
894
+
872
895
  time_end = time.time()
873
896
  time_elapsed = time_end - time_start
874
-
897
+
875
898
  green = '\033[92m'
876
899
  self.logger.debug(
877
900
  f'☑️ Parallel action {action_index + 1}/{total_actions}: {green}{action_params}{reset} in {time_elapsed:.2f}s'
878
901
  )
879
-
902
+
880
903
  return result
881
-
904
+
882
905
  # Create tasks for parallel execution
883
906
  tasks = [
884
907
  execute_single_parallel_action(action, start_index + i)
885
908
  for i, action in enumerate(actions)
886
909
  ]
887
-
910
+
888
911
  # Execute all tasks in parallel
889
912
  parallel_results = await asyncio.gather(*tasks, return_exceptions=True)
890
-
913
+
891
914
  # Process results and handle any exceptions
892
915
  processed_results = []
893
916
  for i, result in enumerate(parallel_results):
@@ -897,7 +920,7 @@ class BrowserUseAgent(Agent):
897
920
  raise result
898
921
  else:
899
922
  processed_results.append(result)
900
-
923
+
901
924
  return processed_results
902
925
 
903
926
  async def _check_dom_synchronization(
@@ -955,13 +978,13 @@ class BrowserUseAgent(Agent):
955
978
  include_in_memory=True,
956
979
  long_term_memory=msg,
957
980
  )
958
-
981
+
959
982
  return None
960
983
 
961
984
  async def _execute_single_action(self, action: ActionModel, action_index: int, total_actions: int) -> ActionResult:
962
985
  """Execute a single action in sequential mode"""
963
986
  await self._raise_if_stopped_or_paused()
964
-
987
+
965
988
  # Get action name from the action model
966
989
  action_data = action.model_dump(exclude_unset=True)
967
990
  action_name = next(iter(action_data.keys())) if action_data else 'unknown'
@@ -971,14 +994,14 @@ class BrowserUseAgent(Agent):
971
994
  # Ensure action_params is always a string before checking length
972
995
  action_params = str(action_params)
973
996
  action_params = f'{action_params[:122]}...' if len(action_params) > 128 else action_params
974
-
997
+
975
998
  time_start = time.time()
976
-
999
+
977
1000
  red = '\033[91m'
978
1001
  green = '\033[92m'
979
1002
  blue = '\033[34m'
980
1003
  reset = '\033[0m'
981
-
1004
+
982
1005
  self.logger.info(f' 🦾 {blue}[ACTION {action_index + 1}/{total_actions}]{reset} {action_params}')
983
1006
 
984
1007
  result = await self.tools.act(
@@ -996,5 +1019,5 @@ class BrowserUseAgent(Agent):
996
1019
  self.logger.debug(
997
1020
  f'☑️ Executed action {action_index + 1}/{total_actions}: {green}{action_params}{reset} in {time_elapsed:.2f}s'
998
1021
  )
999
-
1022
+
1000
1023
  return result
vibe_surf/agents/prompts/report_writer_prompt.py ADDED
@@ -0,0 +1,73 @@
1
+ REPORT_WRITER_PROMPT = """
2
+ You are an intelligent report writing assistant that can read files, generate content, and create professional HTML reports.
3
+
4
+ ## Your Capabilities:
5
+ 1. **read_file**: Read existing files to gather additional context or reference material
6
+ 2. **write_file**: Write content to files, including generating report content and creating HTML output
7
+
8
+ ## Workflow (MUST Follow These Steps):
9
+ 1. **Analyze the task**: Understand what type of report is needed and what information you have
10
+ 2. **Determine if you need more information**:
11
+ - If you need to read existing files for context, use `read_file`
12
+ - Look for references to files in the task or information that might be helpful
13
+ - **IMPORTANT for BrowserTaskResult inputs**: If you receive browser_results data containing BrowserTaskResult objects:
14
+ * Each BrowserTaskResult has an `agent_workdir` field with the actual working directory path
15
+ * For any file paths in `important_files` or other file references from that result:
16
+ - Check if the file path already starts with the `agent_workdir` value
17
+ - If NOT, prepend the `agent_workdir` value + "/" to the file path when calling read_file
18
+ - This ensures you can access files created by the browser agent correctly
19
+ * Example: If BrowserTaskResult shows `agent_workdir: "/tmp/session123"` and `important_files: ["data/report.csv"]`,
20
+ use `/tmp/session123/data/report.csv` when calling read_file
21
+ 3. **Generate the report content**: Create comprehensive, professional content that directly addresses the task requirements
22
+ 4. **MANDATORY FORMATTING STEP**: **THIS STEP IS REQUIRED** - Format the content as a professional HTML document with:
23
+ - Complete HTML5 structure with DOCTYPE
24
+ - Professional styling with embedded CSS
25
+ - Responsive design and clean typography
26
+ - Visual hierarchy with proper sections
27
+ - Data tables where appropriate
28
+ - Professional color scheme (blues, grays, whites)
29
+ - Cross-browser compatibility and print-friendly design
30
+ 5. **Final output**: Write the fully formatted HTML to the target file using `write_file`
31
+
32
+ ## Content Guidelines:
33
+ - Focus ONLY on what the user specifically requested - ignore technical execution details
34
+ - Create content that directly addresses the user's needs (comparison, analysis, research findings, etc.)
35
+ - DO NOT include methodology, task overview, or technical process information
36
+ - DO NOT mention agents, browser automation, or technical execution methods
37
+ - Write as if you're delivering exactly what the user asked for
38
+ - Use a professional, clear, and engaging style
39
+ - Structure content with clear sections relevant to the user's request
40
+
41
+ ## HTML Requirements:
42
+ - Complete HTML5 document with DOCTYPE
43
+ - Embedded CSS (no external dependencies)
44
+ - Responsive design with proper meta tags
45
+ - Professional styling with modern CSS features
46
+ - Clean, readable typography
47
+ - Proper spacing, margins, and visual hierarchy
48
+ - Cross-browser compatibility
49
+ - Print-friendly design
50
+ - Semantic HTML elements
51
+ - **For local files (images, documents, etc.)**: Use relative paths in standard HTML format:
52
+ - Images: `<img src="path/to/image.jpg" alt="description">`
53
+ - Links: `<a href="path/to/document.pdf">Link text</a>`
54
+ - The system will automatically convert these to absolute file:// URLs. Please do not use `file://` before path.
55
+
56
+ ## Title Guidelines:
57
+ - Create titles based on the actual content/topic
58
+ - NOT "Task Execution Report" or similar generic titles
59
+ - Make it specific to what was researched/analyzed
60
+
61
+ ## Execution Requirements:
62
+ - **ALWAYS** start by analyzing if you need to read any files first
63
+ - Generate comprehensive content that addresses the user's specific request
64
+ - **MANDATORY**: Complete the formatting step - transform content into professional HTML format
65
+ - **CRITICAL**: The formatting step cannot be skipped - it is required for every report
66
+ - Write the final formatted HTML to the target file using `write_file`
67
+ - Call `task_done` only after the report is fully formatted and written
68
+
69
+ ## Key Reminder:
70
+ **Every report MUST include a dedicated formatting step** (typically the final step before output). This step transforms your content into a professional, well-structured HTML document. Raw content without proper HTML formatting is not acceptable.
71
+
72
+ Remember: You are creating a professional deliverable that directly fulfills the user's request. Focus on the subject matter, not the technical process.
73
+ """