vibesurf-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vibesurf might be problematic.
- vibe_surf/__init__.py +12 -0
- vibe_surf/_version.py +34 -0
- vibe_surf/agents/__init__.py +0 -0
- vibe_surf/agents/browser_use_agent.py +1106 -0
- vibe_surf/agents/prompts/__init__.py +1 -0
- vibe_surf/agents/prompts/vibe_surf_prompt.py +176 -0
- vibe_surf/agents/report_writer_agent.py +360 -0
- vibe_surf/agents/vibe_surf_agent.py +1632 -0
- vibe_surf/backend/__init__.py +0 -0
- vibe_surf/backend/api/__init__.py +3 -0
- vibe_surf/backend/api/activity.py +243 -0
- vibe_surf/backend/api/config.py +740 -0
- vibe_surf/backend/api/files.py +322 -0
- vibe_surf/backend/api/models.py +257 -0
- vibe_surf/backend/api/task.py +300 -0
- vibe_surf/backend/database/__init__.py +13 -0
- vibe_surf/backend/database/manager.py +129 -0
- vibe_surf/backend/database/models.py +164 -0
- vibe_surf/backend/database/queries.py +922 -0
- vibe_surf/backend/database/schemas.py +100 -0
- vibe_surf/backend/llm_config.py +182 -0
- vibe_surf/backend/main.py +137 -0
- vibe_surf/backend/migrations/__init__.py +16 -0
- vibe_surf/backend/migrations/init_db.py +303 -0
- vibe_surf/backend/migrations/seed_data.py +236 -0
- vibe_surf/backend/shared_state.py +601 -0
- vibe_surf/backend/utils/__init__.py +7 -0
- vibe_surf/backend/utils/encryption.py +164 -0
- vibe_surf/backend/utils/llm_factory.py +225 -0
- vibe_surf/browser/__init__.py +8 -0
- vibe_surf/browser/agen_browser_profile.py +130 -0
- vibe_surf/browser/agent_browser_session.py +416 -0
- vibe_surf/browser/browser_manager.py +296 -0
- vibe_surf/browser/utils.py +790 -0
- vibe_surf/browser/watchdogs/__init__.py +0 -0
- vibe_surf/browser/watchdogs/action_watchdog.py +291 -0
- vibe_surf/browser/watchdogs/dom_watchdog.py +954 -0
- vibe_surf/chrome_extension/background.js +558 -0
- vibe_surf/chrome_extension/config.js +48 -0
- vibe_surf/chrome_extension/content.js +284 -0
- vibe_surf/chrome_extension/dev-reload.js +47 -0
- vibe_surf/chrome_extension/icons/convert-svg.js +33 -0
- vibe_surf/chrome_extension/icons/logo-preview.html +187 -0
- vibe_surf/chrome_extension/icons/logo.png +0 -0
- vibe_surf/chrome_extension/manifest.json +53 -0
- vibe_surf/chrome_extension/popup.html +134 -0
- vibe_surf/chrome_extension/scripts/api-client.js +473 -0
- vibe_surf/chrome_extension/scripts/main.js +491 -0
- vibe_surf/chrome_extension/scripts/markdown-it.min.js +3 -0
- vibe_surf/chrome_extension/scripts/session-manager.js +599 -0
- vibe_surf/chrome_extension/scripts/ui-manager.js +3687 -0
- vibe_surf/chrome_extension/sidepanel.html +347 -0
- vibe_surf/chrome_extension/styles/animations.css +471 -0
- vibe_surf/chrome_extension/styles/components.css +670 -0
- vibe_surf/chrome_extension/styles/main.css +2307 -0
- vibe_surf/chrome_extension/styles/settings.css +1100 -0
- vibe_surf/cli.py +357 -0
- vibe_surf/controller/__init__.py +0 -0
- vibe_surf/controller/file_system.py +53 -0
- vibe_surf/controller/mcp_client.py +68 -0
- vibe_surf/controller/vibesurf_controller.py +616 -0
- vibe_surf/controller/views.py +37 -0
- vibe_surf/llm/__init__.py +21 -0
- vibe_surf/llm/openai_compatible.py +237 -0
- vibesurf-0.1.0.dist-info/METADATA +97 -0
- vibesurf-0.1.0.dist-info/RECORD +70 -0
- vibesurf-0.1.0.dist-info/WHEEL +5 -0
- vibesurf-0.1.0.dist-info/entry_points.txt +2 -0
- vibesurf-0.1.0.dist-info/licenses/LICENSE +201 -0
- vibesurf-0.1.0.dist-info/top_level.txt +1 -0
vibe_surf/agents/browser_use_agent.py
@@ -0,0 +1,1106 @@
import asyncio
import gc
import inspect
import json
import logging
import pdb
import re
import sys
import tempfile
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
from pathlib import Path
from typing import Any, Generic, Literal, TypeVar
from urllib.parse import urlparse

from dotenv import load_dotenv

from browser_use.agent.cloud_events import (
    CreateAgentOutputFileEvent,
    CreateAgentSessionEvent,
    CreateAgentStepEvent,
    CreateAgentTaskEvent,
    UpdateAgentTaskEvent,
)
from browser_use.agent.message_manager.utils import save_conversation
from browser_use.llm.base import BaseChatModel
from browser_use.llm.messages import BaseMessage, UserMessage
from browser_use.llm.openai.chat import ChatOpenAI
from browser_use.tokens.service import TokenCost

from bubus import EventBus
from pydantic import ValidationError
from uuid_extensions import uuid7str

# Lazy import for gif to avoid heavy agent.views import at startup
# from browser_use.agent.gif import create_history_gif
from browser_use.agent.message_manager.service import (
    MessageManager,
)
from browser_use.agent.prompts import SystemPrompt
from browser_use.agent.views import (
    ActionResult,
    AgentError,
    AgentHistory,
    AgentHistoryList,
    AgentOutput,
    AgentSettings,
    AgentState,
    AgentStepInfo,
    AgentStructuredOutput,
    BrowserStateHistory,
    StepMetadata,
)
from pydantic import BaseModel, ConfigDict, Field, ValidationError, create_model, model_validator
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use.browser.session import DEFAULT_BROWSER_PROFILE
from browser_use.browser.views import BrowserStateSummary
from browser_use.config import CONFIG
from browser_use.controller.registry.views import ActionModel
from browser_use.controller.service import Controller
from browser_use.dom.views import DOMInteractedElement
from browser_use.filesystem.file_system import FileSystem
from browser_use.observability import observe, observe_debug
from browser_use.sync import CloudSync
from browser_use.telemetry.service import ProductTelemetry
from browser_use.telemetry.views import AgentTelemetryEvent
from browser_use.utils import (
    _log_pretty_path,
    get_browser_use_version,
    get_git_info,
    time_execution_async,
    time_execution_sync,
)

from browser_use.agent.service import Agent, AgentHookFunc
from vibe_surf.controller.file_system import CustomFileSystem

Context = TypeVar('Context')


def log_response(response: AgentOutput, registry=None, logger=None) -> None:
    """Utility function to log the model's response."""

    # Use module logger if no logger provided
    if logger is None:
        logger = logging.getLogger(__name__)

    # Only log thinking if it's present
    if response.current_state.thinking:
        logger.info(f'💡 Thinking:\n{response.current_state.thinking}')

    # Only log evaluation if it's not empty
    eval_goal = response.current_state.evaluation_previous_goal
    if eval_goal:
        if 'success' in eval_goal.lower():
            emoji = '👍'
            # Green color for success
            logger.info(f' \033[32m{emoji} Eval: {eval_goal}\033[0m')
        elif 'failure' in eval_goal.lower():
            emoji = '⚠️'
            # Red color for failure
            logger.info(f' \033[31m{emoji} Eval: {eval_goal}\033[0m')
        else:
            emoji = '❔'
            # No color for unknown/neutral
            logger.info(f' {emoji} Eval: {eval_goal}')

    # Always log memory if present
    if response.current_state.memory:
        logger.debug(f'🧠 Memory: {response.current_state.memory}')

    # Only log next goal if it's not empty
    next_goal = response.current_state.next_goal
    if next_goal:
        # Blue color for next goal
        logger.info(f' \033[34m🎯 Next goal: {next_goal}\033[0m')
    else:
        logger.info('')  # Add empty line for spacing


class BrowserUseAgent(Agent):
    @time_execution_sync('--init')
    def __init__(
        self,
        task: str,
        llm: BaseChatModel = ChatOpenAI(model='gpt-4.1-mini'),
        # Optional parameters
        browser_profile: BrowserProfile | None = None,
        browser_session: BrowserSession | None = None,
        controller: Controller[Context] | None = None,
        # Initial agent run parameters
        sensitive_data: dict[str, str | dict[str, str]] | None = None,
        initial_actions: list[dict[str, dict[str, Any]]] | None = None,
        # Cloud Callbacks
        register_new_step_callback: (
            Callable[['BrowserStateSummary', 'AgentOutput', int], None]  # Sync callback
            | Callable[['BrowserStateSummary', 'AgentOutput', int], Awaitable[None]]  # Async callback
            | None
        ) = None,
        register_done_callback: (
            Callable[['AgentHistoryList'], Awaitable[None]]  # Async Callback
            | Callable[['AgentHistoryList'], None]  # Sync Callback
            | None
        ) = None,
        register_external_agent_status_raise_error_callback: Callable[[], Awaitable[bool]] | None = None,
        # Agent settings
        output_model_schema: type[AgentStructuredOutput] | None = None,
        use_vision: bool = True,
        use_vision_for_planner: bool = False,  # Deprecated
        save_conversation_path: str | Path | None = None,
        save_conversation_path_encoding: str | None = 'utf-8',
        max_failures: int = 3,
        retry_delay: int = 10,
        override_system_message: str | None = None,
        extend_system_message: str | None = None,
        validate_output: bool = False,
        generate_gif: bool | str = False,
        available_file_paths: list[str] | None = None,
        include_attributes: list[str] | None = None,
        max_actions_per_step: int = 10,
        use_thinking: bool = True,
        flash_mode: bool = False,
        max_history_items: int | None = None,
        page_extraction_llm: BaseChatModel | None = None,
        planner_llm: BaseChatModel | None = None,  # Deprecated
        planner_interval: int = 1,  # Deprecated
        is_planner_reasoning: bool = False,  # Deprecated
        extend_planner_system_message: str | None = None,  # Deprecated
        injected_agent_state: AgentState | None = None,
        context: Context | None = None,
        source: str | None = None,
        file_system_path: str | None = None,
        task_id: str | None = None,
        cloud_sync: CloudSync | None = None,
        calculate_cost: bool = False,
        display_files_in_done_text: bool = True,
        include_tool_call_examples: bool = False,
        vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
        llm_timeout: int = 90,
        step_timeout: int = 120,
        preload: bool = True,
        include_recent_events: bool = False,
        allow_parallel_action_types: list[str] = ["extract_structured_data", "extract_content_from_file"],
        **kwargs,
    ):
        if not isinstance(llm, BaseChatModel):
            raise ValueError('invalid llm, must be from browser_use.llm')
        # Check for deprecated planner parameters
        planner_params = [
            planner_llm,
            use_vision_for_planner,
            is_planner_reasoning,
            extend_planner_system_message,
        ]
        if any(param is not None and param is not False for param in planner_params) or planner_interval != 1:
            self.logger.warning(
                '⚠️ Planner functionality has been removed in browser-use v0.3.3+. '
                'The planner_llm, use_vision_for_planner, planner_interval, is_planner_reasoning, '
                'and extend_planner_system_message parameters are deprecated and will be ignored. '
                'Please remove these parameters from your Agent() initialization.'
            )

        # Check for deprecated memory parameters
        if kwargs.get('enable_memory', False) or kwargs.get('memory_config') is not None:
            self.logger.warning(
                'Memory support has been removed as of version 0.3.2. '
                'The agent context for memory is significantly improved and no longer requires the old memory system. '
                "Please remove the 'enable_memory' and 'memory_config' parameters."
            )
            kwargs['enable_memory'] = False
            kwargs['memory_config'] = None

        if page_extraction_llm is None:
            page_extraction_llm = llm
        if available_file_paths is None:
            available_file_paths = []

        self.id = task_id or uuid7str()
        self.task_id: str = self.id
        self.session_id: str = uuid7str()
        self.allow_parallel_action_types = allow_parallel_action_types

        # Initialize available file paths as direct attribute
        self.available_file_paths = available_file_paths

        # Temporary logger for initialization (will be replaced by property)
        self._logger = None

        # Core components
        self.task = task
        self.llm = llm
        self.preload = preload
        self.include_recent_events = include_recent_events
        self.controller = (
            controller if controller is not None else Controller(display_files_in_done_text=display_files_in_done_text)
        )

        # Structured output
        self.output_model_schema = output_model_schema
        if self.output_model_schema is not None:
            self.controller.use_structured_output_action(self.output_model_schema)

        self.sensitive_data = sensitive_data

        self.settings = AgentSettings(
            use_vision=use_vision,
            vision_detail_level=vision_detail_level,
            use_vision_for_planner=False,  # Always False now (deprecated)
            save_conversation_path=save_conversation_path,
            save_conversation_path_encoding=save_conversation_path_encoding,
            max_failures=max_failures,
            retry_delay=retry_delay,
            override_system_message=override_system_message,
            extend_system_message=extend_system_message,
            validate_output=validate_output,
            generate_gif=generate_gif,
            include_attributes=include_attributes,
            max_actions_per_step=max_actions_per_step,
            use_thinking=use_thinking,
            flash_mode=flash_mode,
            max_history_items=max_history_items,
            page_extraction_llm=page_extraction_llm,
            planner_llm=None,  # Always None now (deprecated)
            planner_interval=1,  # Always 1 now (deprecated)
            is_planner_reasoning=False,  # Always False now (deprecated)
            extend_planner_system_message=None,  # Always None now (deprecated)
            calculate_cost=calculate_cost,
            include_tool_call_examples=include_tool_call_examples,
            llm_timeout=llm_timeout,
            step_timeout=step_timeout,
        )

        # Token cost service
        self.token_cost_service = TokenCost(include_cost=calculate_cost)
        self.token_cost_service.register_llm(llm)
        self.token_cost_service.register_llm(page_extraction_llm)
        # Note: No longer registering planner_llm (deprecated)

        # Initialize state
        self.state = injected_agent_state or AgentState()

        # Initialize history
        self.history = AgentHistoryList(history=[], usage=None)

        # Initialize agent directory
        import time

        timestamp = int(time.time())
        base_tmp = Path(tempfile.gettempdir())
        self.agent_directory = base_tmp / f'browser_use_agent_{self.id}_{timestamp}'

        # Initialize file system and screenshot service
        self._set_file_system(file_system_path)
        self._set_screenshot_service()

        # Action setup
        self._setup_action_models()
        self._set_browser_use_version_and_source(source)
        self.initial_actions = self._convert_initial_actions(initial_actions) if initial_actions else None

        # Verify we can connect to the model
        self._verify_and_setup_llm()

        # TODO: move this logic to the LLMs
        # Handle users trying to use use_vision=True with DeepSeek models
        if 'deepseek' in self.llm.model.lower():
            self.logger.warning(
                '⚠️ DeepSeek models do not support use_vision=True yet. Setting use_vision=False for now...')
            self.settings.use_vision = False
        # Note: No longer checking planner_llm for DeepSeek (deprecated)

        # Handle users trying to use use_vision=True with XAI models
        if 'grok' in self.llm.model.lower():
            self.logger.warning('⚠️ XAI models do not support use_vision=True yet. Setting use_vision=False for now...')
            self.settings.use_vision = False
        # Note: No longer checking planner_llm for XAI models (deprecated)

        self.logger.info(f'🧠 Starting a browser-use version {self.version} with model={self.llm.model}')
        self.logger.debug(
            f'{" +vision" if self.settings.use_vision else ""}'
            f' extraction_model={self.settings.page_extraction_llm.model if self.settings.page_extraction_llm else "Unknown"}'
            # Note: No longer logging planner_model (deprecated)
            f'{" +file_system" if self.file_system else ""}'
        )

        # Initialize available actions for system prompt (only non-filtered actions)
        # These will be used for the system prompt to maintain caching
        self.unfiltered_actions = self.controller.registry.get_prompt_description()

        # Initialize message manager with state
        # Initial system prompt with all actions - will be updated during each step
        system_message = SystemPrompt(
            action_description=self.unfiltered_actions,
            max_actions_per_step=self.settings.max_actions_per_step,
            override_system_message=override_system_message,
            extend_system_message=extend_system_message,
            use_thinking=self.settings.use_thinking,
            flash_mode=self.settings.flash_mode,
        ).get_system_message()
        self.logger.debug(system_message)
        self._message_manager = MessageManager(
            task=task,
            system_message=system_message,
            file_system=self.file_system,
            state=self.state.message_manager_state,
            use_thinking=self.settings.use_thinking,
            # Settings that were previously in MessageManagerSettings
            include_attributes=self.settings.include_attributes,
            sensitive_data=sensitive_data,
            max_history_items=self.settings.max_history_items,
            vision_detail_level=self.settings.vision_detail_level,
            include_tool_call_examples=self.settings.include_tool_call_examples,
            include_recent_events=self.include_recent_events,
        )

        browser_profile = browser_profile or DEFAULT_BROWSER_PROFILE

        self.browser_session = browser_session or BrowserSession(
            browser_profile=browser_profile,
            id=uuid7str()[:-4] + self.id[-4:],  # re-use the same 4-char suffix so they show up together in logs
        )

        if self.sensitive_data:
            # Check if sensitive_data has domain-specific credentials
            has_domain_specific_credentials = any(isinstance(v, dict) for v in self.sensitive_data.values())

            # If no allowed_domains are configured, show a security warning
            if not self.browser_profile.allowed_domains:
                self.logger.error(
                    '⚠️⚠️⚠️ Agent(sensitive_data=••••••••) was provided but BrowserSession(allowed_domains=[...]) is not locked down! ⚠️⚠️⚠️\n'
                    ' ☠️ If the agent visits a malicious website and encounters a prompt-injection attack, your sensitive_data may be exposed!\n\n'
                    ' https://docs.browser-use.com/customize/browser-settings#restrict-urls\n'
                    'Waiting 10 seconds before continuing... Press [Ctrl+C] to abort.'
                )
                if sys.stdin.isatty():
                    try:
                        time.sleep(10)
                    except KeyboardInterrupt:
                        print(
                            '\n\n 🛑 Exiting now... set BrowserSession(allowed_domains=["example.com", "example.org"]) to only domains you trust to see your sensitive_data.'
                        )
                        sys.exit(0)
                else:
                    pass  # no point waiting if we're not in an interactive shell
                self.logger.warning(
                    '‼️ Continuing with insecure settings for now... but this will become a hard error in the future!'
                )

            # If we're using domain-specific credentials, validate domain patterns
            elif has_domain_specific_credentials:
                # For domain-specific format, ensure all domain patterns are included in allowed_domains
                domain_patterns = [k for k, v in self.sensitive_data.items() if isinstance(v, dict)]

                # Validate each domain pattern against allowed_domains
                for domain_pattern in domain_patterns:
                    is_allowed = False
                    for allowed_domain in self.browser_profile.allowed_domains:
                        # Special cases that don't require URL matching
                        if domain_pattern == allowed_domain or allowed_domain == '*':
                            is_allowed = True
                            break

                        # Need to create example URLs to compare the patterns
                        # Extract the domain parts, ignoring scheme
                        pattern_domain = domain_pattern.split('://')[-1] if '://' in domain_pattern else domain_pattern
                        allowed_domain_part = allowed_domain.split('://')[
                            -1] if '://' in allowed_domain else allowed_domain

                        # Check if pattern is covered by an allowed domain
                        # Example: "google.com" is covered by "*.google.com"
                        if pattern_domain == allowed_domain_part or (
                            allowed_domain_part.startswith('*.')
                            and (
                                pattern_domain == allowed_domain_part[2:]
                                or pattern_domain.endswith('.' + allowed_domain_part[2:])
                            )
                        ):
                            is_allowed = True
                            break

                    if not is_allowed:
                        self.logger.warning(
                            f'⚠️ Domain pattern "{domain_pattern}" in sensitive_data is not covered by any pattern in allowed_domains={self.browser_profile.allowed_domains}\n'
                            f' This may be a security risk as credentials could be used on unintended domains.'
                        )

        # Callbacks
        self.register_new_step_callback = register_new_step_callback
        self.register_done_callback = register_done_callback
        self.register_external_agent_status_raise_error_callback = register_external_agent_status_raise_error_callback

        # Context
        self.context: Context | None = context

        # Telemetry
        self.telemetry = ProductTelemetry()

        if self.settings.save_conversation_path:
            self.settings.save_conversation_path = Path(self.settings.save_conversation_path).expanduser().resolve()
            self.logger.info(f'💬 Saving conversation to {_log_pretty_path(self.settings.save_conversation_path)}')

        # Initialize download tracking
        assert self.browser_session is not None, 'BrowserSession is not set up'
        self.has_downloads_path = self.browser_session.browser_profile.downloads_path is not None
        if self.has_downloads_path:
            self._last_known_downloads: list[str] = []
            self.logger.debug('📁 Initialized download tracking for agent')

        self._external_pause_event = asyncio.Event()
        self._external_pause_event.set()

    def _set_file_system(self, file_system_path: str | None = None) -> None:
        # Check for conflicting parameters
        if self.state.file_system_state and file_system_path:
            raise ValueError(
                'Cannot provide both file_system_state (from agent state) and file_system_path. '
                'Either restore from existing state or create new file system at specified path, not both.'
            )

        # Check if we should restore from existing state first
        if self.state.file_system_state:
            try:
                # Restore file system from state at the exact same location
                self.file_system = CustomFileSystem.from_state(self.state.file_system_state)
                # The parent directory of base_dir is the original file_system_path
                self.file_system_path = str(self.file_system.base_dir)
                self.logger.debug(f'💾 File system restored from state to: {self.file_system_path}')
                return
            except Exception as e:
                self.logger.error(f'💾 Failed to restore file system from state: {e}')
                raise e

        # Initialize new file system
        try:
            if file_system_path:
                self.file_system = CustomFileSystem(file_system_path)
                self.file_system_path = file_system_path
            else:
                # Use the agent directory for file system
                self.file_system = CustomFileSystem(self.agent_directory)
                self.file_system_path = str(self.agent_directory)
        except Exception as e:
            self.logger.error(f'💾 Failed to initialize file system: {e}.')
            raise e

        # Save file system state to agent state
        self.state.file_system_state = self.file_system.get_state()

        self.logger.debug(f'💾 File system path: {self.file_system_path}')

    @property
    def logger(self) -> logging.Logger:
        """Get instance-specific logger with task ID and browser session info"""
        # Update target ID dynamically if available
        _browser_session_id = self.browser_session.id if self.browser_session else self.id
        _current_target_id = (
            self.browser_session.agent_focus.target_id[-4:]
            if self.browser_session and hasattr(self.browser_session,
                                                'agent_focus') and self.browser_session.agent_focus and hasattr(
                self.browser_session.agent_focus, 'target_id')
            else '--'
        )
        return logging.getLogger(
            f'browser-use.Agent:{self.task_id[-4:]} on target:{_current_target_id} of browser:{_browser_session_id[-4:]}')

    async def _finalize(self, browser_state_summary: BrowserStateSummary | None) -> None:
        """Finalize the step with history, logging, and events"""
        step_end_time = time.time()
        if not self.state.last_result:
            return

        if browser_state_summary:
            metadata = StepMetadata(
                step_number=self.state.n_steps,
                step_start_time=self.step_start_time,
                step_end_time=step_end_time,
            )

            # Use _make_history_item like main branch
            await self._make_history_item(self.state.last_model_output, browser_state_summary, self.state.last_result,
                                          metadata)

        # Log step completion summary
        self._log_step_completion_summary(self.step_start_time, self.state.last_result)

        # Save file system state after step completion
        self.save_file_system_state()

        # Emit both step created and executed events
        if browser_state_summary and self.state.last_model_output:
            # Extract key step data for the event
            actions_data = []
            if self.state.last_model_output.action:
                for action in self.state.last_model_output.action:
                    action_dict = action.model_dump() if hasattr(action, 'model_dump') else {}
                    actions_data.append(action_dict)

        # Increment step counter after step is fully completed
        self.state.n_steps += 1

    @observe(name='agent.run', metadata={'task': '{{task}}', 'debug': '{{debug}}'})
    @time_execution_async('--run')
    async def run(
        self,
        max_steps: int = 100,
        on_step_start: AgentHookFunc | None = None,
        on_step_end: AgentHookFunc | None = None,
    ) -> AgentHistoryList[AgentStructuredOutput]:
        """Execute the task with maximum number of steps"""

        loop = asyncio.get_event_loop()
        agent_run_error: str | None = None  # Initialize error tracking variable
        self._force_exit_telemetry_logged = False  # ADDED: Flag for custom telemetry on force exit

        # Set up the signal handler with callbacks specific to this agent
        from browser_use.utils import SignalHandler

        # Define the custom exit callback function for second CTRL+C
        def on_force_exit_log_telemetry():
            self._log_agent_event(max_steps=max_steps, agent_run_error='SIGINT: Cancelled by user')
            # NEW: Call the flush method on the telemetry instance
            if hasattr(self, 'telemetry') and self.telemetry:
                self.telemetry.flush()
            self._force_exit_telemetry_logged = True  # Set the flag

        signal_handler = SignalHandler(
            loop=loop,
            pause_callback=self.pause,
            resume_callback=self.resume,
            custom_exit_callback=on_force_exit_log_telemetry,  # Pass the new telemetry callback
            exit_on_second_int=True,
        )
        signal_handler.register()

        try:
            self._log_agent_run()

            self.logger.debug(
                f'🔧 Agent setup: Task ID {self.task_id[-4:]}, Session ID {self.session_id[-4:]}, Browser Session ID {self.browser_session.id[-4:] if self.browser_session else "None"}'
            )

            # Initialize timing for session and task
            self._session_start_time = time.time()
            self._task_start_time = self._session_start_time  # Initialize task start time

            self.logger.debug('🔧 Browser session started with watchdogs attached')

            # Check if task contains a URL and add it as an initial action (only if preload is enabled)
            if self.preload:
                initial_url = self._extract_url_from_task(self.task)
                if initial_url:
                    self.logger.info(f'🔗 Found URL in task: {initial_url}, adding as initial action...')

                    # Create a go_to_url action for the initial URL
                    go_to_url_action = {
                        'go_to_url': {
                            'url': initial_url,
                            'new_tab': False,  # Navigate in current tab
                        }
                    }

                    # Add to initial_actions or create new list if none exist
                    if self.initial_actions:
                        # Convert back to dict format, prepend URL navigation, then convert back
                        initial_actions_dicts = []
                        for action in self.initial_actions:
                            action_data = action.model_dump(exclude_unset=True)
                            initial_actions_dicts.append(action_data)

                        # Prepend the go_to_url action
                        initial_actions_dicts = [go_to_url_action] + initial_actions_dicts

                        # Convert back to ActionModel instances
                        self.initial_actions = self._convert_initial_actions(initial_actions_dicts)
                    else:
                        # Create new initial_actions with just the go_to_url
                        self.initial_actions = self._convert_initial_actions([go_to_url_action])

                    self.logger.debug(f'✅ Added navigation to {initial_url} as initial action')

            # Execute initial actions if provided
            if self.initial_actions:
                self.logger.debug(f'⚡ Executing {len(self.initial_actions)} initial actions...')
                result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
                self.state.last_result = result
                self.logger.debug('✅ Initial actions completed')

            self.logger.debug(f'🔄 Starting main execution loop with max {max_steps} steps...')
            for step in range(max_steps):
                # Replace the polling with clean pause-wait
                if self.state.paused:
                    self.logger.debug(f'⏸️ Step {step}: Agent paused, waiting to resume...')
                    await self.wait_until_resumed()
                    signal_handler.reset()

                # Check if we should stop due to too many failures
                if self.state.consecutive_failures >= self.settings.max_failures:
                    self.logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
                    agent_run_error = f'Stopped due to {self.settings.max_failures} consecutive failures'
                    break

                # Check control flags before each step
                if self.state.stopped:
                    self.logger.info('🛑 Agent stopped')
                    agent_run_error = 'Agent stopped programmatically'
                    break

                while self.state.paused:
                    await asyncio.sleep(0.2)  # Small delay to prevent CPU spinning
                    if self.state.stopped:  # Allow stopping while paused
                        agent_run_error = 'Agent stopped programmatically while paused'
                        break

                if on_step_start is not None:
                    await on_step_start(self)

                self.logger.debug(f'🚶 Starting step {step + 1}/{max_steps}...')
                step_info = AgentStepInfo(step_number=step, max_steps=max_steps)

                try:
                    await asyncio.wait_for(
                        self.step(step_info),
                        timeout=self.settings.step_timeout,
                    )
                    self.logger.debug(f'✅ Completed step {step + 1}/{max_steps}')
                except TimeoutError:
                    # Handle step timeout gracefully
                    error_msg = f'Step {step + 1} timed out after {self.settings.step_timeout} seconds'
                    self.logger.error(f'⏰ {error_msg}')
                    self.state.consecutive_failures += 1
                    self.state.last_result = [ActionResult(error=error_msg)]

                if on_step_end is not None:
                    await on_step_end(self)

                if self.history.is_done():
                    self.logger.debug(f'🎯 Task completed after {step + 1} steps!')
                    await self.log_completion()

                    if self.register_done_callback:
                        if inspect.iscoroutinefunction(self.register_done_callback):
                            await self.register_done_callback(self.history)
                        else:
                            self.register_done_callback(self.history)

                    # Task completed
                    break
            else:
                agent_run_error = 'Failed to complete task in maximum steps'

                self.history.add_item(
                    AgentHistory(
                        model_output=None,
                        result=[ActionResult(error=agent_run_error, include_in_memory=True)],
                        state=BrowserStateHistory(
                            url='',
                            title='',
                            tabs=[],
                            interacted_element=[],
                            screenshot_path=None,
                        ),
                        metadata=None,
                    )
                )

                self.logger.info(f'❌ {agent_run_error}')

            self.logger.debug('📊 Collecting usage summary...')
            self.history.usage = await self.token_cost_service.get_usage_summary()

            # set the model output schema and call it on the fly
            if self.history._output_model_schema is None and self.output_model_schema is not None:
                self.history._output_model_schema = self.output_model_schema

            self.logger.debug('🏁 Agent.run() completed successfully')
            return self.history

        except KeyboardInterrupt:
            # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
            self.logger.debug('Got KeyboardInterrupt during execution, returning current history')
            agent_run_error = 'KeyboardInterrupt'

            self.history.usage = await self.token_cost_service.get_usage_summary()

            return self.history

        except Exception as e:
            self.logger.error(f'Agent run failed with exception: {e}', exc_info=True)
            agent_run_error = str(e)
            raise e

        finally:
            # Log token usage summary
            await self.token_cost_service.log_usage_summary()

            # Unregister signal handlers before cleanup
            signal_handler.unregister()

            if not self._force_exit_telemetry_logged:  # MODIFIED: Check the flag
                try:
                    self._log_agent_event(max_steps=max_steps, agent_run_error=agent_run_error)
                except Exception as log_e:  # Catch potential errors during logging itself
                    self.logger.error(f'Failed to log telemetry event: {log_e}', exc_info=True)
            else:
                # ADDED: Info message when custom telemetry for SIGINT was already logged
                self.logger.debug('Telemetry for force exit (SIGINT) was logged by custom exit callback.')

            # Generate GIF if needed before stopping event bus
            if self.settings.generate_gif:
                output_path: str = 'agent_history.gif'
                if isinstance(self.settings.generate_gif, str):
                    output_path = self.settings.generate_gif

                # Lazy import gif module to avoid heavy startup cost
                from browser_use.agent.gif import create_history_gif

                create_history_gif(task=self.task, history=self.history, output_path=output_path)

            await self.close()

    def _matches_action_type(self, action_type: str, allowed_pattern: str) -> bool:
        """
        Check if an action type matches an allowed pattern, supporting wildcards.

        Args:
            action_type: The actual action type (e.g., "mcp.filesystem.read_file")
            allowed_pattern: The pattern to match (e.g., "mcp.filesystem*")

        Returns:
            True if the action type matches the pattern
        """
        if allowed_pattern.endswith('*'):
            # Wildcard matching
            prefix = allowed_pattern[:-1]
            return action_type.startswith(prefix)
        else:
            # Exact matching
            return action_type == allowed_pattern

    def _is_action_parallel_allowed(self, action: ActionModel) -> bool:
        """
        Check if an action is allowed to be executed in parallel.

        Args:
            action: The action to check

        Returns:
            True if the action can be executed in parallel
        """
        action_data = action.model_dump(exclude_unset=True)
        action_type = next(iter(action_data.keys())) if action_data else None

        if not action_type:
            return False

        for allowed_pattern in self.allow_parallel_action_types:
            if self._matches_action_type(action_type, allowed_pattern):
                return True

        return False

    def _group_actions_for_parallel_execution(self, actions: list[ActionModel]) -> list[list[ActionModel]]:
        """
        Group consecutive actions that can be executed in parallel.

        Args:
            actions: List of actions to group

        Returns:
            List of action groups, where each group can be executed in parallel
        """
        if not actions:
            return []

        groups = []
        current_group = [actions[0]]

        for i in range(1, len(actions)):
            current_action = actions[i]
            previous_action = actions[i-1]

            # Check if both current and previous actions can be executed in parallel
            if (self._is_action_parallel_allowed(current_action) and
                    self._is_action_parallel_allowed(previous_action)):
                # Add to current group
                current_group.append(current_action)
            else:
                # Start a new group
                groups.append(current_group)
                current_group = [current_action]

        # Add the last group
        groups.append(current_group)

        return groups

    @observe_debug(ignore_input=True, ignore_output=True)
    @time_execution_async('--multi_act')
    async def multi_act(
        self,
        actions: list[ActionModel],
        check_for_new_elements: bool = True,
    ) -> list[ActionResult]:
        """Execute multiple actions, with parallel execution for allowed action types"""
        results: list[ActionResult] = []
        time_elapsed = 0
        total_actions = len(actions)

        assert self.browser_session is not None, 'BrowserSession is not set up'
        try:
            if (
                self.browser_session._cached_browser_state_summary is not None
                and self.browser_session._cached_browser_state_summary.dom_state is not None
            ):
                cached_selector_map = dict(self.browser_session._cached_browser_state_summary.dom_state.selector_map)
                cached_element_hashes = {e.parent_branch_hash() for e in cached_selector_map.values()}
            else:
                cached_selector_map = {}
                cached_element_hashes = set()
        except Exception as e:
            self.logger.error(f'Error getting cached selector map: {e}')
            cached_selector_map = {}
            cached_element_hashes = set()

        # Group actions for potential parallel execution
        action_groups = self._group_actions_for_parallel_execution(actions)

        # Track global action index for logging and DOM checks
        global_action_index = 0

        for group_index, action_group in enumerate(action_groups):
            group_size = len(action_group)

            # Check if this group can be executed in parallel
            can_execute_in_parallel = (
                group_size > 1 and
                all(self._is_action_parallel_allowed(action) for action in action_group)
            )

            if can_execute_in_parallel:
                self.logger.info(f'🚀 Executing {group_size} actions in parallel: group {group_index + 1}/{len(action_groups)}')
                # Execute actions in parallel using asyncio.gather
                parallel_results = await self._execute_actions_in_parallel(
                    action_group, global_action_index, total_actions,
                    cached_selector_map, cached_element_hashes, check_for_new_elements
                )
                results.extend(parallel_results)
                global_action_index += group_size

                # Check if any result indicates completion or error
                if any(result.is_done or result.error for result in parallel_results):
                    break
            else:
                # Execute actions sequentially
                for local_index, action in enumerate(action_group):
                    i = global_action_index + local_index

                    # Original sequential execution logic continues here...
                    if i > 0:
                        # ONLY ALLOW TO CALL `done` IF IT IS A SINGLE ACTION
                        if action.model_dump(exclude_unset=True).get('done') is not None:
                            msg = f'Done action is allowed only as a single action - stopped after action {i} / {total_actions}.'
                            self.logger.debug(msg)
                            break

                    # DOM synchronization check - verify element indexes are still valid AFTER first action
                    if action.get_index() is not None and i != 0:
                        result = await self._check_dom_synchronization(
                            action, i, total_actions, cached_selector_map, cached_element_hashes,
                            check_for_new_elements, actions
                        )
                        if result:
                            results.append(result)
                            break

                    # wait between actions (only after first action)
                    if i > 0:
                        await asyncio.sleep(self.browser_profile.wait_between_actions)

                    # Execute single action
                    try:
                        action_result = await self._execute_single_action(action, i, total_actions)
                        results.append(action_result)

                        if action_result.is_done or action_result.error or i == total_actions - 1:
                            break

                    except Exception as e:
                        self.logger.error(f'❌ Executing action {i + 1} failed: {type(e).__name__}: {e}')
                        raise e

                global_action_index += len(action_group)

        return results

    async def _execute_actions_in_parallel(
        self,
        actions: list[ActionModel],
        start_index: int,
        total_actions: int,
        cached_selector_map: dict,
        cached_element_hashes: set,
        check_for_new_elements: bool
    ) -> list[ActionResult]:
        """Execute a group of actions in parallel using asyncio.gather"""

        async def execute_single_parallel_action(action: ActionModel, action_index: int) -> ActionResult:
            """Execute a single action for parallel execution"""
            await self._raise_if_stopped_or_paused()

            # Get action info for logging
            action_data = action.model_dump(exclude_unset=True)
            action_name = next(iter(action_data.keys())) if action_data else 'unknown'
            action_params = getattr(action, action_name, '') or str(action.model_dump(mode='json'))[:140].replace(
                '"', ''
            ).replace('{', '').replace('}', '').replace("'", '').strip().strip(',')
            action_params = str(action_params)
            action_params = f'{action_params[:122]}...' if len(action_params) > 128 else action_params

            time_start = time.time()
            blue = '\033[34m'
            reset = '\033[0m'
            self.logger.info(f' 🦾 {blue}[PARALLEL ACTION {action_index + 1}/{total_actions}]{reset} {action_params}')

            # Execute the action
            result = await self.controller.act(
                action=action,
                browser_session=self.browser_session,
                file_system=self.file_system,
                page_extraction_llm=self.settings.page_extraction_llm,
                sensitive_data=self.sensitive_data,
                available_file_paths=self.available_file_paths,
                context=self.context,
            )

            time_end = time.time()
            time_elapsed = time_end - time_start

            green = '\033[92m'
            self.logger.debug(
                f'☑️ Parallel action {action_index + 1}/{total_actions}: {green}{action_params}{reset} in {time_elapsed:.2f}s'
            )

            return result

        # Create tasks for parallel execution
        tasks = [
            execute_single_parallel_action(action, start_index + i)
            for i, action in enumerate(actions)
        ]

        # Execute all tasks in parallel
        parallel_results = await asyncio.gather(*tasks, return_exceptions=True)

        # Process results and handle any exceptions
        processed_results = []
        for i, result in enumerate(parallel_results):
            if isinstance(result, Exception):
                action_index = start_index + i
                self.logger.error(f'❌ Parallel action {action_index + 1} failed: {type(result).__name__}: {result}')
                raise result
            else:
                processed_results.append(result)

        return processed_results

    async def _check_dom_synchronization(
        self,
        action: ActionModel,
        action_index: int,
        total_actions: int,
        cached_selector_map: dict,
        cached_element_hashes: set,
        check_for_new_elements: bool,
        all_actions: list[ActionModel]
    ) -> ActionResult | None:
        """Check DOM synchronization and return result if page changed"""
        new_browser_state_summary = await self.browser_session.get_browser_state_summary(
            cache_clickable_elements_hashes=False,
            include_screenshot=False,
        )
        new_selector_map = new_browser_state_summary.dom_state.selector_map

        # Detect index change after previous action
        orig_target = cached_selector_map.get(action.get_index())
        orig_target_hash = orig_target.parent_branch_hash() if orig_target else None

        new_target = new_selector_map.get(action.get_index())  # type: ignore
        new_target_hash = new_target.parent_branch_hash() if new_target else None

        def get_remaining_actions_str(actions: list[ActionModel], index: int) -> str:
            remaining_actions = []
            for remaining_action in actions[index:]:
                action_data = remaining_action.model_dump(exclude_unset=True)
                action_name = next(iter(action_data.keys())) if action_data else 'unknown'
                remaining_actions.append(action_name)
            return ', '.join(remaining_actions)

        if orig_target_hash != new_target_hash:
            # Get names of remaining actions that won't be executed
            remaining_actions_str = get_remaining_actions_str(all_actions, action_index)
            msg = f'Page changed after action {action_index} / {total_actions}: actions {remaining_actions_str} were not executed'
            self.logger.info(msg)
            return ActionResult(
                extracted_content=msg,
                include_in_memory=True,
                long_term_memory=msg,
            )

        # Check for new elements that appeared
        new_element_hashes = {e.parent_branch_hash() for e in new_selector_map.values()}
        if check_for_new_elements and not new_element_hashes.issubset(cached_element_hashes):
            # next action requires index but there are new elements on the page
            remaining_actions_str = get_remaining_actions_str(all_actions, action_index)
            msg = f'Something new appeared after action {action_index} / {total_actions}: actions {remaining_actions_str} were not executed'
            self.logger.info(msg)
            return ActionResult(
                extracted_content=msg,
                include_in_memory=True,
                long_term_memory=msg,
            )

        return None

    async def _execute_single_action(self, action: ActionModel, action_index: int, total_actions: int) -> ActionResult:
        """Execute a single action in sequential mode"""
        await self._raise_if_stopped_or_paused()

        # Get action name from the action model
        action_data = action.model_dump(exclude_unset=True)
        action_name = next(iter(action_data.keys())) if action_data else 'unknown'
        action_params = getattr(action, action_name, '') or str(action.model_dump(mode='json'))[:140].replace(
            '"', ''
        ).replace('{', '').replace('}', '').replace("'", '').strip().strip(',')
        # Ensure action_params is always a string before checking length
        action_params = str(action_params)
        action_params = f'{action_params[:122]}...' if len(action_params) > 128 else action_params

        time_start = time.time()

        red = '\033[91m'
        green = '\033[92m'
        blue = '\033[34m'
        reset = '\033[0m'

        self.logger.info(f' 🦾 {blue}[ACTION {action_index + 1}/{total_actions}]{reset} {action_params}')

        result = await self.controller.act(
            action=action,
            browser_session=self.browser_session,
            file_system=self.file_system,
            page_extraction_llm=self.settings.page_extraction_llm,
            sensitive_data=self.sensitive_data,
            available_file_paths=self.available_file_paths,
            context=self.context,
        )

        time_end = time.time()
        time_elapsed = time_end - time_start

        self.logger.debug(
            f'☑️ Executed action {action_index + 1}/{total_actions}: {green}{action_params}{reset} in {time_elapsed:.2f}s'
        )

        return result