vibesurf 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vibesurf might be problematic. Click here for more details.

Files changed (70) hide show
  1. vibe_surf/__init__.py +12 -0
  2. vibe_surf/_version.py +34 -0
  3. vibe_surf/agents/__init__.py +0 -0
  4. vibe_surf/agents/browser_use_agent.py +1106 -0
  5. vibe_surf/agents/prompts/__init__.py +1 -0
  6. vibe_surf/agents/prompts/vibe_surf_prompt.py +176 -0
  7. vibe_surf/agents/report_writer_agent.py +360 -0
  8. vibe_surf/agents/vibe_surf_agent.py +1632 -0
  9. vibe_surf/backend/__init__.py +0 -0
  10. vibe_surf/backend/api/__init__.py +3 -0
  11. vibe_surf/backend/api/activity.py +243 -0
  12. vibe_surf/backend/api/config.py +740 -0
  13. vibe_surf/backend/api/files.py +322 -0
  14. vibe_surf/backend/api/models.py +257 -0
  15. vibe_surf/backend/api/task.py +300 -0
  16. vibe_surf/backend/database/__init__.py +13 -0
  17. vibe_surf/backend/database/manager.py +129 -0
  18. vibe_surf/backend/database/models.py +164 -0
  19. vibe_surf/backend/database/queries.py +922 -0
  20. vibe_surf/backend/database/schemas.py +100 -0
  21. vibe_surf/backend/llm_config.py +182 -0
  22. vibe_surf/backend/main.py +137 -0
  23. vibe_surf/backend/migrations/__init__.py +16 -0
  24. vibe_surf/backend/migrations/init_db.py +303 -0
  25. vibe_surf/backend/migrations/seed_data.py +236 -0
  26. vibe_surf/backend/shared_state.py +601 -0
  27. vibe_surf/backend/utils/__init__.py +7 -0
  28. vibe_surf/backend/utils/encryption.py +164 -0
  29. vibe_surf/backend/utils/llm_factory.py +225 -0
  30. vibe_surf/browser/__init__.py +8 -0
  31. vibe_surf/browser/agen_browser_profile.py +130 -0
  32. vibe_surf/browser/agent_browser_session.py +416 -0
  33. vibe_surf/browser/browser_manager.py +296 -0
  34. vibe_surf/browser/utils.py +790 -0
  35. vibe_surf/browser/watchdogs/__init__.py +0 -0
  36. vibe_surf/browser/watchdogs/action_watchdog.py +291 -0
  37. vibe_surf/browser/watchdogs/dom_watchdog.py +954 -0
  38. vibe_surf/chrome_extension/background.js +558 -0
  39. vibe_surf/chrome_extension/config.js +48 -0
  40. vibe_surf/chrome_extension/content.js +284 -0
  41. vibe_surf/chrome_extension/dev-reload.js +47 -0
  42. vibe_surf/chrome_extension/icons/convert-svg.js +33 -0
  43. vibe_surf/chrome_extension/icons/logo-preview.html +187 -0
  44. vibe_surf/chrome_extension/icons/logo.png +0 -0
  45. vibe_surf/chrome_extension/manifest.json +53 -0
  46. vibe_surf/chrome_extension/popup.html +134 -0
  47. vibe_surf/chrome_extension/scripts/api-client.js +473 -0
  48. vibe_surf/chrome_extension/scripts/main.js +491 -0
  49. vibe_surf/chrome_extension/scripts/markdown-it.min.js +3 -0
  50. vibe_surf/chrome_extension/scripts/session-manager.js +599 -0
  51. vibe_surf/chrome_extension/scripts/ui-manager.js +3687 -0
  52. vibe_surf/chrome_extension/sidepanel.html +347 -0
  53. vibe_surf/chrome_extension/styles/animations.css +471 -0
  54. vibe_surf/chrome_extension/styles/components.css +670 -0
  55. vibe_surf/chrome_extension/styles/main.css +2307 -0
  56. vibe_surf/chrome_extension/styles/settings.css +1100 -0
  57. vibe_surf/cli.py +357 -0
  58. vibe_surf/controller/__init__.py +0 -0
  59. vibe_surf/controller/file_system.py +53 -0
  60. vibe_surf/controller/mcp_client.py +68 -0
  61. vibe_surf/controller/vibesurf_controller.py +616 -0
  62. vibe_surf/controller/views.py +37 -0
  63. vibe_surf/llm/__init__.py +21 -0
  64. vibe_surf/llm/openai_compatible.py +237 -0
  65. vibesurf-0.1.0.dist-info/METADATA +97 -0
  66. vibesurf-0.1.0.dist-info/RECORD +70 -0
  67. vibesurf-0.1.0.dist-info/WHEEL +5 -0
  68. vibesurf-0.1.0.dist-info/entry_points.txt +2 -0
  69. vibesurf-0.1.0.dist-info/licenses/LICENSE +201 -0
  70. vibesurf-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1106 @@
1
+ import asyncio
2
+ import gc
3
+ import inspect
4
+ import json
5
+ import logging
6
+ import pdb
7
+ import re
8
+ import sys
9
+ import tempfile
10
+ import time
11
+ from collections.abc import Awaitable, Callable
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import Any, Generic, Literal, TypeVar
15
+ from urllib.parse import urlparse
16
+
17
+ from dotenv import load_dotenv
18
+
19
+ from browser_use.agent.cloud_events import (
20
+ CreateAgentOutputFileEvent,
21
+ CreateAgentSessionEvent,
22
+ CreateAgentStepEvent,
23
+ CreateAgentTaskEvent,
24
+ UpdateAgentTaskEvent,
25
+ )
26
+ from browser_use.agent.message_manager.utils import save_conversation
27
+ from browser_use.llm.base import BaseChatModel
28
+ from browser_use.llm.messages import BaseMessage, UserMessage
29
+ from browser_use.llm.openai.chat import ChatOpenAI
30
+ from browser_use.tokens.service import TokenCost
31
+
32
+ from bubus import EventBus
33
+ from pydantic import ValidationError
34
+ from uuid_extensions import uuid7str
35
+
36
+ # Lazy import for gif to avoid heavy agent.views import at startup
37
+ # from browser_use.agent.gif import create_history_gif
38
+ from browser_use.agent.message_manager.service import (
39
+ MessageManager,
40
+ )
41
+ from browser_use.agent.prompts import SystemPrompt
42
+ from browser_use.agent.views import (
43
+ ActionResult,
44
+ AgentError,
45
+ AgentHistory,
46
+ AgentHistoryList,
47
+ AgentOutput,
48
+ AgentSettings,
49
+ AgentState,
50
+ AgentStepInfo,
51
+ AgentStructuredOutput,
52
+ BrowserStateHistory,
53
+ StepMetadata,
54
+ )
55
+ from pydantic import BaseModel, ConfigDict, Field, ValidationError, create_model, model_validator
56
+ from browser_use.browser import BrowserProfile, BrowserSession
57
+ from browser_use.browser.session import DEFAULT_BROWSER_PROFILE
58
+ from browser_use.browser.views import BrowserStateSummary
59
+ from browser_use.config import CONFIG
60
+ from browser_use.controller.registry.views import ActionModel
61
+ from browser_use.controller.service import Controller
62
+ from browser_use.dom.views import DOMInteractedElement
63
+ from browser_use.filesystem.file_system import FileSystem
64
+ from browser_use.observability import observe, observe_debug
65
+ from browser_use.sync import CloudSync
66
+ from browser_use.telemetry.service import ProductTelemetry
67
+ from browser_use.telemetry.views import AgentTelemetryEvent
68
+ from browser_use.utils import (
69
+ _log_pretty_path,
70
+ get_browser_use_version,
71
+ get_git_info,
72
+ time_execution_async,
73
+ time_execution_sync,
74
+ )
75
+
76
+ from browser_use.agent.service import Agent, AgentHookFunc
77
+ from vibe_surf.controller.file_system import CustomFileSystem
78
+
79
+ Context = TypeVar('Context')
80
+
81
+
82
def log_response(response: AgentOutput, registry=None, logger=None) -> None:
    """Write the interesting parts of a model response to a logger.

    Logs, in order and only when non-empty: the thinking text (INFO), the
    evaluation of the previous goal (INFO, colour-coded), the memory (DEBUG)
    and the next goal (INFO). When there is no next goal an empty INFO line
    is emitted instead, purely for spacing.

    Args:
        response: Model output whose ``current_state`` exposes ``thinking``,
            ``evaluation_previous_goal``, ``memory`` and ``next_goal``.
        registry: Accepted for signature compatibility; not used here.
        logger: Destination logger. Falls back to this module's logger.
    """
    log = logging.getLogger(__name__) if logger is None else logger
    brain = response.current_state

    if brain.thinking:
        log.info(f'💡 Thinking:\n{brain.thinking}')

    verdict = brain.evaluation_previous_goal
    if verdict:
        lowered = verdict.lower()
        if 'success' in lowered:
            # Green: the previous goal was evaluated as a success
            log.info(f' \033[32m👍 Eval: {verdict}\033[0m')
        elif 'failure' in lowered:
            # Red: the previous goal was evaluated as a failure
            log.info(f' \033[31m⚠️ Eval: {verdict}\033[0m')
        else:
            # Uncoloured: neither keyword appears in the evaluation
            log.info(f' ❔ Eval: {verdict}')

    if brain.memory:
        log.debug(f'🧠 Memory: {brain.memory}')

    upcoming = brain.next_goal
    if not upcoming:
        log.info('')  # blank line keeps the step output visually separated
        return
    # Blue: the goal the model intends to pursue next
    log.info(f' \033[34m🎯 Next goal: {upcoming}\033[0m')
120
+
121
+
122
+ class BrowserUseAgent(Agent):
123
@time_execution_sync('--init')
def __init__(
        self,
        task: str,
        llm: BaseChatModel = ChatOpenAI(model='gpt-4.1-mini'),
        # Optional parameters
        browser_profile: BrowserProfile | None = None,
        browser_session: BrowserSession | None = None,
        controller: Controller[Context] | None = None,
        # Initial agent run parameters
        sensitive_data: dict[str, str | dict[str, str]] | None = None,
        initial_actions: list[dict[str, dict[str, Any]]] | None = None,
        # Cloud Callbacks
        register_new_step_callback: (
                Callable[['BrowserStateSummary', 'AgentOutput', int], None]  # Sync callback
                | Callable[['BrowserStateSummary', 'AgentOutput', int], Awaitable[None]]  # Async callback
                | None
        ) = None,
        register_done_callback: (
                Callable[['AgentHistoryList'], Awaitable[None]]  # Async Callback
                | Callable[['AgentHistoryList'], None]  # Sync Callback
                | None
        ) = None,
        register_external_agent_status_raise_error_callback: Callable[[], Awaitable[bool]] | None = None,
        # Agent settings
        output_model_schema: type[AgentStructuredOutput] | None = None,
        use_vision: bool = True,
        use_vision_for_planner: bool = False,  # Deprecated
        save_conversation_path: str | Path | None = None,
        save_conversation_path_encoding: str | None = 'utf-8',
        max_failures: int = 3,
        retry_delay: int = 10,
        override_system_message: str | None = None,
        extend_system_message: str | None = None,
        validate_output: bool = False,
        generate_gif: bool | str = False,
        available_file_paths: list[str] | None = None,
        include_attributes: list[str] | None = None,
        max_actions_per_step: int = 10,
        use_thinking: bool = True,
        flash_mode: bool = False,
        max_history_items: int | None = None,
        page_extraction_llm: BaseChatModel | None = None,
        planner_llm: BaseChatModel | None = None,  # Deprecated
        planner_interval: int = 1,  # Deprecated
        is_planner_reasoning: bool = False,  # Deprecated
        extend_planner_system_message: str | None = None,  # Deprecated
        injected_agent_state: AgentState | None = None,
        context: Context | None = None,
        source: str | None = None,
        file_system_path: str | None = None,
        task_id: str | None = None,
        cloud_sync: CloudSync | None = None,
        calculate_cost: bool = False,
        display_files_in_done_text: bool = True,
        include_tool_call_examples: bool = False,
        vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
        llm_timeout: int = 90,
        step_timeout: int = 120,
        preload: bool = True,
        include_recent_events: bool = False,
        allow_parallel_action_types: list[str] = ["extract_structured_data", "extract_content_from_file"],
        **kwargs,
):
    """Build an agent for ``task`` and wire up its collaborators.

    The constructor stores the task and LLM, builds ``AgentSettings``, the
    token-cost service, agent state and history, the file system, the action
    models, the message manager and the browser session, then validates
    ``sensitive_data`` against the browser profile's ``allowed_domains``.

    Notes:
        * The deprecated planner parameters (``planner_llm``,
          ``use_vision_for_planner``, ``planner_interval``,
          ``is_planner_reasoning``, ``extend_planner_system_message``) and
          the ``enable_memory`` / ``memory_config`` keyword arguments only
          trigger a warning; their values are ignored.
        * ``page_extraction_llm`` defaults to ``llm``.
        * Vision is switched off when the model name contains ``deepseek``
          or ``grok``.
        * When ``sensitive_data`` is given without ``allowed_domains`` and
          stdin is a TTY, construction sleeps for 10 seconds; pressing
          Ctrl+C during that pause calls ``sys.exit(0)``.

    Raises:
        ValueError: If ``llm`` is not a ``BaseChatModel`` instance.
    """
    if not isinstance(llm, BaseChatModel):
        raise ValueError('invalid llm, must be from browser_use.llm')

    # Identity is assigned before anything is logged: the ``logger`` property
    # builds its name from ``self.task_id``, so emitting a deprecation warning
    # before these attributes exist would fail with AttributeError.
    self.id = task_id or uuid7str()
    self.task_id: str = self.id
    self.session_id: str = uuid7str()

    # Temporary logger for initialization (will be replaced by property)
    self._logger = None

    # Check for deprecated planner parameters
    planner_params = [
        planner_llm,
        use_vision_for_planner,
        is_planner_reasoning,
        extend_planner_system_message,
    ]
    if any(param is not None and param is not False for param in planner_params) or planner_interval != 1:
        self.logger.warning(
            '⚠️ Planner functionality has been removed in browser-use v0.3.3+. '
            'The planner_llm, use_vision_for_planner, planner_interval, is_planner_reasoning, '
            'and extend_planner_system_message parameters are deprecated and will be ignored. '
            'Please remove these parameters from your Agent() initialization.'
        )

    # Check for deprecated memory parameters
    if kwargs.get('enable_memory', False) or kwargs.get('memory_config') is not None:
        self.logger.warning(
            'Memory support has been removed as of version 0.3.2. '
            'The agent context for memory is significantly improved and no longer requires the old memory system. '
            "Please remove the 'enable_memory' and 'memory_config' parameters."
        )
        kwargs['enable_memory'] = False
        kwargs['memory_config'] = None

    if page_extraction_llm is None:
        page_extraction_llm = llm
    if available_file_paths is None:
        available_file_paths = []

    # Copy so instances never share (or mutate) the signature's default list
    self.allow_parallel_action_types = list(allow_parallel_action_types)

    # Initialize available file paths as direct attribute
    self.available_file_paths = available_file_paths

    # Core components
    self.task = task
    self.llm = llm
    self.preload = preload
    self.include_recent_events = include_recent_events
    self.controller = (
        controller if controller is not None else Controller(display_files_in_done_text=display_files_in_done_text)
    )

    # Structured output
    self.output_model_schema = output_model_schema
    if self.output_model_schema is not None:
        self.controller.use_structured_output_action(self.output_model_schema)

    self.sensitive_data = sensitive_data

    self.settings = AgentSettings(
        use_vision=use_vision,
        vision_detail_level=vision_detail_level,
        use_vision_for_planner=False,  # Always False now (deprecated)
        save_conversation_path=save_conversation_path,
        save_conversation_path_encoding=save_conversation_path_encoding,
        max_failures=max_failures,
        retry_delay=retry_delay,
        override_system_message=override_system_message,
        extend_system_message=extend_system_message,
        validate_output=validate_output,
        generate_gif=generate_gif,
        include_attributes=include_attributes,
        max_actions_per_step=max_actions_per_step,
        use_thinking=use_thinking,
        flash_mode=flash_mode,
        max_history_items=max_history_items,
        page_extraction_llm=page_extraction_llm,
        planner_llm=None,  # Always None now (deprecated)
        planner_interval=1,  # Always 1 now (deprecated)
        is_planner_reasoning=False,  # Always False now (deprecated)
        extend_planner_system_message=None,  # Always None now (deprecated)
        calculate_cost=calculate_cost,
        include_tool_call_examples=include_tool_call_examples,
        llm_timeout=llm_timeout,
        step_timeout=step_timeout,
    )

    # Token cost service
    self.token_cost_service = TokenCost(include_cost=calculate_cost)
    self.token_cost_service.register_llm(llm)
    self.token_cost_service.register_llm(page_extraction_llm)
    # Note: No longer registering planner_llm (deprecated)

    # Initialize state
    self.state = injected_agent_state or AgentState()

    # Initialize history
    self.history = AgentHistoryList(history=[], usage=None)

    # Initialize agent directory (unique per agent id and construction time)
    timestamp = int(time.time())
    base_tmp = Path(tempfile.gettempdir())
    self.agent_directory = base_tmp / f'browser_use_agent_{self.id}_{timestamp}'

    # Initialize file system and screenshot service
    self._set_file_system(file_system_path)
    self._set_screenshot_service()

    # Action setup
    self._setup_action_models()
    self._set_browser_use_version_and_source(source)
    self.initial_actions = self._convert_initial_actions(initial_actions) if initial_actions else None

    # Verify we can connect to the model
    self._verify_and_setup_llm()

    # TODO: move this logic to the LLMs
    # Handle users trying to use use_vision=True with DeepSeek models
    if 'deepseek' in self.llm.model.lower():
        self.logger.warning(
            '⚠️ DeepSeek models do not support use_vision=True yet. Setting use_vision=False for now...')
        self.settings.use_vision = False
    # Note: No longer checking planner_llm for DeepSeek (deprecated)

    # Handle users trying to use use_vision=True with XAI models
    if 'grok' in self.llm.model.lower():
        self.logger.warning('⚠️ XAI models do not support use_vision=True yet. Setting use_vision=False for now...')
        self.settings.use_vision = False
    # Note: No longer checking planner_llm for XAI models (deprecated)

    self.logger.info(f'🧠 Starting a browser-use version {self.version} with model={self.llm.model}')
    self.logger.debug(
        f'{" +vision" if self.settings.use_vision else ""}'
        f' extraction_model={self.settings.page_extraction_llm.model if self.settings.page_extraction_llm else "Unknown"}'
        # Note: No longer logging planner_model (deprecated)
        f'{" +file_system" if self.file_system else ""}'
    )

    # Initialize available actions for system prompt (only non-filtered actions)
    # These will be used for the system prompt to maintain caching
    self.unfiltered_actions = self.controller.registry.get_prompt_description()

    # Initialize message manager with state
    # Initial system prompt with all actions - will be updated during each step
    system_message = SystemPrompt(
        action_description=self.unfiltered_actions,
        max_actions_per_step=self.settings.max_actions_per_step,
        override_system_message=override_system_message,
        extend_system_message=extend_system_message,
        use_thinking=self.settings.use_thinking,
        flash_mode=self.settings.flash_mode,
    ).get_system_message()
    self.logger.debug(system_message)
    self._message_manager = MessageManager(
        task=task,
        system_message=system_message,
        file_system=self.file_system,
        state=self.state.message_manager_state,
        use_thinking=self.settings.use_thinking,
        # Settings that were previously in MessageManagerSettings
        include_attributes=self.settings.include_attributes,
        sensitive_data=sensitive_data,
        max_history_items=self.settings.max_history_items,
        vision_detail_level=self.settings.vision_detail_level,
        include_tool_call_examples=self.settings.include_tool_call_examples,
        include_recent_events=self.include_recent_events,
    )

    browser_profile = browser_profile or DEFAULT_BROWSER_PROFILE

    self.browser_session = browser_session or BrowserSession(
        browser_profile=browser_profile,
        id=uuid7str()[:-4] + self.id[-4:],  # re-use the same 4-char suffix so they show up together in logs
    )

    if self.sensitive_data:
        # Check if sensitive_data has domain-specific credentials
        has_domain_specific_credentials = any(isinstance(v, dict) for v in self.sensitive_data.values())

        # If no allowed_domains are configured, show a security warning
        if not self.browser_profile.allowed_domains:
            self.logger.error(
                '⚠️⚠️⚠️ Agent(sensitive_data=••••••••) was provided but BrowserSession(allowed_domains=[...]) is not locked down! ⚠️⚠️⚠️\n'
                ' ☠️ If the agent visits a malicious website and encounters a prompt-injection attack, your sensitive_data may be exposed!\n\n'
                ' https://docs.browser-use.com/customize/browser-settings#restrict-urls\n'
                'Waiting 10 seconds before continuing... Press [Ctrl+C] to abort.'
            )
            if sys.stdin.isatty():
                try:
                    time.sleep(10)
                except KeyboardInterrupt:
                    print(
                        '\n\n 🛑 Exiting now... set BrowserSession(allowed_domains=["example.com", "example.org"]) to only domains you trust to see your sensitive_data.'
                    )
                    sys.exit(0)
            else:
                pass  # no point waiting if we're not in an interactive shell
            self.logger.warning(
                '‼️ Continuing with insecure settings for now... but this will become a hard error in the future!'
            )

        # If we're using domain-specific credentials, validate domain patterns
        elif has_domain_specific_credentials:
            # For domain-specific format, ensure all domain patterns are included in allowed_domains
            domain_patterns = [k for k, v in self.sensitive_data.items() if isinstance(v, dict)]

            # Validate each domain pattern against allowed_domains
            for domain_pattern in domain_patterns:
                is_allowed = False
                for allowed_domain in self.browser_profile.allowed_domains:
                    # Special cases that don't require URL matching
                    if domain_pattern == allowed_domain or allowed_domain == '*':
                        is_allowed = True
                        break

                    # Compare the host parts only, ignoring any scheme prefix
                    pattern_domain = domain_pattern.split('://')[-1]
                    allowed_domain_part = allowed_domain.split('://')[-1]

                    # Check if pattern is covered by an allowed domain
                    # Example: "google.com" is covered by "*.google.com"
                    if pattern_domain == allowed_domain_part or (
                            allowed_domain_part.startswith('*.')
                            and (
                                    pattern_domain == allowed_domain_part[2:]
                                    or pattern_domain.endswith('.' + allowed_domain_part[2:])
                            )
                    ):
                        is_allowed = True
                        break

                if not is_allowed:
                    self.logger.warning(
                        f'⚠️ Domain pattern "{domain_pattern}" in sensitive_data is not covered by any pattern in allowed_domains={self.browser_profile.allowed_domains}\n'
                        f' This may be a security risk as credentials could be used on unintended domains.'
                    )

    # Callbacks
    self.register_new_step_callback = register_new_step_callback
    self.register_done_callback = register_done_callback
    self.register_external_agent_status_raise_error_callback = register_external_agent_status_raise_error_callback

    # Context
    self.context: Context | None = context

    # Telemetry
    self.telemetry = ProductTelemetry()

    if self.settings.save_conversation_path:
        self.settings.save_conversation_path = Path(self.settings.save_conversation_path).expanduser().resolve()
        self.logger.info(f'💬 Saving conversation to {_log_pretty_path(self.settings.save_conversation_path)}')

    # Initialize download tracking
    assert self.browser_session is not None, 'BrowserSession is not set up'
    self.has_downloads_path = self.browser_session.browser_profile.downloads_path is not None
    if self.has_downloads_path:
        self._last_known_downloads: list[str] = []
        self.logger.debug('📁 Initialized download tracking for agent')

    # Starts in the "set" state, i.e. not externally paused
    self._external_pause_event = asyncio.Event()
    self._external_pause_event.set()
452
+
453
def _set_file_system(self, file_system_path: str | None = None) -> None:
    """Attach a ``CustomFileSystem`` to the agent.

    If the agent state already carries a file system state, the file system
    is restored from it and ``file_system_path`` is set to the restored
    ``base_dir``. Otherwise a new file system is created at
    ``file_system_path`` or, when that is not given, at
    ``self.agent_directory``; its state is then stored on the agent state.

    Args:
        file_system_path: Location for a new file system. Must not be
            combined with an existing ``file_system_state``.

    Raises:
        ValueError: If both a saved state and ``file_system_path`` are given.
        Exception: Any error raised while restoring or creating the file
            system is logged and re-raised.
    """
    saved_state = self.state.file_system_state

    # A saved state and an explicit path are mutually exclusive
    if saved_state and file_system_path:
        raise ValueError(
            'Cannot provide both file_system_state (from agent state) and file_system_path. '
            'Either restore from existing state or create new file system at specified path, not both.'
        )

    if saved_state:
        # Restore path: rebuild the file system from the saved state
        try:
            self.file_system = CustomFileSystem.from_state(saved_state)
            self.file_system_path = str(self.file_system.base_dir)
            self.logger.debug(f'💾 File system restored from state to: {self.file_system_path}')
            return
        except Exception as e:
            self.logger.error(f'💾 Failed to restore file system from state: {e}')
            raise e

    # Fresh path: explicit location wins, the agent directory is the fallback
    try:
        if not file_system_path:
            self.file_system = CustomFileSystem(self.agent_directory)
            self.file_system_path = str(self.agent_directory)
        else:
            self.file_system = CustomFileSystem(file_system_path)
            self.file_system_path = file_system_path
    except Exception as e:
        self.logger.error(f'💾 Failed to initialize file system: {e}.')
        raise e

    # Persist the new file system's state on the agent state
    self.state.file_system_state = self.file_system.get_state()

    self.logger.debug(f'💾 File system path: {self.file_system_path}')
491
+
492
@property
def logger(self) -> logging.Logger:
    """Return a logger named after the task, focused target and browser session.

    The name has the form
    ``browser-use.Agent:<task> on target:<target> of browser:<session>``
    where each placeholder is the last four characters of the respective id.

    The property is read while ``__init__`` is still running, so every
    attribute is looked up defensively: a missing ``browser_session`` falls
    back to the agent id, missing ids fall back to ``'----'`` and a missing
    or empty target id falls back to ``'--'``.
    """
    session = getattr(self, 'browser_session', None)
    agent_id = getattr(self, 'id', None) or '----'
    task_id = getattr(self, 'task_id', None) or agent_id

    # Without a browser session the agent id stands in for the session id
    session_id = session.id if session else agent_id

    # Target id is refreshed on every access because the focus can change
    focus = getattr(session, 'agent_focus', None) if session else None
    target_id = getattr(focus, 'target_id', None) if focus else None
    target_suffix = target_id[-4:] if target_id else '--'

    return logging.getLogger(
        f'browser-use.Agent:{task_id[-4:]} on target:{target_suffix} of browser:{session_id[-4:]}')
506
+
507
async def _finalize(self, browser_state_summary: BrowserStateSummary | None) -> None:
    """Close out the current step: record history, log, persist, advance.

    Does nothing when the step produced no result (``state.last_result`` is
    empty), in which case the step counter is not advanced either.

    Args:
        browser_state_summary: Browser state captured for this step. When
            it is ``None`` no history item is created, but the completion
            summary, file system save and step increment still happen.
    """
    step_end_time = time.time()
    if not self.state.last_result:
        return

    if browser_state_summary:
        metadata = StepMetadata(
            step_number=self.state.n_steps,
            step_start_time=self.step_start_time,
            step_end_time=step_end_time,
        )

        # Use _make_history_item like main branch
        await self._make_history_item(
            self.state.last_model_output,
            browser_state_summary,
            self.state.last_result,
            metadata,
        )

    # Log step completion summary
    self._log_step_completion_summary(self.step_start_time, self.state.last_result)

    # Save file system state after step completion
    self.save_file_system_state()

    # NOTE: no step event is emitted here. The previous code collected the
    # dumped actions into a local list that was never read; that dead work
    # has been removed.

    # Increment step counter after step is fully completed
    self.state.n_steps += 1
541
+
542
+ @observe(name='agent.run', metadata={'task': '{{task}}', 'debug': '{{debug}}'})
543
+ @time_execution_async('--run')
544
+ async def run(
545
+ self,
546
+ max_steps: int = 100,
547
+ on_step_start: AgentHookFunc | None = None,
548
+ on_step_end: AgentHookFunc | None = None,
549
+ ) -> AgentHistoryList[AgentStructuredOutput]:
550
+ """Execute the task with maximum number of steps"""
551
+
552
+ loop = asyncio.get_event_loop()
553
+ agent_run_error: str | None = None # Initialize error tracking variable
554
+ self._force_exit_telemetry_logged = False # ADDED: Flag for custom telemetry on force exit
555
+
556
+ # Set up the signal handler with callbacks specific to this agent
557
+ from browser_use.utils import SignalHandler
558
+
559
+ # Define the custom exit callback function for second CTRL+C
560
+ def on_force_exit_log_telemetry():
561
+ self._log_agent_event(max_steps=max_steps, agent_run_error='SIGINT: Cancelled by user')
562
+ # NEW: Call the flush method on the telemetry instance
563
+ if hasattr(self, 'telemetry') and self.telemetry:
564
+ self.telemetry.flush()
565
+ self._force_exit_telemetry_logged = True # Set the flag
566
+
567
+ signal_handler = SignalHandler(
568
+ loop=loop,
569
+ pause_callback=self.pause,
570
+ resume_callback=self.resume,
571
+ custom_exit_callback=on_force_exit_log_telemetry, # Pass the new telemetrycallback
572
+ exit_on_second_int=True,
573
+ )
574
+ signal_handler.register()
575
+
576
+ try:
577
+ self._log_agent_run()
578
+
579
+ self.logger.debug(
580
+ f'🔧 Agent setup: Task ID {self.task_id[-4:]}, Session ID {self.session_id[-4:]}, Browser Session ID {self.browser_session.id[-4:] if self.browser_session else "None"}'
581
+ )
582
+
583
+ # Initialize timing for session and task
584
+ self._session_start_time = time.time()
585
+ self._task_start_time = self._session_start_time # Initialize task start time
586
+
587
+ self.logger.debug('🔧 Browser session started with watchdogs attached')
588
+
589
+ # Check if task contains a URL and add it as an initial action (only if preload is enabled)
590
+ if self.preload:
591
+ initial_url = self._extract_url_from_task(self.task)
592
+ if initial_url:
593
+ self.logger.info(f'🔗 Found URL in task: {initial_url}, adding as initial action...')
594
+
595
+ # Create a go_to_url action for the initial URL
596
+ go_to_url_action = {
597
+ 'go_to_url': {
598
+ 'url': initial_url,
599
+ 'new_tab': False, # Navigate in current tab
600
+ }
601
+ }
602
+
603
+ # Add to initial_actions or create new list if none exist
604
+ if self.initial_actions:
605
+ # Convert back to dict format, prepend URL navigation, then convert back
606
+ initial_actions_dicts = []
607
+ for action in self.initial_actions:
608
+ action_data = action.model_dump(exclude_unset=True)
609
+ initial_actions_dicts.append(action_data)
610
+
611
+ # Prepend the go_to_url action
612
+ initial_actions_dicts = [go_to_url_action] + initial_actions_dicts
613
+
614
+ # Convert back to ActionModel instances
615
+ self.initial_actions = self._convert_initial_actions(initial_actions_dicts)
616
+ else:
617
+ # Create new initial_actions with just the go_to_url
618
+ self.initial_actions = self._convert_initial_actions([go_to_url_action])
619
+
620
+ self.logger.debug(f'✅ Added navigation to {initial_url} as initial action')
621
+
622
+ # Execute initial actions if provided
623
+ if self.initial_actions:
624
+ self.logger.debug(f'⚡ Executing {len(self.initial_actions)} initial actions...')
625
+ result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
626
+ self.state.last_result = result
627
+ self.logger.debug('✅ Initial actions completed')
628
+
629
+ self.logger.debug(f'🔄 Starting main execution loop with max {max_steps} steps...')
630
+ for step in range(max_steps):
631
+ # Replace the polling with clean pause-wait
632
+ if self.state.paused:
633
+ self.logger.debug(f'⏸️ Step {step}: Agent paused, waiting to resume...')
634
+ await self.wait_until_resumed()
635
+ signal_handler.reset()
636
+
637
+ # Check if we should stop due to too many failures
638
+ if self.state.consecutive_failures >= self.settings.max_failures:
639
+ self.logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
640
+ agent_run_error = f'Stopped due to {self.settings.max_failures} consecutive failures'
641
+ break
642
+
643
+ # Check control flags before each step
644
+ if self.state.stopped:
645
+ self.logger.info('🛑 Agent stopped')
646
+ agent_run_error = 'Agent stopped programmatically'
647
+ break
648
+
649
+ while self.state.paused:
650
+ await asyncio.sleep(0.2) # Small delay to prevent CPU spinning
651
+ if self.state.stopped: # Allow stopping while paused
652
+ agent_run_error = 'Agent stopped programmatically while paused'
653
+ break
654
+
655
+ if on_step_start is not None:
656
+ await on_step_start(self)
657
+
658
+ self.logger.debug(f'🚶 Starting step {step + 1}/{max_steps}...')
659
+ step_info = AgentStepInfo(step_number=step, max_steps=max_steps)
660
+
661
+ try:
662
+ await asyncio.wait_for(
663
+ self.step(step_info),
664
+ timeout=self.settings.step_timeout,
665
+ )
666
+ self.logger.debug(f'✅ Completed step {step + 1}/{max_steps}')
667
+ except TimeoutError:
668
+ # Handle step timeout gracefully
669
+ error_msg = f'Step {step + 1} timed out after {self.settings.step_timeout} seconds'
670
+ self.logger.error(f'⏰ {error_msg}')
671
+ self.state.consecutive_failures += 1
672
+ self.state.last_result = [ActionResult(error=error_msg)]
673
+
674
+ if on_step_end is not None:
675
+ await on_step_end(self)
676
+
677
+ if self.history.is_done():
678
+ self.logger.debug(f'🎯 Task completed after {step + 1} steps!')
679
+ await self.log_completion()
680
+
681
+ if self.register_done_callback:
682
+ if inspect.iscoroutinefunction(self.register_done_callback):
683
+ await self.register_done_callback(self.history)
684
+ else:
685
+ self.register_done_callback(self.history)
686
+
687
+ # Task completed
688
+ break
689
+ else:
690
+ agent_run_error = 'Failed to complete task in maximum steps'
691
+
692
+ self.history.add_item(
693
+ AgentHistory(
694
+ model_output=None,
695
+ result=[ActionResult(error=agent_run_error, include_in_memory=True)],
696
+ state=BrowserStateHistory(
697
+ url='',
698
+ title='',
699
+ tabs=[],
700
+ interacted_element=[],
701
+ screenshot_path=None,
702
+ ),
703
+ metadata=None,
704
+ )
705
+ )
706
+
707
+ self.logger.info(f'❌ {agent_run_error}')
708
+
709
+ self.logger.debug('📊 Collecting usage summary...')
710
+ self.history.usage = await self.token_cost_service.get_usage_summary()
711
+
712
+ # set the model output schema and call it on the fly
713
+ if self.history._output_model_schema is None and self.output_model_schema is not None:
714
+ self.history._output_model_schema = self.output_model_schema
715
+
716
+ self.logger.debug('🏁 Agent.run() completed successfully')
717
+ return self.history
718
+
719
+ except KeyboardInterrupt:
720
+ # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
721
+ self.logger.debug('Got KeyboardInterrupt during execution, returning current history')
722
+ agent_run_error = 'KeyboardInterrupt'
723
+
724
+ self.history.usage = await self.token_cost_service.get_usage_summary()
725
+
726
+ return self.history
727
+
728
+ except Exception as e:
729
+ self.logger.error(f'Agent run failed with exception: {e}', exc_info=True)
730
+ agent_run_error = str(e)
731
+ raise e
732
+
733
+ finally:
734
+ # Log token usage summary
735
+ await self.token_cost_service.log_usage_summary()
736
+
737
+ # Unregister signal handlers before cleanup
738
+ signal_handler.unregister()
739
+
740
+ if not self._force_exit_telemetry_logged: # MODIFIED: Check the flag
741
+ try:
742
+ self._log_agent_event(max_steps=max_steps, agent_run_error=agent_run_error)
743
+ except Exception as log_e: # Catch potential errors during logging itself
744
+ self.logger.error(f'Failed to log telemetry event: {log_e}', exc_info=True)
745
+ else:
746
+ # ADDED: Info message when custom telemetry for SIGINT was already logged
747
+ self.logger.debug('Telemetry for force exit (SIGINT) was logged by custom exit callback.')
748
+
749
+ # Generate GIF if needed before stopping event bus
750
+ if self.settings.generate_gif:
751
+ output_path: str = 'agent_history.gif'
752
+ if isinstance(self.settings.generate_gif, str):
753
+ output_path = self.settings.generate_gif
754
+
755
+ # Lazy import gif module to avoid heavy startup cost
756
+ from browser_use.agent.gif import create_history_gif
757
+
758
+ create_history_gif(task=self.task, history=self.history, output_path=output_path)
759
+
760
+ await self.close()
761
+
762
+ def _matches_action_type(self, action_type: str, allowed_pattern: str) -> bool:
763
+ """
764
+ Check if an action type matches an allowed pattern, supporting wildcards.
765
+
766
+ Args:
767
+ action_type: The actual action type (e.g., "mcp.filesystem.read_file")
768
+ allowed_pattern: The pattern to match (e.g., "mcp.filesystem*")
769
+
770
+ Returns:
771
+ True if the action type matches the pattern
772
+ """
773
+ if allowed_pattern.endswith('*'):
774
+ # Wildcard matching
775
+ prefix = allowed_pattern[:-1]
776
+ return action_type.startswith(prefix)
777
+ else:
778
+ # Exact matching
779
+ return action_type == allowed_pattern
780
+
781
+ def _is_action_parallel_allowed(self, action: ActionModel) -> bool:
782
+ """
783
+ Check if an action is allowed to be executed in parallel.
784
+
785
+ Args:
786
+ action: The action to check
787
+
788
+ Returns:
789
+ True if the action can be executed in parallel
790
+ """
791
+ action_data = action.model_dump(exclude_unset=True)
792
+ action_type = next(iter(action_data.keys())) if action_data else None
793
+
794
+ if not action_type:
795
+ return False
796
+
797
+ for allowed_pattern in self.allow_parallel_action_types:
798
+ if self._matches_action_type(action_type, allowed_pattern):
799
+ return True
800
+
801
+ return False
802
+
803
+ def _group_actions_for_parallel_execution(self, actions: list[ActionModel]) -> list[list[ActionModel]]:
804
+ """
805
+ Group consecutive actions that can be executed in parallel.
806
+
807
+ Args:
808
+ actions: List of actions to group
809
+
810
+ Returns:
811
+ List of action groups, where each group can be executed in parallel
812
+ """
813
+ if not actions:
814
+ return []
815
+
816
+ groups = []
817
+ current_group = [actions[0]]
818
+
819
+ for i in range(1, len(actions)):
820
+ current_action = actions[i]
821
+ previous_action = actions[i-1]
822
+
823
+ # Check if both current and previous actions can be executed in parallel
824
+ if (self._is_action_parallel_allowed(current_action) and
825
+ self._is_action_parallel_allowed(previous_action)):
826
+ # Add to current group
827
+ current_group.append(current_action)
828
+ else:
829
+ # Start a new group
830
+ groups.append(current_group)
831
+ current_group = [current_action]
832
+
833
+ # Add the last group
834
+ groups.append(current_group)
835
+
836
+ return groups
837
+
838
@observe_debug(ignore_input=True, ignore_output=True)
@time_execution_async('--multi_act')
async def multi_act(
    self,
    actions: list[ActionModel],
    check_for_new_elements: bool = True,
) -> list[ActionResult]:
    """Execute multiple actions, with parallel execution for allowed action types.

    The actions are first split into groups by
    ``_group_actions_for_parallel_execution``. A group with more than one
    action whose members are all parallel-allowed is run concurrently via
    ``_execute_actions_in_parallel``; every other group is run one action at
    a time.

    The whole batch stops (no later group is started) as soon as:
      * any result is done or carries an error,
      * a ``done`` action is found anywhere but in first position,
      * the DOM synchronization check reports that the page changed,
      * the last action of the batch has been executed.

    Args:
        actions: Actions to execute, in order.
        check_for_new_elements: Forwarded to the DOM synchronization check;
            when True, newly appeared elements also abort the batch.

    Returns:
        The results collected up to the point where execution stopped.

    Raises:
        AssertionError: If ``self.browser_session`` is None.
        Exception: Whatever an executed action raises is logged and re-raised.
    """
    results: list[ActionResult] = []
    total_actions = len(actions)

    assert self.browser_session is not None, 'BrowserSession is not set up'

    # Snapshot the selector map that the model saw when it chose these
    # actions, so later index-based actions can be validated against it.
    try:
        if (
            self.browser_session._cached_browser_state_summary is not None
            and self.browser_session._cached_browser_state_summary.dom_state is not None
        ):
            cached_selector_map = dict(self.browser_session._cached_browser_state_summary.dom_state.selector_map)
            cached_element_hashes = {e.parent_branch_hash() for e in cached_selector_map.values()}
        else:
            cached_selector_map = {}
            cached_element_hashes = set()
    except Exception as e:
        # Best effort: without a snapshot the checks run against empty data.
        self.logger.error(f'Error getting cached selector map: {e}')
        cached_selector_map = {}
        cached_element_hashes = set()

    # Group actions for potential parallel execution
    action_groups = self._group_actions_for_parallel_execution(actions)

    # Index of the first action of the current group within `actions`
    global_action_index = 0

    for group_index, action_group in enumerate(action_groups):
        group_size = len(action_group)

        can_execute_in_parallel = (
            group_size > 1
            and all(self._is_action_parallel_allowed(action) for action in action_group)
        )

        if can_execute_in_parallel:
            self.logger.info(f'🚀 Executing {group_size} actions in parallel: group {group_index + 1}/{len(action_groups)}')
            parallel_results = await self._execute_actions_in_parallel(
                action_group, global_action_index, total_actions,
                cached_selector_map, cached_element_hashes, check_for_new_elements
            )
            results.extend(parallel_results)
            global_action_index += group_size

            # Check if any result indicates completion or error
            if any(result.is_done or result.error for result in parallel_results):
                break
            continue

        # Sequential execution. `stop_batch` carries a stop decision out of
        # the inner loop: a bare `break` there would only end the current
        # group and let the following groups run anyway.
        stop_batch = False
        for local_index, action in enumerate(action_group):
            i = global_action_index + local_index

            if i > 0:
                # ONLY ALLOW TO CALL `done` IF IT IS A SINGLE ACTION
                if action.model_dump(exclude_unset=True).get('done') is not None:
                    msg = f'Done action is allowed only as a single action - stopped after action {i} / {total_actions}.'
                    self.logger.debug(msg)
                    stop_batch = True
                    break

                # DOM synchronization check - verify element indexes are
                # still valid AFTER the first action
                if action.get_index() is not None:
                    sync_result = await self._check_dom_synchronization(
                        action, i, total_actions, cached_selector_map, cached_element_hashes,
                        check_for_new_elements, actions
                    )
                    if sync_result:
                        results.append(sync_result)
                        stop_batch = True
                        break

                # wait between actions (only after first action)
                await asyncio.sleep(self.browser_profile.wait_between_actions)

            try:
                action_result = await self._execute_single_action(action, i, total_actions)
            except Exception as e:
                self.logger.error(f'❌ Executing action {i + 1} failed: {type(e).__name__}: {e}')
                raise

            results.append(action_result)

            if action_result.is_done or action_result.error or i == total_actions - 1:
                stop_batch = True
                break

        global_action_index += group_size

        if stop_batch:
            break

    return results
936
+
937
async def _execute_actions_in_parallel(
    self,
    actions: list[ActionModel],
    start_index: int,
    total_actions: int,
    cached_selector_map: dict,
    cached_element_hashes: set,
    check_for_new_elements: bool,
) -> list[ActionResult]:
    """Execute a group of actions in parallel using asyncio.gather.

    Every action is dispatched through ``self.controller.act`` concurrently.
    All of them are awaited to completion before failures are examined; the
    first failure (in action order) is then logged and re-raised.

    Args:
        actions: The actions of this group, in their original order.
        start_index: Position of the group's first action in the whole
            batch; used only for log numbering.
        total_actions: Size of the whole batch; used only for logging.
        cached_selector_map: Not used by this method; kept so the signature
            mirrors the sequential path.
        cached_element_hashes: Not used by this method.
        check_for_new_elements: Not used by this method.

    Returns:
        One result per action, in the same order as ``actions``.

    Raises:
        BaseException: The first exception raised by any of the actions,
            including ``asyncio.CancelledError``.
    """

    async def execute_single_parallel_action(action: ActionModel, action_index: int) -> ActionResult:
        """Execute a single action for parallel execution"""
        await self._raise_if_stopped_or_paused()

        # Build a short, single-line description of the action for the logs.
        action_data = action.model_dump(exclude_unset=True)
        action_name = next(iter(action_data.keys())) if action_data else 'unknown'
        action_params = getattr(action, action_name, '') or str(action.model_dump(mode='json'))[:140].replace(
            '"', ''
        ).replace('{', '').replace('}', '').replace("'", '').strip().strip(',')
        action_params = str(action_params)
        action_params = f'{action_params[:122]}...' if len(action_params) > 128 else action_params

        time_start = time.time()
        blue = '\033[34m'
        reset = '\033[0m'
        self.logger.info(f' 🦾 {blue}[PARALLEL ACTION {action_index + 1}/{total_actions}]{reset} {action_params}')

        # Execute the action
        result = await self.controller.act(
            action=action,
            browser_session=self.browser_session,
            file_system=self.file_system,
            page_extraction_llm=self.settings.page_extraction_llm,
            sensitive_data=self.sensitive_data,
            available_file_paths=self.available_file_paths,
            context=self.context,
        )

        time_elapsed = time.time() - time_start

        green = '\033[92m'
        self.logger.debug(
            f'☑️ Parallel action {action_index + 1}/{total_actions}: {green}{action_params}{reset} in {time_elapsed:.2f}s'
        )

        return result

    # Create tasks for parallel execution
    tasks = [
        execute_single_parallel_action(action, start_index + offset)
        for offset, action in enumerate(actions)
    ]

    # return_exceptions=True lets every action finish before any failure is
    # surfaced, instead of propagating the first one immediately.
    parallel_results = await asyncio.gather(*tasks, return_exceptions=True)

    processed_results: list[ActionResult] = []
    for offset, outcome in enumerate(parallel_results):
        # gather() can hand back any BaseException here (CancelledError is
        # not an Exception subclass); checking only Exception would let it
        # through as if it were an ActionResult.
        if isinstance(outcome, BaseException):
            action_index = start_index + offset
            self.logger.error(f'❌ Parallel action {action_index + 1} failed: {type(outcome).__name__}: {outcome}')
            raise outcome
        processed_results.append(outcome)

    return processed_results
1007
+
1008
async def _check_dom_synchronization(
    self,
    action: ActionModel,
    action_index: int,
    total_actions: int,
    cached_selector_map: dict,
    cached_element_hashes: set,
    check_for_new_elements: bool,
    all_actions: list[ActionModel]
) -> ActionResult | None:
    """Compare the live DOM with the cached one before an index-based action.

    A fresh browser state summary is fetched (no screenshot, no hash
    caching). The check fails when the element at the action's index has a
    different ``parent_branch_hash()`` than in the cached selector map, or,
    if ``check_for_new_elements`` is set, when the fresh page contains an
    element hash that is absent from ``cached_element_hashes``.

    Args:
        action: The action about to be executed.
        action_index: Position of ``action`` within ``all_actions``.
        total_actions: Size of the whole batch; used in the message.
        cached_selector_map: Selector map captured before the batch started.
        cached_element_hashes: Element hashes captured before the batch started.
        check_for_new_elements: Whether newly appeared elements fail the check.
        all_actions: The whole batch; used to name the actions being skipped.

    Returns:
        An ActionResult describing why the remaining actions are skipped, or
        None when the DOM still matches.
    """
    fresh_state = await self.browser_session.get_browser_state_summary(
        cache_clickable_elements_hashes=False,
        include_screenshot=False,
    )
    fresh_selector_map = fresh_state.dom_state.selector_map

    def target_hash(selector_map):
        # Hash of the element at the action's index, or None if nothing is there.
        element = selector_map.get(action.get_index())  # type: ignore
        return element.parent_branch_hash() if element else None

    def remaining_names() -> str:
        # Comma-separated names of this action and all actions after it.
        names = []
        for pending in all_actions[action_index:]:
            dumped = pending.model_dump(exclude_unset=True)
            names.append(next(iter(dumped.keys())) if dumped else 'unknown')
        return ', '.join(names)

    def report(msg: str) -> ActionResult:
        # Log the reason and wrap it so it also reaches the agent's memory.
        self.logger.info(msg)
        return ActionResult(
            extracted_content=msg,
            include_in_memory=True,
            long_term_memory=msg,
        )

    # Detect index change after previous action
    cached_hash = target_hash(cached_selector_map)
    fresh_hash = target_hash(fresh_selector_map)
    if cached_hash != fresh_hash:
        return report(
            f'Page changed after action {action_index} / {total_actions}: actions {remaining_names()} were not executed'
        )

    # Check for new elements that appeared
    fresh_hashes = {element.parent_branch_hash() for element in fresh_selector_map.values()}
    if check_for_new_elements and not fresh_hashes.issubset(cached_element_hashes):
        # next action requires index but there are new elements on the page
        return report(
            f'Something new appeared after action {action_index} / {total_actions}: actions {remaining_names()} were not executed'
        )

    return None
1065
+
1066
async def _execute_single_action(self, action: ActionModel, action_index: int, total_actions: int) -> ActionResult:
    """Run one action through the controller and log its start and duration.

    Args:
        action: The action to execute.
        action_index: Zero-based position of the action in the batch; logged
            one-based.
        total_actions: Size of the batch; used only for logging.

    Returns:
        Whatever ``self.controller.act`` returns for this action.
    """
    await self._raise_if_stopped_or_paused()

    # Derive a short description: the value stored under the action's name,
    # or, when that is empty, a trimmed dump of the whole action.
    dumped = action.model_dump(exclude_unset=True)
    action_name = next(iter(dumped.keys())) if dumped else 'unknown'
    description = getattr(action, action_name, '')
    if not description:
        raw = str(action.model_dump(mode='json'))[:140]
        for noise in ('"', '{', '}', "'"):
            raw = raw.replace(noise, '')
        description = raw.strip().strip(',')

    # Ensure the description is always a string before checking its length
    description = str(description)
    if len(description) > 128:
        description = f'{description[:122]}...'

    # ANSI colour codes used in the log lines
    green = '\033[92m'
    blue = '\033[34m'
    reset = '\033[0m'

    started_at = time.time()

    self.logger.info(f' 🦾 {blue}[ACTION {action_index + 1}/{total_actions}]{reset} {description}')

    result = await self.controller.act(
        action=action,
        browser_session=self.browser_session,
        file_system=self.file_system,
        page_extraction_llm=self.settings.page_extraction_llm,
        sensitive_data=self.sensitive_data,
        available_file_paths=self.available_file_paths,
        context=self.context,
    )

    time_elapsed = time.time() - started_at

    self.logger.debug(
        f'☑️ Executed action {action_index + 1}/{total_actions}: {green}{description}{reset} in {time_elapsed:.2f}s'
    )

    return result