vibesurf 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vibesurf might be problematic. Click here for more details.

Files changed (70) hide show
  1. vibe_surf/__init__.py +12 -0
  2. vibe_surf/_version.py +34 -0
  3. vibe_surf/agents/__init__.py +0 -0
  4. vibe_surf/agents/browser_use_agent.py +1106 -0
  5. vibe_surf/agents/prompts/__init__.py +1 -0
  6. vibe_surf/agents/prompts/vibe_surf_prompt.py +176 -0
  7. vibe_surf/agents/report_writer_agent.py +360 -0
  8. vibe_surf/agents/vibe_surf_agent.py +1632 -0
  9. vibe_surf/backend/__init__.py +0 -0
  10. vibe_surf/backend/api/__init__.py +3 -0
  11. vibe_surf/backend/api/activity.py +243 -0
  12. vibe_surf/backend/api/config.py +740 -0
  13. vibe_surf/backend/api/files.py +322 -0
  14. vibe_surf/backend/api/models.py +257 -0
  15. vibe_surf/backend/api/task.py +300 -0
  16. vibe_surf/backend/database/__init__.py +13 -0
  17. vibe_surf/backend/database/manager.py +129 -0
  18. vibe_surf/backend/database/models.py +164 -0
  19. vibe_surf/backend/database/queries.py +922 -0
  20. vibe_surf/backend/database/schemas.py +100 -0
  21. vibe_surf/backend/llm_config.py +182 -0
  22. vibe_surf/backend/main.py +137 -0
  23. vibe_surf/backend/migrations/__init__.py +16 -0
  24. vibe_surf/backend/migrations/init_db.py +303 -0
  25. vibe_surf/backend/migrations/seed_data.py +236 -0
  26. vibe_surf/backend/shared_state.py +601 -0
  27. vibe_surf/backend/utils/__init__.py +7 -0
  28. vibe_surf/backend/utils/encryption.py +164 -0
  29. vibe_surf/backend/utils/llm_factory.py +225 -0
  30. vibe_surf/browser/__init__.py +8 -0
  31. vibe_surf/browser/agen_browser_profile.py +130 -0
  32. vibe_surf/browser/agent_browser_session.py +416 -0
  33. vibe_surf/browser/browser_manager.py +296 -0
  34. vibe_surf/browser/utils.py +790 -0
  35. vibe_surf/browser/watchdogs/__init__.py +0 -0
  36. vibe_surf/browser/watchdogs/action_watchdog.py +291 -0
  37. vibe_surf/browser/watchdogs/dom_watchdog.py +954 -0
  38. vibe_surf/chrome_extension/background.js +558 -0
  39. vibe_surf/chrome_extension/config.js +48 -0
  40. vibe_surf/chrome_extension/content.js +284 -0
  41. vibe_surf/chrome_extension/dev-reload.js +47 -0
  42. vibe_surf/chrome_extension/icons/convert-svg.js +33 -0
  43. vibe_surf/chrome_extension/icons/logo-preview.html +187 -0
  44. vibe_surf/chrome_extension/icons/logo.png +0 -0
  45. vibe_surf/chrome_extension/manifest.json +53 -0
  46. vibe_surf/chrome_extension/popup.html +134 -0
  47. vibe_surf/chrome_extension/scripts/api-client.js +473 -0
  48. vibe_surf/chrome_extension/scripts/main.js +491 -0
  49. vibe_surf/chrome_extension/scripts/markdown-it.min.js +3 -0
  50. vibe_surf/chrome_extension/scripts/session-manager.js +599 -0
  51. vibe_surf/chrome_extension/scripts/ui-manager.js +3687 -0
  52. vibe_surf/chrome_extension/sidepanel.html +347 -0
  53. vibe_surf/chrome_extension/styles/animations.css +471 -0
  54. vibe_surf/chrome_extension/styles/components.css +670 -0
  55. vibe_surf/chrome_extension/styles/main.css +2307 -0
  56. vibe_surf/chrome_extension/styles/settings.css +1100 -0
  57. vibe_surf/cli.py +357 -0
  58. vibe_surf/controller/__init__.py +0 -0
  59. vibe_surf/controller/file_system.py +53 -0
  60. vibe_surf/controller/mcp_client.py +68 -0
  61. vibe_surf/controller/vibesurf_controller.py +616 -0
  62. vibe_surf/controller/views.py +37 -0
  63. vibe_surf/llm/__init__.py +21 -0
  64. vibe_surf/llm/openai_compatible.py +237 -0
  65. vibesurf-0.1.0.dist-info/METADATA +97 -0
  66. vibesurf-0.1.0.dist-info/RECORD +70 -0
  67. vibesurf-0.1.0.dist-info/WHEEL +5 -0
  68. vibesurf-0.1.0.dist-info/entry_points.txt +2 -0
  69. vibesurf-0.1.0.dist-info/licenses/LICENSE +201 -0
  70. vibesurf-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,790 @@
1
+ import base64
2
+ import io
3
+ import logging
4
+ from typing import Optional, Tuple
5
+
6
+ from PIL import Image, ImageDraw, ImageFont
7
+
8
+ from browser_use.dom.views import DOMSelectorMap
9
+ from browser_use.observability import observe_debug
10
+
11
+ import math
12
+ import base64
13
+ import os
14
+ import logging
15
+ import binascii # Import specifically for the error type
16
+ import pdb
17
+
18
+ from PIL import Image, ImageDraw, ImageFont
19
+ import random
20
+ import colorsys
21
+ import numpy as np
22
+ from typing import Optional, Tuple, List, Any
23
+ import io
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
# Candidate font file names, tried in order (earlier entries are preferred).
# Lookup compares each entry case-insensitively against file names found on
# disk, so every entry must be the exact file name with no stray whitespace.
# Consider adding fonts known for broad Unicode coverage early (like Noto).
COMMON_FONT_FILES = [
    "simsun.ttc",  # SimSun (Chinese - Simplified, Windows) - TTC
    "seguisb.ttf",  # Segoe UI Semibold (Good UI Font on Windows)
    "arial.ttf",  # Arial (Very common, good compatibility)
    "verdana.ttf",  # Verdana (Good readability)
    "tahoma.ttf",  # Tahoma (Common Windows UI font)
    "calibri.ttf",  # Calibri (Modern default in Office)
    "NotoSans-Regular.ttf",  # Noto Sans Regular (Broad Unicode, often default name)
    "NotoSansCJK-Regular.otf",  # Google Noto Fonts (covers CJK) - OpenType
    "DejaVuSans.ttf",  # Common Linux font (Good coverage)
    "ubuntu-regular.ttf",  # Ubuntu font (Common on Ubuntu Linux)
    # NOTE: this entry previously had a leading space inside the quotes and
    # therefore could never match a real file name.
    "liberation-sans.ttf",  # Liberation Sans (Common Linux alternative to Arial)
    "msyh.ttc", "msyh.ttf",  # Microsoft YaHei (Chinese - Simplified) - TTC or TTF
    "simhei.ttf",  # SimHei (Chinese - Simplified - often present)
    "wqy-zenhei.ttc",  # WenQuanYi Zen Hei (Linux Chinese) - TTC
    "wqy-microhei.ttc",  # WenQuanYi Micro Hei (Linux Chinese) - TTC
    # Add Japanese, Korean, etc. specific fonts if needed
    "msgothic.ttc",  # MS Gothic (Japanese - older Windows) - TTC
    "malgun.ttf",  # Malgun Gothic (Korean - Windows)
    "gulim.ttc",  # Gulim (Korean - older Windows) - TTC
    "AppleGothic.ttf",  # Apple Gothic (macOS Korean)
    "ヒラギノ角ゴ ProN W3.otf",  # Hiragino Kaku Gothic ProN (macOS Japanese) - Use actual name if known
    "songti.ttf",  # Songti (Less common nowadays)
]
54
+
55
# --- Font Directory Discovery ---

# Directories that will be searched for font files, in priority order.
FONT_DIRS = []
if os.name == 'nt':  # Windows
    system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
    local_app_data = os.environ.get('LOCALAPPDATA')
    FONT_DIRS.append(os.path.join(system_root, 'Fonts'))
    if local_app_data:
        # Per-user font installs live under the local application data folder.
        FONT_DIRS.append(os.path.join(local_app_data, 'Microsoft\\Windows\\Fonts'))
elif os.name == 'posix':
    # System-wide locations (Linux, macOS)
    posix_system_dirs = [
        '/usr/share/fonts',
        '/usr/local/share/fonts',
        '/Library/Fonts',  # macOS system
        '/System/Library/Fonts',  # macOS system (usually contains essential fonts)
    ]
    # Per-user locations (Linux, macOS)
    posix_user_dirs = [
        os.path.expanduser(user_pattern)
        for user_pattern in ('~/.fonts', '~/.local/share/fonts', '~/Library/Fonts')
    ]

    for system_dir in posix_system_dirs:
        if not os.path.isdir(system_dir):
            continue
        FONT_DIRS.append(system_dir)
        # Common per-format subdirectories are listed right after their parent.
        for subdir_type in ('truetype', 'opentype', 'TTF', 'OTF', 'type1', 'bitmap'):
            potential_subdir = os.path.join(system_dir, subdir_type)
            if os.path.isdir(potential_subdir):
                FONT_DIRS.append(potential_subdir)

    FONT_DIRS.extend(user_dir for user_dir in posix_user_dirs if os.path.isdir(user_dir))

# Keep the first occurrence of every directory and drop anything that is not
# (or is no longer) an existing directory.
FONT_DIRS = [font_dir for font_dir in dict.fromkeys(FONT_DIRS) if os.path.isdir(font_dir)]


# --- Caching ---

# Lower-cased font file name -> resolved path, or None when a search found nothing.
_font_path_cache = dict()
# (font path, size) -> loaded font object.
_loaded_font_cache = dict()
110
+
111
+
112
+ # --- Core Functions ---
113
+
114
def find_font_path(font_name):
    """
    Resolve a font file name to a full path, ignoring case.

    Every directory in FONT_DIRS is walked recursively and the first file
    whose name equals ``font_name`` (compared lower-cased) is returned.
    Both hits and misses are remembered in ``_font_path_cache`` so each
    name is searched on disk at most once.

    Args:
        font_name: File name of the font, e.g. "arial.ttf".

    Returns:
        The path of the matching file, or None if no directory contains it.
    """
    wanted = font_name.lower()

    # Cached answer (which may legitimately be None for a known miss).
    try:
        return _font_path_cache[wanted]
    except KeyError:
        pass

    for search_root in FONT_DIRS:
        try:
            for parent, _subdirs, file_names in os.walk(search_root, topdown=True):
                for candidate in file_names:
                    if candidate.lower() != wanted:
                        continue
                    candidate_path = os.path.join(parent, candidate)
                    # Double-check that the entry really is a regular file.
                    if not os.path.isfile(candidate_path):
                        continue
                    _font_path_cache[wanted] = candidate_path
                    return candidate_path
        except OSError:
            # Inaccessible directory: move on to the next search root.
            continue

    # Remember the miss so the directories are not walked again for this name.
    _font_path_cache[wanted] = None
    return None
150
+
151
+
152
def get_font(font_size):
    """
    Return a Pillow font object suitable for drawing text at ``font_size``.

    The names in COMMON_FONT_FILES are tried in order; the first one that
    can be located via find_font_path() and loaded with ImageFont.truetype()
    wins. Loaded fonts are cached in ``_loaded_font_cache`` keyed by
    (path, size). If none of the preferred fonts can be loaded, Pillow's
    built-in default font is used instead.

    Args:
        font_size: Requested font size, passed to ImageFont.truetype().

    Returns:
        A Pillow font object, or None if even the default font failed to load.
    """
    # 1. Try the preferred fonts in priority order.
    for font_name in COMMON_FONT_FILES:
        font_path = find_font_path(font_name)  # case-insensitive, cached
        if not font_path:
            continue

        # 2. Reuse an already loaded font for this (path, size) pair.
        cache_key = (font_path, font_size)
        if cache_key in _loaded_font_cache:
            return _loaded_font_cache[cache_key]

        # 3. Load it; on failure log and fall through to the next candidate.
        try:
            font = ImageFont.truetype(font_path, font_size)
        except IOError as e:
            logger.warning(
                f"Could not load font file '{font_path}' (found for '{font_name}') at size {font_size}. Reason: {e}")
        except Exception as e:
            logger.error(f"Unexpected error loading font {font_name} ({font_path}): {e}")
        else:
            _loaded_font_cache[cache_key] = font
            return font

    # 4. Fall back to Pillow's built-in font (cached under a dedicated key).
    default_cache_key = ("_pillow_default_", font_size)
    if default_cache_key in _loaded_font_cache:
        return _loaded_font_cache[default_cache_key]

    try:
        logger.warning(
            f"No suitable font found from preferred list in system paths. Using Pillow's default font (size {font_size} requested, but default font may not scale).")
        try:
            # Newer Pillow releases accept a ``size`` argument and return a
            # scalable built-in font, so honour the requested size if possible.
            font = ImageFont.load_default(size=font_size)
        except (TypeError, ValueError, ImportError, OSError):
            # Older Pillow (no ``size`` keyword) or a scalable font could not
            # be created: use the fixed-size built-in font as before.
            font = ImageFont.load_default()
        _loaded_font_cache[default_cache_key] = font
        return font
    except IOError as e:
        logger.critical(
            f"CRITICAL ERROR: Could not load any preferred fonts AND failed to load Pillow's default font! Reason: {e}")
        return None
203
+
204
+
205
def check_overlap(box1: Tuple[float, float, float, float],
                  box2: Tuple[float, float, float, float]) -> bool:
    """
    Tell whether two axis-aligned boxes share any interior area.

    Each box is (left, top, right, bottom). Boxes that merely touch along
    an edge or at a corner are NOT considered overlapping.
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # The boxes are disjoint as soon as one lies entirely to the side of,
    # or entirely above/below, the other.
    apart_horizontally = right_a <= left_b or right_b <= left_a
    apart_vertically = bottom_a <= top_b or bottom_b <= top_a
    return not (apart_horizontally or apart_vertically)
215
+
216
+
217
def generate_distinct_colors(n):
    """
    Generates n visually distinct colors in RGB format using HSV color space.

    Hues are spaced evenly around the color wheel (fixed saturation 0.7 and
    value 0.8). The list is then put into a scrambled order so that
    neighbouring positions do not get neighbouring hues. The scrambling uses
    a private random generator seeded with ``n``: the result is therefore
    identical on every call with the same ``n``, and the module-level
    ``random`` state is left untouched.

    Args:
        n: The number of distinct colors to generate.

    Returns:
        A list of n tuples, where each tuple represents an RGB color (int 0-255).
        Returns an empty list if n <= 0.
    """
    if n <= 0:
        return []

    saturation = 0.7
    value = 0.8
    colors = [
        tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(i / n, saturation, value))
        for i in range(n)
    ]

    # A single color needs no reordering.
    if n > 1:
        # Seeded, local RNG: reproducible order without touching global state.
        random.Random(int(n)).shuffle(colors)
    return colors
274
+
275
def calculate_label_placement(
        corner: str,
        outline_box: Tuple[float, float, float, float],
        text_width: float,  # width of the label background box
        text_height: float,  # height of the label background box
        box_width: float,  # width of the element's outline box
        box_height: float,  # height of the element's outline box
        img_width: int,
        img_height: int
) -> Tuple[Optional[Tuple[float, float, float, float]], Optional[Tuple[float, float]]]:
    """
    Work out where a label background should go for one corner of an element box.

    When the label is at least half as wide or half as tall as a
    non-degenerate element box, it is placed just outside the requested
    corner; otherwise it is tucked inside that corner. The resulting
    rectangle is then clamped to the image.

    Args:
        corner: One of 'top_right', 'bottom_right', 'bottom_left', 'top_left'.
        outline_box: Element box as (left, top, right, bottom).
        text_width: Width of the label background.
        text_height: Height of the label background.
        box_width: Width of the element box.
        box_height: Height of the element box.
        img_width: Image width used for clamping.
        img_height: Image height used for clamping.

    Returns:
        (background_box, reference_pos) where background_box is the clamped
        (l, t, r, b) rectangle and reference_pos is its top-left corner, or
        (None, None) if the corner name is unknown or clamping leaves an
        empty rectangle.
    """
    left_edge, top_edge, right_edge, bottom_edge = outline_box

    label_is_large = text_height >= (box_height * 0.5) or text_width >= (box_width * 0.5)
    place_outside = label_is_large and box_height > 0 and box_width > 0

    if corner not in ('top_right', 'bottom_right', 'bottom_left', 'top_left'):
        logger.error(f"Invalid corner specified: {corner}")
        return None, None

    # Horizontal origin: right-hand corners hang off / hug the right edge,
    # left-hand corners do the same with the left edge.
    if corner.endswith('_right'):
        origin_x = right_edge if place_outside else right_edge - text_width
    else:
        origin_x = left_edge - text_width if place_outside else left_edge

    # Vertical origin, same idea for top versus bottom corners.
    if corner.startswith('top_'):
        origin_y = top_edge - text_height if place_outside else top_edge
    else:
        origin_y = bottom_edge if place_outside else bottom_edge - text_height

    # Clamp the unclipped rectangle to the image.
    clamped_left = max(0.0, origin_x)
    clamped_top = max(0.0, origin_y)
    clamped_right = min(float(img_width), origin_x + text_width)
    clamped_bottom = min(float(img_height), origin_y + text_height)

    # Clamping may have collapsed the rectangle entirely.
    if clamped_right <= clamped_left or clamped_bottom <= clamped_top:
        return None, None

    # The reference point for text is the top-left of the background; the
    # caller applies its own padding offset on top of it.
    return (clamped_left, clamped_top, clamped_right, clamped_bottom), (clamped_left, clamped_top)
367
+
368
+
369
def _clip_box_to_image(box, img_width, img_height):
    """Clip (left, top, right, bottom) to the image; return None if nothing visible remains."""
    left, top, right, bottom = box
    clipped = (max(0.0, left), max(0.0, top),
               min(float(img_width), right), min(float(img_height), bottom))
    if clipped[2] <= clipped[0] or clipped[3] <= clipped[1]:
        return None
    return clipped


def _measure_label(draw, label, font, font_size, highlight_index):
    """Return (background_width, background_height, padding) for a label drawn with ``font``."""
    try:
        text_bbox = draw.textbbox((0, 0), label, font=font, stroke_width=0, align="center", anchor='lt')
        text_render_width = text_bbox[2] - text_bbox[0]
        text_render_height = text_bbox[3] - text_bbox[1]
    except AttributeError:
        # ``textbbox`` is unavailable: measure another way.
        logger.debug("Using font.getsize fallback for text dimensions.")
        try:
            text_render_width, text_render_height = draw.textlength(label, font=font), font.size
        except AttributeError:
            text_render_width = len(label) * font_size * 0.6
            text_render_height = font_size
    except Exception as tb_e:
        logger.error(f"Error calculating text size for index {highlight_index}: {tb_e}. Using estimate.")
        # Rough estimate. The padding MUST be defined here as well: the caller
        # uses it to offset the text inside the background box.
        padding = (len(label) * font_size * 0.6) // 6
        return len(label) * font_size * 0.8, font_size * 1.5, padding, 

    # Same padding (one sixth of the text width) is applied on both axes.
    padding = text_render_width // 6
    return text_render_width + padding, text_render_height + padding, padding


def highlight_screenshot(screenshot_base64: str, elements: List[List[Any]]) -> str:
    """
    Draws highlighted bounding boxes with index numbers (avoiding label overlap)
    on a screenshot.

    Each valid element gets a semi-transparent fill, a 2px outline and a label
    showing its index. For the label, the four corners of the element box are
    tried in a fixed order and the first position that lies fully inside the
    image and does not overlap an already placed label is used; if none
    qualifies, the top-left placement is used regardless of overlap.

    Args:
        screenshot_base64: The screenshot image encoded in base64.
        elements: A list where each item is another list:
                  [highlight_index: int, box_coords: List[float]]
                  Box coordinates are [x1, y1, x2, y2] relative to the screenshot.
                  Items with a malformed structure, non-numeric coordinates,
                  a negative index or a repeated index are skipped with a warning.

    Returns:
        A base64 encoded string of the highlighted screenshot (PNG format),
        or the original base64 string if errors occur or no valid elements
        are provided.
    """
    if not elements:
        logger.warning("No elements provided to highlight.")
        return screenshot_base64

    # --- Input validation ---
    valid_elements = []
    seen_indices = set()
    for i, element_item in enumerate(elements):
        well_formed = (
            isinstance(element_item, (list, tuple)) and len(element_item) >= 2 and
            isinstance(element_item[0], int) and
            isinstance(element_item[1], (list, tuple)) and len(element_item[1]) == 4
        )
        if not well_formed:
            logger.warning(
                f"Skipping element at raw index {i} due to invalid structure or types. Expected [int, [x1,y1,x2,y2]], got: {element_item}")
            continue

        try:
            box_coords = [float(c) for c in element_item[1]]
        except (ValueError, TypeError):
            logger.warning(f"Skipping element at raw index {i} due to invalid box coordinates: {element_item[1]}")
            continue

        highlight_index = element_item[0]
        if highlight_index in seen_indices:
            logger.warning(
                f"Skipping element at raw index {i} due to duplicate highlight_index: {highlight_index}")
            continue
        if highlight_index < 0:
            logger.warning(
                f"Skipping element at raw index {i} due to negative highlight_index: {highlight_index}")
            continue

        valid_elements.append([highlight_index, box_coords])
        seen_indices.add(highlight_index)

    if not valid_elements:
        logger.warning("No valid elements found after filtering.")
        return screenshot_base64

    # Colors are assigned by position, so order elements by their index.
    valid_elements.sort(key=lambda el: el[0])

    # --- Image Loading ---
    try:
        image_data = base64.b64decode(screenshot_base64)
        image = Image.open(io.BytesIO(image_data)).convert("RGBA")
    except Exception as e:
        logger.error(f"Error decoding or opening image: {e}")
        return screenshot_base64

    img_width, img_height = image.size
    if img_width <= 0 or img_height <= 0:
        logger.error(f"Invalid image dimensions: {image.size}")
        return screenshot_base64

    # --- Setup Drawing ---
    fill_overlay = Image.new('RGBA', image.size, (255, 255, 255, 0))
    draw_fill = ImageDraw.Draw(fill_overlay)

    colors = generate_distinct_colors(len(valid_elements))
    fill_alpha = int(0.3 * 255)

    # Visible part of every element box (None when nothing is visible);
    # computed once and shared by both drawing passes.
    clipped_boxes = [
        _clip_box_to_image(box_coords, img_width, img_height)
        for _, box_coords in valid_elements
    ]

    # --- Pass 1: Draw semi-transparent fills on a separate overlay ---
    for i, (highlight_index, box_coords) in enumerate(valid_elements):
        draw_box = clipped_boxes[i]
        if draw_box is None:
            logger.debug(
                f"Skipping fill for element index {highlight_index} due to invalid box dimensions: {box_coords}")
            continue

        fill_color = (*colors[i], fill_alpha)
        try:
            draw_fill.rectangle(draw_box, fill=fill_color)
        except Exception as draw_e:
            logger.error(f"Error drawing fill for element index {highlight_index}, Box: {draw_box}: {draw_e}")

    # --- Composite the fill overlay onto the screenshot ---
    try:
        image = Image.alpha_composite(image, fill_overlay)
    except ValueError as e:
        logger.error(f"Error during alpha compositing: {e}. Check image modes.")
        # Fallback: continue with the original image, without any fills.
        image = Image.open(io.BytesIO(image_data)).convert("RGBA")
        logger.warning("Compositing failed. Drawing outlines/text on original image.")

    draw_main = ImageDraw.Draw(image)

    # --- Pass 2: Draw outlines and labels ---
    placed_label_boxes: List[Tuple[float, float, float, float]] = []
    corners_to_try = ['top_right', 'bottom_right', 'bottom_left', 'top_left']
    outline_width = 2

    for i, (highlight_index, box_coords) in enumerate(valid_elements):
        draw_box_outline = clipped_boxes[i]
        if draw_box_outline is None:
            continue

        label = str(highlight_index)
        color_rgb = colors[i]
        box_width = draw_box_outline[2] - draw_box_outline[0]
        box_height = draw_box_outline[3] - draw_box_outline[1]

        # Font size follows the smaller box side, limited to the range 25..35.
        font_size = max(25, min(35, int(min(box_width, box_height) * 0.4)))
        font = get_font(font_size)

        if not font:
            logger.warning(f"Could not load font for index {highlight_index}, skipping text label.")
            try:  # Still draw outline
                draw_main.rectangle(draw_box_outline, outline=color_rgb, width=outline_width)
            except Exception as draw_e:
                logger.error(f"Error drawing outline for index {highlight_index} (no font): {draw_e}")
            continue

        label_bg_width, label_bg_height, padding = _measure_label(
            draw_main, label, font, font_size, highlight_index)

        placement_args = dict(
            outline_box=draw_box_outline,
            text_width=label_bg_width,
            text_height=label_bg_height,
            box_width=box_width,
            box_height=box_height,
            img_width=img_width,
            img_height=img_height,
        )

        # --- Find a label position that overlaps no earlier label ---
        chosen_label_bg_box = None
        chosen_text_pos = None  # Final position for draw_main.text()

        for corner_choice in corners_to_try:
            potential_bg_box, potential_text_ref_pos = calculate_label_placement(
                corner=corner_choice, **placement_args)
            if potential_bg_box is None:
                continue

            # Reject placements that reach the right/bottom image border.
            if potential_bg_box[0] < 0 or potential_bg_box[1] < 0 or potential_bg_box[2] >= img_width or \
                    potential_bg_box[3] >= img_height:
                continue

            if any(check_overlap(potential_bg_box, placed_box) for placed_box in placed_label_boxes):
                continue

            chosen_label_bg_box = potential_bg_box
            chosen_text_pos = (
                potential_text_ref_pos[0] + padding // 2,
                potential_text_ref_pos[1] + padding // 2)
            break
        else:
            # --- No free corner: default to top-left even if it overlaps ---
            logger.debug(f"Could not avoid label overlap for index {highlight_index}. Defaulting to top-left.")
            chosen_label_bg_box, potential_text_ref_pos = calculate_label_placement(
                corner='top_left', **placement_args)
            if chosen_label_bg_box and potential_text_ref_pos:
                chosen_text_pos = (
                    potential_text_ref_pos[0] + padding // 2,
                    potential_text_ref_pos[1] + padding // 2)
            else:
                logger.debug(f"Default top-left placement failed for index {highlight_index}. Skipping label.")
                chosen_label_bg_box = None
                chosen_text_pos = None

        # --- Draw Outline, Label Background, and Text ---
        try:
            # 1. Outline
            draw_main.rectangle(draw_box_outline, outline=color_rgb, width=outline_width)

            # 2. Label (if a position was found)
            if chosen_label_bg_box and chosen_text_pos:
                if chosen_label_bg_box[2] > chosen_label_bg_box[0] and chosen_label_bg_box[3] > chosen_label_bg_box[1]:
                    draw_main.rectangle(chosen_label_bg_box, fill=color_rgb)

                    # Only draw the text if its anchor lies inside the image.
                    if chosen_text_pos[0] < img_width and chosen_text_pos[1] < img_height:
                        draw_main.text(chosen_text_pos, label, fill="white", font=font, stroke_width=0, align="center",
                                       anchor='lt')

                    # Remember the label so later labels can avoid it.
                    placed_label_boxes.append(chosen_label_bg_box)
                else:
                    logger.warning(
                        f"Skipping label for index {highlight_index} due to invalid final background box: {chosen_label_bg_box}")

        except Exception as draw_e:
            logger.error(
                f"Error during final drawing for index {highlight_index}, Box: {draw_box_outline}, LabelBox: {chosen_label_bg_box}): {draw_e}")

    # --- Encode final image ---
    try:
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        highlighted_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
        return highlighted_base64
    except Exception as e:
        logger.error(f"Error encoding final image to base64: {e}")
        return screenshot_base64
662
+
663
+
664
@observe_debug(ignore_input=True, ignore_output=True, name='create_highlighted_screenshot')
def create_highlighted_screenshot(
    screenshot_b64: str,
    selector_map: DOMSelectorMap,
    device_pixel_ratio: float = 1.0,
    viewport_offset_x: int = 0,
    viewport_offset_y: int = 0,
) -> str:
    """Create a highlighted screenshot with bounding boxes around interactive elements.

    For every element in ``selector_map`` a pixel-space box is computed from
    the element's bounds, clamped to the screenshot, and degenerate boxes are
    dropped. The surviving boxes are handed to ``highlight_screenshot`` for
    the actual drawing.

    Args:
        screenshot_b64: Base64 encoded screenshot.
        selector_map: Map of element id -> interactive element. Each element is
            expected to expose ``snapshot_node.bounds`` and/or
            ``absolute_position`` with ``x``, ``y``, ``width`` and ``height``.
        device_pixel_ratio: Factor applied to convert element bounds into
            screenshot pixels.
        viewport_offset_x: X offset subtracted from element bounds before scaling.
        viewport_offset_y: Y offset subtracted from element bounds before scaling.

    Returns:
        Base64 encoded highlighted screenshot. If anything fails, the original
        ``screenshot_b64`` is returned unchanged.
    """
    try:
        # Decode the screenshot; only its dimensions are needed here, the
        # drawing itself is delegated to highlight_screenshot().
        screenshot_data = base64.b64decode(screenshot_b64)
        image = Image.open(io.BytesIO(screenshot_data)).convert('RGBA')
        # The image size is loop-invariant, so read it once up front.
        img_width, img_height = image.size

        # Collect [element_id, [x1, y1, x2, y2]] entries for drawable elements.
        valid_elements = []
        for element_id, element in selector_map.items():
            try:
                # Use snapshot bounds (document coordinates) if available,
                # otherwise fall back to absolute_position.
                bounds = None
                if element.snapshot_node and element.snapshot_node.bounds:
                    bounds = element.snapshot_node.bounds
                elif element.absolute_position:
                    bounds = element.absolute_position

                if not bounds:
                    continue

                # Convert from CSS pixels to device pixels for screenshot coordinates.
                # Note: bounds are already in CSS pixels, screenshot is in device pixels.
                x1 = int((bounds.x - viewport_offset_x) * device_pixel_ratio)
                y1 = int((bounds.y - viewport_offset_y) * device_pixel_ratio)
                x2 = int((bounds.x + bounds.width - viewport_offset_x) * device_pixel_ratio)
                y2 = int((bounds.y + bounds.height - viewport_offset_y) * device_pixel_ratio)

                # Clamp to the image; x2/y2 are additionally kept >= x1/y1 so
                # the box can never be inverted.
                x1 = max(0, min(x1, img_width))
                y1 = max(0, min(y1, img_height))
                x2 = max(x1, min(x2, img_width))
                y2 = max(y1, min(y2, img_height))

                # Skip if bounding box is too small or invalid
                # (e.g. the element lies entirely outside the screenshot).
                if x2 - x1 < 2 or y2 - y1 < 2:
                    continue

                valid_elements.append([element_id, [x1, y1, x2, y2]])

            except Exception as e:
                # A single malformed element must not prevent the others from
                # being highlighted.
                logger.debug(f'Failed to draw highlight for element {element_id}: {e}')
                continue

        highlighted_b64 = highlight_screenshot(screenshot_b64, valid_elements)

        # Report how many elements were actually highlighted, not merely how
        # many were present in the selector map.
        logger.debug(
            f'Successfully created highlighted screenshot with {len(valid_elements)} '
            f'of {len(selector_map)} elements')
        return highlighted_b64

    except Exception as e:
        logger.error(f'Failed to create highlighted screenshot: {e}')
        # Return original screenshot on error
        return screenshot_b64
734
+
735
+
736
async def get_viewport_info_from_cdp(cdp_session) -> Tuple[float, int, int]:
    """Query ``Page.getLayoutMetrics`` over CDP and derive viewport scaling info.

    The device pixel ratio is computed as the ``visualViewport`` client width
    divided by the CSS client width (``cssVisualViewport``, falling back to
    ``cssLayoutViewport``, then to 1280.0). The scroll offsets are the
    ``pageX`` / ``pageY`` of ``cssVisualViewport`` truncated to ``int``.

    Args:
        cdp_session: Object exposing ``cdp_client.send.Page.getLayoutMetrics``
            and ``session_id``.

    Returns:
        Tuple of (device_pixel_ratio, scroll_x, scroll_y). On any exception
        the defaults ``(1.0, 0, 0)`` are returned.
    """
    try:
        layout = await cdp_session.cdp_client.send.Page.getLayoutMetrics(session_id=cdp_session.session_id)

        # The three viewport descriptions used below.
        device_vp = layout.get('visualViewport', {})
        css_vp = layout.get('cssVisualViewport', {})
        layout_vp = layout.get('cssLayoutViewport', {})

        # Ratio of device width to CSS width; guard against a zero/negative
        # CSS width to avoid dividing by it.
        width_css = css_vp.get('clientWidth', layout_vp.get('clientWidth', 1280.0))
        width_device = device_vp.get('clientWidth', width_css)
        if width_css > 0:
            ratio = width_device / width_css
        else:
            ratio = 1.0

        # Scroll position in CSS pixels, truncated to whole pixels.
        offset_x = int(css_vp.get('pageX', 0))
        offset_y = int(css_vp.get('pageY', 0))

        return float(ratio), offset_x, offset_y

    except Exception as e:
        # Best effort: fall back to an unscaled, unscrolled viewport.
        logger.debug(f'Failed to get viewport info from CDP: {e}')
        return 1.0, 0, 0
764
+
765
+
766
@observe_debug(ignore_input=True, ignore_output=True, name='create_highlighted_screenshot_async')
async def create_highlighted_screenshot_async(screenshot_b64: str, selector_map: DOMSelectorMap,
                                              cdp_session=None) -> str:
    """Async entry point that resolves viewport info, then highlights the screenshot.

    Args:
        screenshot_b64: Base64 encoded screenshot.
        selector_map: Map of interactive elements.
        cdp_session: Optional CDP session used to look up the device pixel
            ratio and scroll offsets. When falsy, a ratio of 1.0 and zero
            offsets are used.

    Returns:
        Base64 encoded highlighted screenshot, as produced by
        ``create_highlighted_screenshot``.
    """
    # Defaults used when there is no CDP session or the lookup fails.
    pixel_ratio, offset_x, offset_y = 1.0, 0, 0

    if cdp_session:
        try:
            pixel_ratio, offset_x, offset_y = await get_viewport_info_from_cdp(cdp_session)
        except Exception as e:
            logger.debug(f'Failed to get viewport info from CDP: {e}')

    # NOTE: the highlighting itself runs synchronously on the calling task.
    return create_highlighted_screenshot(
        screenshot_b64,
        selector_map,
        pixel_ratio,
        offset_x,
        offset_y,
    )