vibesurf 0.1.24__tar.gz → 0.1.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vibesurf has been flagged as potentially problematic; see the package registry's advisory page for details.

Files changed (124)
  1. {vibesurf-0.1.24 → vibesurf-0.1.25}/PKG-INFO +1 -1
  2. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/_version.py +3 -3
  3. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/llm/openai_compatible.py +0 -1
  4. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/tools/finance_tools.py +75 -32
  5. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/tools/vibesurf_tools.py +257 -27
  6. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibesurf.egg-info/PKG-INFO +1 -1
  7. {vibesurf-0.1.24 → vibesurf-0.1.25}/.env.example +0 -0
  8. {vibesurf-0.1.24 → vibesurf-0.1.25}/.github/workflows/publish.yml +0 -0
  9. {vibesurf-0.1.24 → vibesurf-0.1.25}/.gitignore +0 -0
  10. {vibesurf-0.1.24 → vibesurf-0.1.25}/.python-version +0 -0
  11. {vibesurf-0.1.24 → vibesurf-0.1.25}/LICENSE +0 -0
  12. {vibesurf-0.1.24 → vibesurf-0.1.25}/MANIFEST.in +0 -0
  13. {vibesurf-0.1.24 → vibesurf-0.1.25}/README.md +0 -0
  14. {vibesurf-0.1.24 → vibesurf-0.1.25}/docs/EXECUTABLE_BUILD.md +0 -0
  15. {vibesurf-0.1.24 → vibesurf-0.1.25}/docs/PYPI_SETUP.md +0 -0
  16. {vibesurf-0.1.24 → vibesurf-0.1.25}/pyproject.toml +0 -0
  17. {vibesurf-0.1.24 → vibesurf-0.1.25}/scripts/build-local.bat +0 -0
  18. {vibesurf-0.1.24 → vibesurf-0.1.25}/scripts/build-local.sh +0 -0
  19. {vibesurf-0.1.24 → vibesurf-0.1.25}/setup.cfg +0 -0
  20. {vibesurf-0.1.24 → vibesurf-0.1.25}/tests/test_agents.py +0 -0
  21. {vibesurf-0.1.24 → vibesurf-0.1.25}/tests/test_backend_api.py +0 -0
  22. {vibesurf-0.1.24 → vibesurf-0.1.25}/tests/test_browser.py +0 -0
  23. {vibesurf-0.1.24 → vibesurf-0.1.25}/tests/test_tools.py +0 -0
  24. {vibesurf-0.1.24 → vibesurf-0.1.25}/tests/test_voice_api.py +0 -0
  25. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/__init__.py +0 -0
  26. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/agents/__init__.py +0 -0
  27. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/agents/browser_use_agent.py +0 -0
  28. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/agents/prompts/__init__.py +0 -0
  29. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/agents/prompts/report_writer_prompt.py +0 -0
  30. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/agents/prompts/vibe_surf_prompt.py +0 -0
  31. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/agents/report_writer_agent.py +0 -0
  32. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/agents/vibe_surf_agent.py +0 -0
  33. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/agents/views.py +0 -0
  34. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/__init__.py +0 -0
  35. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/api/__init__.py +0 -0
  36. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/api/activity.py +0 -0
  37. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/api/agent.py +0 -0
  38. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/api/browser.py +0 -0
  39. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/api/config.py +0 -0
  40. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/api/files.py +0 -0
  41. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/api/models.py +0 -0
  42. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/api/task.py +0 -0
  43. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/api/voices.py +0 -0
  44. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/database/__init__.py +0 -0
  45. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/database/manager.py +0 -0
  46. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/database/migrations/v001_initial_schema.sql +0 -0
  47. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/database/migrations/v002_add_agent_mode.sql +0 -0
  48. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/database/migrations/v003_fix_task_status_case.sql +0 -0
  49. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/database/migrations/v004_add_voice_profiles.sql +0 -0
  50. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/database/models.py +0 -0
  51. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/database/queries.py +0 -0
  52. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/database/schemas.py +0 -0
  53. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/llm_config.py +0 -0
  54. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/main.py +0 -0
  55. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/shared_state.py +0 -0
  56. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/utils/__init__.py +0 -0
  57. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/utils/encryption.py +0 -0
  58. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/utils/llm_factory.py +0 -0
  59. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/backend/voice_model_config.py +0 -0
  60. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/browser/__init__.py +0 -0
  61. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/browser/agen_browser_profile.py +0 -0
  62. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/browser/agent_browser_session.py +0 -0
  63. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/browser/browser_manager.py +0 -0
  64. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/browser/utils.py +0 -0
  65. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/browser/watchdogs/__init__.py +0 -0
  66. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/browser/watchdogs/action_watchdog.py +0 -0
  67. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/browser/watchdogs/dom_watchdog.py +0 -0
  68. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/background.js +0 -0
  69. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/config.js +0 -0
  70. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/content.js +0 -0
  71. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/dev-reload.js +0 -0
  72. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/icons/logo.icns +0 -0
  73. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/icons/logo.png +0 -0
  74. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/manifest.json +0 -0
  75. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/permission-iframe.html +0 -0
  76. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/permission-request.html +0 -0
  77. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/popup.html +0 -0
  78. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/api-client.js +0 -0
  79. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/file-manager.js +0 -0
  80. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/history-manager.js +0 -0
  81. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/main.js +0 -0
  82. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/markdown-it.min.js +0 -0
  83. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/modal-manager.js +0 -0
  84. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/permission-iframe-request.js +0 -0
  85. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/permission-request.js +0 -0
  86. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/session-manager.js +0 -0
  87. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/settings-manager.js +0 -0
  88. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/ui-manager.js +0 -0
  89. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/user-settings-storage.js +0 -0
  90. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/scripts/voice-recorder.js +0 -0
  91. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/sidepanel.html +0 -0
  92. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/activity.css +0 -0
  93. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/animations.css +0 -0
  94. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/base.css +0 -0
  95. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/components.css +0 -0
  96. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/history-modal.css +0 -0
  97. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/input.css +0 -0
  98. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/layout.css +0 -0
  99. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/responsive.css +0 -0
  100. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/settings-environment.css +0 -0
  101. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/settings-forms.css +0 -0
  102. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/settings-modal.css +0 -0
  103. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/settings-profiles.css +0 -0
  104. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/settings-responsive.css +0 -0
  105. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/settings-utilities.css +0 -0
  106. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/chrome_extension/styles/variables.css +0 -0
  107. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/cli.py +0 -0
  108. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/common.py +0 -0
  109. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/llm/__init__.py +0 -0
  110. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/logger.py +0 -0
  111. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/tools/__init__.py +0 -0
  112. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/tools/browser_use_tools.py +0 -0
  113. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/tools/file_system.py +0 -0
  114. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/tools/mcp_client.py +0 -0
  115. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/tools/report_writer_tools.py +0 -0
  116. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/tools/vibesurf_registry.py +0 -0
  117. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/tools/views.py +0 -0
  118. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibe_surf/tools/voice_asr.py +0 -0
  119. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibesurf.egg-info/SOURCES.txt +0 -0
  120. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibesurf.egg-info/dependency_links.txt +0 -0
  121. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibesurf.egg-info/entry_points.txt +0 -0
  122. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibesurf.egg-info/requires.txt +0 -0
  123. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibesurf.egg-info/top_level.txt +0 -0
  124. {vibesurf-0.1.24 → vibesurf-0.1.25}/vibesurf.spec +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vibesurf
3
- Version: 0.1.24
3
+ Version: 0.1.25
4
4
  Summary: VibeSurf: A powerful browser assistant for vibe surfing
5
5
  Author: Shao Warm
6
6
  License: Apache-2.0
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.24'
32
- __version_tuple__ = version_tuple = (0, 1, 24)
31
+ __version__ = version = '0.1.25'
32
+ __version_tuple__ = version_tuple = (0, 1, 25)
33
33
 
34
- __commit_id__ = commit_id = 'gebf62a182'
34
+ __commit_id__ = commit_id = 'gb2d55c9c9'
@@ -337,7 +337,6 @@ class ChatOpenAICompatible(ChatOpenAI):
337
337
  try:
338
338
  parsed = output_format.model_validate_json(output_content)
339
339
  except Exception as e:
340
- pdb.set_trace()
341
340
  repair_content = repair_json(output_content)
342
341
  parsed = output_format.model_validate_json(repair_content)
343
342
 
@@ -8,6 +8,8 @@ from typing import Dict, List, Any, Optional, Union
8
8
  from datetime import datetime, timedelta
9
9
  import yfinance as yf
10
10
  import pandas as pd
11
+ from datetime import datetime
12
+
11
13
  from vibe_surf.logger import get_logger
12
14
 
13
15
  logger = get_logger(__name__)
@@ -445,33 +447,58 @@ class FinanceMarkdownFormatter:
445
447
  return "No news available.\n"
446
448
 
447
449
  markdown = f"**Total News Articles:** {len(news)}\n\n"
448
- pdb.set_trace()
449
450
  for i, article in enumerate(news, 1):
450
451
  if isinstance(article, dict):
451
- # Try different possible field names for title
452
- title = (article.get('title') or
453
- article.get('headline') or
454
- article.get('summary') or
452
+ # Handle new yfinance news structure with nested 'content'
453
+ content = article.get('content', article) # Fallback to article itself for backwards compatibility
454
+
455
+ # Extract title
456
+ title = (content.get('title') or
457
+ content.get('headline') or
458
+ content.get('summary') or
459
+ article.get('title') or # Fallback to old format
455
460
  'No title available')
456
461
 
457
- # Try different possible field names for link/URL
458
- link = (article.get('link') or
459
- article.get('url') or
460
- article.get('guid') or '')
462
+ # Extract content type if available
463
+ content_type = content.get('contentType', '')
464
+ type_emoji = "🎥" if content_type == "VIDEO" else "📰"
461
465
 
462
- # Try different possible field names for publisher
463
- publisher = (article.get('publisher') or
464
- article.get('source') or
465
- article.get('author') or
466
- 'Unknown')
466
+ # Extract link/URL - try new nested structure first
467
+ link = ''
468
+ if 'canonicalUrl' in content and isinstance(content['canonicalUrl'], dict):
469
+ link = content['canonicalUrl'].get('url', '')
470
+ elif 'clickThroughUrl' in content and isinstance(content['clickThroughUrl'], dict):
471
+ link = content['clickThroughUrl'].get('url', '')
472
+ else:
473
+ # Fallback to old format
474
+ link = (content.get('link') or
475
+ content.get('url') or
476
+ content.get('guid') or
477
+ article.get('link') or '')
478
+
479
+ # Extract publisher - try new nested structure first
480
+ publisher = 'Unknown'
481
+ if 'provider' in content and isinstance(content['provider'], dict):
482
+ publisher = content['provider'].get('displayName', 'Unknown')
483
+ else:
484
+ # Fallback to old format
485
+ publisher = (content.get('publisher') or
486
+ content.get('source') or
487
+ content.get('author') or
488
+ article.get('publisher') or
489
+ 'Unknown')
467
490
 
468
- # Try different possible field names for timestamp
469
- publish_time = (article.get('providerPublishTime') or
470
- article.get('timestamp') or
471
- article.get('pubDate') or
472
- article.get('published') or '')
491
+ # Extract publication time
492
+ publish_time = (content.get('pubDate') or
493
+ content.get('providerPublishTime') or
494
+ content.get('timestamp') or
495
+ content.get('published') or
496
+ article.get('providerPublishTime') or '')
473
497
 
474
- markdown += f"### {i}. {title}\n"
498
+ # Format the article
499
+ markdown += f"### {type_emoji} {i}. {title}\n"
500
+ if content_type:
501
+ markdown += f"**Type:** {content_type}\n"
475
502
  markdown += f"**Publisher:** {publisher}\n"
476
503
 
477
504
  if publish_time:
@@ -481,11 +508,16 @@ class FinanceMarkdownFormatter:
481
508
  dt = datetime.fromtimestamp(publish_time)
482
509
  markdown += f"**Published:** {dt.strftime('%Y-%m-%d %H:%M')}\n"
483
510
  elif isinstance(publish_time, str):
484
- # Try to parse string timestamp
511
+ # Try to parse ISO format first (new format)
485
512
  try:
486
- publish_time_int = int(float(publish_time))
487
- dt = datetime.fromtimestamp(publish_time_int)
488
- markdown += f"**Published:** {dt.strftime('%Y-%m-%d %H:%M')}\n"
513
+ if publish_time.endswith('Z'):
514
+ dt = datetime.fromisoformat(publish_time.replace('Z', '+00:00'))
515
+ markdown += f"**Published:** {dt.strftime('%Y-%m-%d %H:%M UTC')}\n"
516
+ else:
517
+ # Try to parse as Unix timestamp
518
+ publish_time_int = int(float(publish_time))
519
+ dt = datetime.fromtimestamp(publish_time_int)
520
+ markdown += f"**Published:** {dt.strftime('%Y-%m-%d %H:%M')}\n"
489
521
  except:
490
522
  markdown += f"**Published:** {publish_time}\n"
491
523
  except Exception as e:
@@ -496,14 +528,25 @@ class FinanceMarkdownFormatter:
496
528
  markdown += f"**Link:** {link}\n"
497
529
 
498
530
  # Add summary or description if available
499
- summary = (article.get('summary') or
500
- article.get('description') or
501
- article.get('snippet') or '')
531
+ summary = (content.get('summary') or
532
+ content.get('description') or
533
+ content.get('snippet') or
534
+ article.get('summary') or '')
502
535
  if summary and summary != title:
536
+ # Clean HTML tags from description if present
537
+ import re
538
+ clean_summary = re.sub(r'<[^>]+>', '', summary)
539
+ clean_summary = re.sub(r'\s+', ' ', clean_summary).strip()
540
+
503
541
  # Limit summary length
504
- if len(summary) > 200:
505
- summary = summary[:200] + "..."
506
- markdown += f"**Summary:** {summary}\n"
542
+ if len(clean_summary) > 300:
543
+ clean_summary = clean_summary[:300] + "..."
544
+ markdown += f"**Summary:** {clean_summary}\n"
545
+
546
+ # Add metadata if available
547
+ if 'metadata' in content and isinstance(content['metadata'], dict):
548
+ if content['metadata'].get('editorsPick'):
549
+ markdown += f"**Editor's Pick:** ✅\n"
507
550
 
508
551
  markdown += "\n"
509
552
 
@@ -514,10 +557,10 @@ class FinanceMarkdownFormatter:
514
557
  """Format dividend data as markdown"""
515
558
  if dividends.empty:
516
559
  return "No dividend data available.\n"
517
-
560
+
518
561
  markdown = f"**Total Dividends Recorded:** {len(dividends)}\n"
519
562
  markdown += f"**Date Range:** {dividends.index.min().strftime('%Y-%m-%d')} to {dividends.index.max().strftime('%Y-%m-%d')}\n\n"
520
-
563
+
521
564
  # Recent dividends (last 10)
522
565
  recent_dividends = dividends.tail(10)
523
566
  markdown += "### 💰 Recent Dividends\n\n"
@@ -196,6 +196,7 @@ class VibeSurfTools:
196
196
  raise RuntimeError("LLM is required for skill_search")
197
197
 
198
198
  # Step 1: Use LLM to analyze user intent and generate different search tasks
199
+ query_num = 6
199
200
  from datetime import datetime
200
201
  analysis_prompt = f"""
201
202
  Analyze the user query and generate 5 different Google search strategies to comprehensively find relevant information.
@@ -204,13 +205,13 @@ Current Time: {datetime.now().isoformat()}
204
205
 
205
206
  User Query: "{params.query}"
206
207
 
207
- Generate 5 different search queries that approach this topic from different angles. Each search should be:
208
+ Generate {query_num} different search queries that approach this topic from different angles. Each search should be:
208
209
  1. Specific and concrete (good for Google search)
209
210
  2. Different from the others (different perspectives/aspects)
210
211
  3. Likely to return valuable, unique information
211
212
 
212
- Return your response as a JSON array of 5 search query strings.
213
- Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
213
+ Return your response as a JSON array of {query_num} search query strings.
214
+ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5", "query 6"]
214
215
  """
215
216
 
216
217
  from browser_use.llm.messages import SystemMessage, UserMessage
@@ -225,7 +226,7 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
225
226
  search_queries = json.loads(response.completion.strip())
226
227
  if not isinstance(search_queries, list):
227
228
  raise ValueError("Invalid search queries format")
228
- search_queries = search_queries[:5]
229
+ search_queries = search_queries[:query_num]
229
230
  except (json.JSONDecodeError, ValueError):
230
231
  # Fallback to simple queries if parsing fails
231
232
  try:
@@ -258,7 +259,6 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
258
259
  search_tasks.append(self._perform_google_search(browser_session, query, llm))
259
260
 
260
261
  search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
261
-
262
262
  # Step 4: Aggregate and filter results
263
263
  all_results = []
264
264
  for i, result in enumerate(search_results):
@@ -268,18 +268,24 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
268
268
  if result:
269
269
  all_results.extend(result)
270
270
 
271
- # Step 5: Use LLM to deduplicate and rank top 10 results
272
- if all_results:
271
+ # Step 4.5: Rule-based deduplication to reduce LLM processing load
272
+ # if all_results:
273
+ # deduplicated_results = self._rule_based_deduplication(all_results)
274
+ # logger.info(f"Rule-based deduplication: {len(all_results)} -> {len(deduplicated_results)} results")
275
+ # else:
276
+ # deduplicated_results = []
277
+
278
+ # Step 5: Use LLM only for final ranking and selection (much smaller dataset now)
279
+ if all_results and len(all_results) > 10:
280
+ # Only use LLM if we have more than 10 results to rank
273
281
  ranking_prompt = f"""
274
- Given these search results for the query "{params.query}", please:
275
- 1. Remove duplicates (same or very similar content)
276
- 2. Rank by relevance and value to the user
277
- 3. Select the TOP 10 most relevant and valuable results
282
+ Rank these search results for the query "{params.query}" by relevance and value.
283
+ Select the TOP 10 most relevant and valuable results.
278
284
 
279
- Search Results:
285
+ Search Results ({len(all_results)} total):
280
286
  {json.dumps(all_results, indent=2)}
281
287
 
282
- Return the top 10 results as a JSON array, with each result containing:
288
+ Return the top 10 results as a JSON array with each result containing:
283
289
  - title: string
284
290
  - url: string
285
291
  - summary: string (brief description of why this result is valuable)
@@ -289,7 +295,7 @@ Format: [{{"title": "...", "url": "...", "summary": "..."}}, ...]
289
295
 
290
296
  ranking_response = await llm.ainvoke([
291
297
  SystemMessage(
292
- content="You are an expert at evaluating and ranking search results for relevance and value."),
298
+ content="You are an expert at ranking search results for relevance and value."),
293
299
  UserMessage(content=ranking_prompt)
294
300
  ])
295
301
 
@@ -297,9 +303,21 @@ Format: [{{"title": "...", "url": "...", "summary": "..."}}, ...]
297
303
  top_results = json.loads(ranking_response.completion.strip())
298
304
  if not isinstance(top_results, list):
299
305
  raise ValueError("Invalid ranking results format")
306
+ top_results = top_results[:10] # Ensure max 10 results
300
307
  except (json.JSONDecodeError, ValueError):
301
- # Fallback to first 10 results if ranking fails
302
- top_results = all_results[:10]
308
+ try:
309
+ top_results = repair_json(ranking_response.completion.strip())
310
+ if isinstance(top_results, list):
311
+ top_results = top_results[:10]
312
+ else:
313
+ top_results = all_results[:10]
314
+ except Exception:
315
+ # Fallback to first 10 deduplicated results
316
+ top_results = all_results[:10]
317
+ elif all_results:
318
+ # If we have 10 or fewer results, skip LLM ranking
319
+ top_results = all_results[:10]
320
+ logger.info(f"Skipping LLM ranking for {len(all_results)} results (≤10)")
303
321
  else:
304
322
  top_results = []
305
323
 
@@ -694,7 +712,7 @@ Please fix the error and generate corrected JavaScript code:"""
694
712
  elif isinstance(value, (dict, list)):
695
713
  # Complex objects - should be serialized by returnByValue
696
714
  try:
697
- result_text = json.dumps(value, ensure_ascii=False)
715
+ result_text = json.dumps(value, ensure_ascii=False, indent=2)
698
716
  except (TypeError, ValueError):
699
717
  # Fallback for non-serializable objects
700
718
  result_text = str(value)
@@ -729,7 +747,7 @@ The result is empty or not useful. Please generate improved JavaScript code that
729
747
  result_text = result_text[:30000] + '\n... [Truncated after 30000 characters]'
730
748
 
731
749
  # Success! Return the result
732
- msg = f'Requirement: {params.code_requirement}\n\nGenerated Code (Iteration {iteration}): \n```javascript\n{generated_js_code}\n```\nResult: {result_text}'
750
+ msg = f'Generated Code (Iteration {iteration}): \n```javascript\n{generated_js_code}\n```\nResult:\n```json\n {result_text}\n```\n'
733
751
  logger.info(f'✅ Skill Code succeeded on iteration {iteration}')
734
752
 
735
753
  return ActionResult(
@@ -907,19 +925,164 @@ Please generate alternative JavaScript code that avoids this system error:"""
907
925
  return ActionResult(error=error_msg)
908
926
 
909
927
 
928
+ async def _extract_google_results_rule_based(self, browser_session):
929
+ """Rule-based extraction of Google search results using JavaScript"""
930
+ try:
931
+ cdp_session = await browser_session.get_or_create_cdp_session()
932
+
933
+ # JavaScript code to extract Google search results using DOM selectors
934
+ js_extraction_code = """
935
+ (function() {
936
+ try {
937
+ const results = [];
938
+
939
+ // Multiple selector strategies for different Google layouts
940
+ const selectors = [
941
+ 'div[data-sokoban-container] div[data-sokoban-feature]', // Standard results
942
+ 'div.g:not(.g-blk)', // Classic results container
943
+ '.tF2Cxc', // Modern result container
944
+ 'div[data-ved] h3', // Result titles
945
+ ];
946
+
947
+ let resultElements = [];
948
+
949
+ // Try each selector until we find results
950
+ for (const selector of selectors) {
951
+ const elements = document.querySelectorAll(selector);
952
+ if (elements.length > 0) {
953
+ resultElements = Array.from(elements).slice(0, 10); // Get up to 10 results
954
+ break;
955
+ }
956
+ }
957
+
958
+ // If no results found with specific selectors, try broader search
959
+ if (resultElements.length === 0) {
960
+ // Look for any divs containing h3 elements (likely search results)
961
+ const h3Elements = document.querySelectorAll('h3');
962
+ resultElements = Array.from(h3Elements)
963
+ .map(h3 => h3.closest('div'))
964
+ .filter(div => div && div.querySelector('a[href]'))
965
+ .slice(0, 10);
966
+ }
967
+
968
+ for (let i = 0; i < Math.min(resultElements.length, 10); i++) {
969
+ const element = resultElements[i];
970
+
971
+ // Extract title
972
+ let title = '';
973
+ const titleSelectors = ['h3', '[role="heading"]', 'a > span', '.LC20lb'];
974
+ for (const sel of titleSelectors) {
975
+ const titleEl = element.querySelector(sel);
976
+ if (titleEl && titleEl.textContent.trim()) {
977
+ title = titleEl.textContent.trim();
978
+ break;
979
+ }
980
+ }
981
+
982
+ // Extract URL
983
+ let url = '';
984
+ const linkSelectors = ['a[href^="http"]', 'a[href^="/url?q="]', 'a[href]'];
985
+ for (const sel of linkSelectors) {
986
+ const linkEl = element.querySelector(sel);
987
+ if (linkEl && linkEl.href) {
988
+ url = linkEl.href;
989
+ // Clean Google redirect URLs
990
+ if (url.includes('/url?q=')) {
991
+ const urlMatch = url.match(/[?&]q=([^&]*)/);
992
+ if (urlMatch) {
993
+ url = decodeURIComponent(urlMatch[1]);
994
+ }
995
+ }
996
+ break;
997
+ }
998
+ }
999
+
1000
+ // Extract summary/description
1001
+ let summary = '';
1002
+ const summarySelectors = [
1003
+ '.VwiC3b', // Description text
1004
+ '.yXK7lf', // Snippet text
1005
+ '[data-content-feature="1"] span',
1006
+ '.s', // Classic description
1007
+ 'span:not(:has(a))'
1008
+ ];
1009
+ for (const sel of summarySelectors) {
1010
+ const summaryEl = element.querySelector(sel);
1011
+ if (summaryEl && summaryEl.textContent.trim() && summaryEl.textContent.length > 10) {
1012
+ summary = summaryEl.textContent.trim();
1013
+ break;
1014
+ }
1015
+ }
1016
+
1017
+ // Only add if we have at least title or URL
1018
+ if (title || url) {
1019
+ results.push({
1020
+ title: title || 'No title',
1021
+ url: url || 'No URL',
1022
+ summary: summary || 'No description available'
1023
+ });
1024
+ }
1025
+ }
1026
+
1027
+ return JSON.stringify(results);
1028
+
1029
+ } catch (e) {
1030
+ return JSON.stringify([{
1031
+ title: 'Error extracting results',
1032
+ url: window.location.href,
1033
+ summary: 'JavaScript extraction failed: ' + e.message
1034
+ }]);
1035
+ }
1036
+ })()
1037
+ """
1038
+
1039
+ # Execute JavaScript to extract results
1040
+ result = await cdp_session.cdp_client.send.Runtime.evaluate(
1041
+ params={'expression': js_extraction_code, 'returnByValue': True, 'awaitPromise': True},
1042
+ session_id=cdp_session.session_id,
1043
+ )
1044
+
1045
+ if result.get('exceptionDetails'):
1046
+ logger.warning(f"JavaScript extraction failed: {result['exceptionDetails']}")
1047
+ return []
1048
+
1049
+ result_data = result.get('result', {})
1050
+ value = result_data.get('value', '[]')
1051
+
1052
+ try:
1053
+ extracted_results = json.loads(value)
1054
+ return extracted_results if isinstance(extracted_results, list) else []
1055
+ except (json.JSONDecodeError, ValueError):
1056
+ logger.warning(f"Failed to parse extraction results: {value}")
1057
+ return []
1058
+
1059
+ except Exception as e:
1060
+ logger.error(f"Rule-based extraction failed: {e}")
1061
+ return []
1062
+
910
1063
  async def _perform_google_search(self, browser_session, query: str, llm: BaseChatModel):
911
- """Helper method to perform Google search and extract top 5 results"""
1064
+ """Helper method to perform Google search and extract top 5 results using rule-based extraction"""
912
1065
  try:
913
1066
  # Navigate to Google search
914
1067
  search_url = f'https://www.google.com/search?q={query}&udm=14'
915
1068
  await browser_session.navigate_to_url(search_url, new_tab=False)
916
1069
 
917
1070
  # Wait a moment for page to load
918
- await asyncio.sleep(1)
919
-
920
- # Extract structured content
1071
+ await asyncio.sleep(2)
1072
+
1073
+ # Use rule-based extraction first (much faster than LLM)
1074
+ search_ret_len = 10
1075
+ results = await self._extract_google_results_rule_based(browser_session)
1076
+ if results and len(results) > 0:
1077
+ # Rule-based extraction succeeded
1078
+ logger.info(f"Rule-based extraction found {len(results)} results for query: {query}")
1079
+ return results[:search_ret_len] # Return top 6 results
1080
+
1081
+ # Fallback to LLM extraction if rule-based fails
1082
+ logger.warning(f"Rule-based extraction failed for query '{query}', falling back to LLM")
1083
+
921
1084
  extraction_query = f"""
922
- Extract the top 5 search results from this Google search page. For each result, provide:
1085
+ Extract the top {search_ret_len} search results from this Google search page. For each result, provide:
923
1086
  - title: The clickable title/headline
924
1087
  - url: The website URL
925
1088
  - summary: A brief description of what this result contains
@@ -930,18 +1093,17 @@ Return results as a JSON array: [{{"title": "...", "url": "...", "summary": "...
930
1093
  results_text = await self._extract_structured_content(browser_session, extraction_query, llm)
931
1094
 
932
1095
  # Try to parse JSON results
933
- import json
934
1096
  try:
935
1097
  results = json.loads(results_text.strip())
936
1098
  if isinstance(results, list):
937
- return results[:5] # Ensure max 5 results
1099
+ return results[:search_ret_len] # Ensure max 5 results
938
1100
  except (json.JSONDecodeError, ValueError):
939
1101
  try:
940
1102
  results = repair_json(results_text.strip())
941
1103
  if isinstance(results, list):
942
- return results[:5] # Ensure max 5 results
1104
+ return results[:search_ret_len] # Ensure max 5 results
943
1105
  except Exception as e:
944
- logger.warning(f"Failed to parse JSON from search results: {results_text}")
1106
+ logger.warning(f"Failed to parse JSON from LLM search results: {results_text}")
945
1107
 
946
1108
  # Fallback: return raw text as single result
947
1109
  current_url = await browser_session.get_current_page_url()
@@ -955,6 +1117,74 @@ Return results as a JSON array: [{{"title": "...", "url": "...", "summary": "...
955
1117
  logger.error(f"Google search failed for query '{query}': {e}")
956
1118
  return []
957
1119
 
1120
+ def _rule_based_deduplication(self, results):
1121
+ """Rule-based deduplication to reduce dataset before LLM processing"""
1122
+ if not results:
1123
+ return []
1124
+
1125
+ deduplicated = []
1126
+ seen_urls = set()
1127
+ seen_titles = set()
1128
+
1129
+ for result in results:
1130
+ url = result.get('url', '').strip()
1131
+ title = result.get('title', '').strip().lower()
1132
+
1133
+ # Skip results with missing essential data
1134
+ if not url or not title or url == 'No URL' or title == 'no title':
1135
+ continue
1136
+
1137
+ # Normalize URL for comparison (remove fragments, query params for deduplication)
1138
+ normalized_url = url.split('#')[0].split('?')[0].lower()
1139
+
1140
+ # Check for duplicate URLs
1141
+ if normalized_url in seen_urls:
1142
+ continue
1143
+
1144
+ # Check for very similar titles (basic similarity)
1145
+ title_normalized = ''.join(c for c in title if c.isalnum()).lower()
1146
+ if len(title_normalized) > 10: # Only check titles with substantial content
1147
+ similar_found = False
1148
+ for seen_title in seen_titles:
1149
+ # Simple similarity check: if 80% of characters match
1150
+ if len(title_normalized) > 0 and len(seen_title) > 0:
1151
+ common_chars = sum(1 for c in title_normalized if c in seen_title)
1152
+ similarity = common_chars / max(len(title_normalized), len(seen_title))
1153
+ if similarity > 0.8:
1154
+ similar_found = True
1155
+ break
1156
+
1157
+ if similar_found:
1158
+ continue
1159
+
1160
+ # Add to deduplicated results
1161
+ seen_urls.add(normalized_url)
1162
+ seen_titles.add(title_normalized)
1163
+ deduplicated.append(result)
1164
+
1165
+ # Sort by relevance indicators (prioritize results with longer summaries, non-generic titles)
1166
+ def relevance_score(result):
1167
+ score = 0
1168
+ title = result.get('title', '')
1169
+ summary = result.get('summary', '')
1170
+
1171
+ # Longer summaries are typically more informative
1172
+ score += min(len(summary), 200) / 10
1173
+
1174
+ # Non-generic titles score higher
1175
+ generic_terms = ['search results', 'no title', 'error', 'loading']
1176
+ if not any(term in title.lower() for term in generic_terms):
1177
+ score += 10
1178
+
1179
+ # Prefer results with actual descriptions
1180
+ if summary and summary != 'No description available' and len(summary) > 20:
1181
+ score += 5
1182
+
1183
+ return score
1184
+
1185
+ deduplicated.sort(key=relevance_score, reverse=True)
1186
+ return deduplicated
1187
+
958
1188
  async def _extract_structured_content(self, browser_session, query: str, llm: BaseChatModel):
959
1189
  """Helper method to extract structured content from current page"""
960
1190
  MAX_CHAR_LIMIT = 30000
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vibesurf
3
- Version: 0.1.24
3
+ Version: 0.1.25
4
4
  Summary: VibeSurf: A powerful browser assistant for vibe surfing
5
5
  Author: Shao Warm
6
6
  License: Apache-2.0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes