vibesurf 0.1.24__py3-none-any.whl → 0.1.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of vibesurf might be problematic.

vibe_surf/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.24'
-__version_tuple__ = version_tuple = (0, 1, 24)
+__version__ = version = '0.1.26'
+__version_tuple__ = version_tuple = (0, 1, 26)
 
 __commit_id__ = commit_id = None
vibe_surf/backend/api/task.py CHANGED
@@ -104,7 +104,7 @@ async def submit_task(
     logger.info("Using default empty MCP server configuration")
 
     # DEBUG: Log the type and content of mcp_server_config
-    logger.info(f"mcp_server_config type: {type(mcp_server_config)}, value: {mcp_server_config}")
+    logger.debug(f"mcp_server_config type: {type(mcp_server_config)}, value: {mcp_server_config}")
 
     # Create initial task record in database
     from ..database.queries import TaskQueries
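Demoting these config dumps from info to debug means they disappear under the usual INFO threshold instead of being printed on every task submission. A minimal stdlib sketch of the effect (plain `logging`, standing in for VibeSurf's own `get_logger`):

import logging

logging.basicConfig(level=logging.INFO)  # typical default threshold
logger = logging.getLogger("vibe_surf.backend.api.task")

logger.info("visible at INFO")
logger.debug("suppressed at INFO")

logging.getLogger().setLevel(logging.DEBUG)
logger.debug("visible once the level is lowered to DEBUG")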
vibe_surf/backend/database/queries.py CHANGED
@@ -486,13 +486,13 @@ class TaskQueries:
            return existing_task
        else:
            # DEBUG: Log the type and content of mcp_server_config before saving
-            logger.info(
+            logger.debug(
                f"Creating task with mcp_server_config type: {type(mcp_server_config)}, value: {mcp_server_config}")
 
            # Serialize mcp_server_config to JSON string if it's a dict
            if isinstance(mcp_server_config, dict):
                mcp_server_config_json = json.dumps(mcp_server_config)
-                logger.info(f"Converted dict to JSON string: {mcp_server_config_json}")
+                logger.debug(f"Converted dict to JSON string: {mcp_server_config_json}")
            else:
                mcp_server_config_json = mcp_server_config
vibe_surf/backend/utils/llm_factory.py CHANGED
@@ -58,7 +58,7 @@ def create_llm_from_profile(llm_profile) -> BaseChatModel:
        "deepseek": ["temperature"],
        "aws_bedrock": ["temperature"],
        "anthropic_bedrock": ["temperature"],
-        "openai_compatible": ["temperature"]
+        "openai_compatible": ["temperature", "max_tokens"]
    }
 
    # Build common parameters based on provider support
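The new entry lets openai_compatible profiles pass max_tokens through to the model. A table like this is typically consumed by filtering optional kwargs before constructing the chat model; a minimal sketch of that pattern (`build_kwargs` is a hypothetical helper, not VibeSurf's actual code):

provider_params = {
    "deepseek": ["temperature"],
    "aws_bedrock": ["temperature"],
    "anthropic_bedrock": ["temperature"],
    "openai_compatible": ["temperature", "max_tokens"],
}

def build_kwargs(provider: str, **requested) -> dict:
    # Keep only the options this provider is known to accept
    allowed = provider_params.get(provider, [])
    return {k: v for k, v in requested.items() if k in allowed and v is not None}

print(build_kwargs("openai_compatible", temperature=0.2, max_tokens=4096))
# {'temperature': 0.2, 'max_tokens': 4096}
print(build_kwargs("deepseek", temperature=0.2, max_tokens=4096))
# {'temperature': 0.2}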
vibe_surf/cli.py CHANGED
@@ -325,7 +325,7 @@ def start_backend(port: int) -> None:
        console.print("[yellow]📝 Press Ctrl+C to stop the server[/yellow]\n")
 
        # Run the server
-        uvicorn.run(app, host="127.0.0.1", port=port, log_level="info")
+        uvicorn.run(app, host="127.0.0.1", port=port, log_level="error")
 
    except KeyboardInterrupt:
        console.print("\n[yellow]🛑 Server stopped by user[/yellow]")
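Dropping the server log level from "info" to "error" silences uvicorn's startup banner and per-request access lines; only errors still reach the console. For reference, a standalone sketch with the same effect that also disables access logging explicitly (standard uvicorn options, shown outside VibeSurf's CLI):

import uvicorn
from fastapi import FastAPI

app = FastAPI()

if __name__ == "__main__":
    # Valid log levels: critical, error, warning, info, debug, trace
    uvicorn.run(app, host="127.0.0.1", port=8000, log_level="error", access_log=False)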
vibe_surf/llm/openai_compatible.py CHANGED
@@ -76,7 +76,7 @@ class ChatOpenAICompatible(ChatOpenAI):
    The class automatically detects the model type and applies appropriate fixes.
    """
 
-    max_completion_tokens: int | None = 16000
+    max_completion_tokens: int | None = 8192
 
    def _is_gemini_model(self) -> bool:
        """Check if the current model is a Gemini model."""
@@ -337,7 +337,6 @@ class ChatOpenAICompatible(ChatOpenAI):
        try:
            parsed = output_format.model_validate_json(output_content)
        except Exception as e:
-            pdb.set_trace()
            repair_content = repair_json(output_content)
            parsed = output_format.model_validate_json(repair_content)
vibe_surf/tools/finance_tools.py CHANGED
@@ -8,6 +8,8 @@ from typing import Dict, List, Any, Optional, Union
 from datetime import datetime, timedelta
 import yfinance as yf
 import pandas as pd
+from datetime import datetime
+
 from vibe_surf.logger import get_logger
 
 logger = get_logger(__name__)
@@ -445,33 +447,58 @@ class FinanceMarkdownFormatter:
            return "No news available.\n"
 
        markdown = f"**Total News Articles:** {len(news)}\n\n"
-        pdb.set_trace()
        for i, article in enumerate(news, 1):
            if isinstance(article, dict):
-                # Try different possible field names for title
-                title = (article.get('title') or
-                         article.get('headline') or
-                         article.get('summary') or
+                # Handle new yfinance news structure with nested 'content'
+                content = article.get('content', article)  # Fallback to article itself for backwards compatibility
+
+                # Extract title
+                title = (content.get('title') or
+                         content.get('headline') or
+                         content.get('summary') or
+                         article.get('title') or  # Fallback to old format
                         'No title available')
 
-                # Try different possible field names for link/URL
-                link = (article.get('link') or
-                        article.get('url') or
-                        article.get('guid') or '')
+                # Extract content type if available
+                content_type = content.get('contentType', '')
+                type_emoji = "🎥" if content_type == "VIDEO" else "📰"
 
-                # Try different possible field names for publisher
-                publisher = (article.get('publisher') or
-                             article.get('source') or
-                             article.get('author') or
-                             'Unknown')
+                # Extract link/URL - try new nested structure first
+                link = ''
+                if 'canonicalUrl' in content and isinstance(content['canonicalUrl'], dict):
+                    link = content['canonicalUrl'].get('url', '')
+                elif 'clickThroughUrl' in content and isinstance(content['clickThroughUrl'], dict):
+                    link = content['clickThroughUrl'].get('url', '')
+                else:
+                    # Fallback to old format
+                    link = (content.get('link') or
+                            content.get('url') or
+                            content.get('guid') or
+                            article.get('link') or '')
+
+                # Extract publisher - try new nested structure first
+                publisher = 'Unknown'
+                if 'provider' in content and isinstance(content['provider'], dict):
+                    publisher = content['provider'].get('displayName', 'Unknown')
+                else:
+                    # Fallback to old format
+                    publisher = (content.get('publisher') or
+                                 content.get('source') or
+                                 content.get('author') or
+                                 article.get('publisher') or
+                                 'Unknown')
 
-                # Try different possible field names for timestamp
-                publish_time = (article.get('providerPublishTime') or
-                                article.get('timestamp') or
-                                article.get('pubDate') or
-                                article.get('published') or '')
+                # Extract publication time
+                publish_time = (content.get('pubDate') or
+                                content.get('providerPublishTime') or
+                                content.get('timestamp') or
+                                content.get('published') or
+                                article.get('providerPublishTime') or '')
 
-                markdown += f"### {i}. {title}\n"
+                # Format the article
+                markdown += f"### {type_emoji} {i}. {title}\n"
+                if content_type:
+                    markdown += f"**Type:** {content_type}\n"
                markdown += f"**Publisher:** {publisher}\n"
 
                if publish_time:
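This rewrite tracks the schema change in recent yfinance releases, where each item of `Ticker.news` nests its payload under a `content` key instead of exposing flat fields. Roughly, an item now looks like the sketch below (illustrative values, not captured output; exact keys vary by release):

article = {
    "id": "uuid-like-string",
    "content": {
        "title": "Example headline",
        "contentType": "STORY",             # or "VIDEO"
        "summary": "Short teaser text",
        "pubDate": "2025-01-15T14:30:00Z",  # ISO 8601 string, not a Unix timestamp
        "provider": {"displayName": "Example Wire"},
        "canonicalUrl": {"url": "https://example.com/story"},
        "clickThroughUrl": {"url": "https://finance.yahoo.com/news/example"},
    },
}

content = article.get("content", article)  # the same fallback the formatter uses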
@@ -481,11 +508,16 @@ class FinanceMarkdownFormatter:
                            dt = datetime.fromtimestamp(publish_time)
                            markdown += f"**Published:** {dt.strftime('%Y-%m-%d %H:%M')}\n"
                        elif isinstance(publish_time, str):
-                            # Try to parse string timestamp
+                            # Try to parse ISO format first (new format)
                            try:
-                                publish_time_int = int(float(publish_time))
-                                dt = datetime.fromtimestamp(publish_time_int)
-                                markdown += f"**Published:** {dt.strftime('%Y-%m-%d %H:%M')}\n"
+                                if publish_time.endswith('Z'):
+                                    dt = datetime.fromisoformat(publish_time.replace('Z', '+00:00'))
+                                    markdown += f"**Published:** {dt.strftime('%Y-%m-%d %H:%M UTC')}\n"
+                                else:
+                                    # Try to parse as Unix timestamp
+                                    publish_time_int = int(float(publish_time))
+                                    dt = datetime.fromtimestamp(publish_time_int)
+                                    markdown += f"**Published:** {dt.strftime('%Y-%m-%d %H:%M')}\n"
                            except:
                                markdown += f"**Published:** {publish_time}\n"
                    except Exception as e:
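The `replace('Z', '+00:00')` shim exists because `datetime.fromisoformat` only accepts a trailing `Z` starting with Python 3.11; on 3.10 and earlier it raises `ValueError`. A self-contained check:

from datetime import datetime

ts = "2025-01-15T14:30:00Z"

# Portable across Python versions (3.11+ would also parse ts directly)
dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
print(dt.strftime("%Y-%m-%d %H:%M UTC"))  # 2025-01-15 14:30 UTC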
@@ -496,14 +528,25 @@ class FinanceMarkdownFormatter:
                    markdown += f"**Link:** {link}\n"
 
                # Add summary or description if available
-                summary = (article.get('summary') or
-                           article.get('description') or
-                           article.get('snippet') or '')
+                summary = (content.get('summary') or
+                           content.get('description') or
+                           content.get('snippet') or
+                           article.get('summary') or '')
                if summary and summary != title:
+                    # Clean HTML tags from description if present
+                    import re
+                    clean_summary = re.sub(r'<[^>]+>', '', summary)
+                    clean_summary = re.sub(r'\s+', ' ', clean_summary).strip()
+
                    # Limit summary length
-                    if len(summary) > 200:
-                        summary = summary[:200] + "..."
-                    markdown += f"**Summary:** {summary}\n"
+                    if len(clean_summary) > 300:
+                        clean_summary = clean_summary[:300] + "..."
+                    markdown += f"**Summary:** {clean_summary}\n"
+
+                # Add metadata if available
+                if 'metadata' in content and isinstance(content['metadata'], dict):
+                    if content['metadata'].get('editorsPick'):
+                        markdown += f"**Editor's Pick:** ✅\n"
 
                markdown += "\n"
 
@@ -514,10 +557,10 @@ class FinanceMarkdownFormatter:
        """Format dividend data as markdown"""
        if dividends.empty:
            return "No dividend data available.\n"
-
+
        markdown = f"**Total Dividends Recorded:** {len(dividends)}\n"
        markdown += f"**Date Range:** {dividends.index.min().strftime('%Y-%m-%d')} to {dividends.index.max().strftime('%Y-%m-%d')}\n\n"
-
+
        # Recent dividends (last 10)
        recent_dividends = dividends.tail(10)
        markdown += "### 💰 Recent Dividends\n\n"
vibe_surf/tools/vibesurf_tools.py CHANGED
@@ -196,6 +196,7 @@ class VibeSurfTools:
            raise RuntimeError("LLM is required for skill_search")
 
        # Step 1: Use LLM to analyze user intent and generate different search tasks
+        query_num = 6
        from datetime import datetime
        analysis_prompt = f"""
 Analyze the user query and generate 5 different Google search strategies to comprehensively find relevant information.
@@ -204,13 +205,13 @@ Current Time: {datetime.now().isoformat()}
 
 User Query: "{params.query}"
 
-Generate 5 different search queries that approach this topic from different angles. Each search should be:
+Generate {query_num} different search queries that approach this topic from different angles. Each search should be:
 1. Specific and concrete (good for Google search)
 2. Different from the others (different perspectives/aspects)
 3. Likely to return valuable, unique information
 
-Return your response as a JSON array of 5 search query strings.
-Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
+Return your response as a JSON array of {query_num} search query strings.
+Example format: ["query 1", "query 2", "query 3", "query 4", "query 5", "query 6"]
 """
 
        from browser_use.llm.messages import SystemMessage, UserMessage
@@ -225,12 +226,14 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
            search_queries = json.loads(response.completion.strip())
            if not isinstance(search_queries, list):
                raise ValueError("Invalid search queries format")
-            search_queries = search_queries[:5]
+            search_queries = search_queries[:query_num]
        except (json.JSONDecodeError, ValueError):
            # Fallback to simple queries if parsing fails
            try:
                from json_repair import repair_json
-                search_queries = repair_json(response.completion.strip())
+                search_queries_s = repair_json(response.completion.strip())
+                search_queries = json.loads(search_queries_s)
+                search_queries = search_queries[:query_num]
            except Exception as e:
                search_queries = [
                    params.query,
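The two added lines fix a real bug: `repair_json` returns the repaired JSON as a string by default, so the old code assigned a `str` to `search_queries` where a list was expected. A minimal demonstration of the library's behavior:

import json
from json_repair import repair_json

broken = '["query 1", "query 2", "query 3"'  # e.g. truncated LLM output

repaired = repair_json(broken)   # '["query 1", "query 2", "query 3"]' (still a str)
queries = json.loads(repaired)   # now an actual list
assert isinstance(queries, list)

# json_repair can also parse in one step:
queries = repair_json(broken, return_objects=True)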
@@ -243,7 +246,7 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
        # Step 2: Create browser sessions for parallel searching
        register_sessions = []
 
-        for i, query in enumerate(search_queries):
+        for i, query in enumerate(search_queries[:query_num]):
            agent_id = f"search_agent_{i + 1:03d}"
            register_sessions.append(
                browser_manager.register_agent(agent_id, target_id=None)
@@ -258,7 +261,6 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
            search_tasks.append(self._perform_google_search(browser_session, query, llm))
 
        search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
-
        # Step 4: Aggregate and filter results
        all_results = []
        for i, result in enumerate(search_results):
@@ -268,38 +270,74 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
            if result:
                all_results.extend(result)
 
-        # Step 5: Use LLM to deduplicate and rank top 10 results
-        if all_results:
+        # Step 4.5: Rule-based deduplication to reduce LLM processing load
+        # if all_results:
+        #     deduplicated_results = self._rule_based_deduplication(all_results)
+        #     logger.info(f"Rule-based deduplication: {len(all_results)} -> {len(deduplicated_results)} results")
+        # else:
+        #     deduplicated_results = []
+
+        # Step 5: Use LLM only for final ranking and selection (much smaller dataset now)
+        if all_results and len(all_results) > 10:
+            # Only use LLM if we have more than 10 results to rank
+            # Create indexed results for LLM prompt
+            indexed_results = []
+            for i, result in enumerate(all_results):
+                indexed_results.append({
+                    "index": i,
+                    "title": result.get('title', 'Unknown Title'),
+                    "url": result.get('url', 'No URL'),
+                    "summary": result.get('summary', 'No summary available')
+                })
+
            ranking_prompt = f"""
-Given these search results for the query "{params.query}", please:
-1. Remove duplicates (same or very similar content)
-2. Rank by relevance and value to the user
-3. Select the TOP 10 most relevant and valuable results
+Rank these search results for the query "{params.query}" by relevance and value.
+Select the TOP 10 most relevant and valuable results.
 
-Search Results:
-{json.dumps(all_results, indent=2)}
+Search Results ({len(indexed_results)} total):
+{json.dumps(indexed_results, indent=2, ensure_ascii=False)}
 
-Return the top 10 results as a JSON array, with each result containing:
-- title: string
-- url: string
-- summary: string (brief description of why this result is valuable)
+Return ONLY the indices of the top 10 results as a JSON array of numbers.
+For example: [0, 5, 2, 8, 1, 9, 3, 7, 4, 6]
 
-Format: [{{"title": "...", "url": "...", "summary": "..."}}, ...]
+Format: [index1, index2, index3, ...]
 """
 
            ranking_response = await llm.ainvoke([
                SystemMessage(
-                    content="You are an expert at evaluating and ranking search results for relevance and value."),
+                    content="You are an expert at ranking search results for relevance and value. Return only the indices of the top results."),
                UserMessage(content=ranking_prompt)
            ])
 
            try:
-                top_results = json.loads(ranking_response.completion.strip())
-                if not isinstance(top_results, list):
+                selected_indices = json.loads(ranking_response.completion.strip())
+                if not isinstance(selected_indices, list):
                    raise ValueError("Invalid ranking results format")
+                # Ensure indices are valid and limit to 10
+                valid_indices = [i for i in selected_indices if isinstance(i, int) and 0 <= i < len(all_results)][:10]
+                if valid_indices:
+                    top_results = [all_results[i] for i in valid_indices]
+                else:
+                    top_results = all_results[:10]
            except (json.JSONDecodeError, ValueError):
-                # Fallback to first 10 results if ranking fails
-                top_results = all_results[:10]
+                try:
+                    selected_indices_s = repair_json(ranking_response.completion.strip())
+                    selected_indices = json.loads(selected_indices_s)
+                    if isinstance(selected_indices, list):
+                        valid_indices = [i for i in selected_indices if isinstance(i, int) and 0 <= i < len(all_results)][:10]
+                        if valid_indices:
+                            top_results = [all_results[i] for i in valid_indices]
+                        else:
+                            top_results = all_results[:10]
+                    else:
+                        top_results = all_results[:10]
+                except Exception:
+                    # Fallback to first 10 results
+                    top_results = all_results[:10]
+        elif all_results:
+            # If we have 10 or fewer results, skip LLM ranking
+            top_results = all_results[:10]
+            logger.info(f"Skipping LLM ranking for {len(all_results)} results (≤10)")
        else:
            top_results = []
@@ -694,7 +732,7 @@ Please fix the error and generate corrected JavaScript code:"""
                elif isinstance(value, (dict, list)):
                    # Complex objects - should be serialized by returnByValue
                    try:
-                        result_text = json.dumps(value, ensure_ascii=False)
+                        result_text = json.dumps(value, ensure_ascii=False, indent=2)
                    except (TypeError, ValueError):
                        # Fallback for non-serializable objects
                        result_text = str(value)
@@ -729,7 +767,7 @@ The result is empty or not useful. Please generate improved JavaScript code that
                    result_text = result_text[:30000] + '\n... [Truncated after 30000 characters]'
 
                # Success! Return the result
-                msg = f'Requirement: {params.code_requirement}\n\nGenerated Code (Iteration {iteration}): \n```javascript\n{generated_js_code}\n```\nResult: {result_text}'
+                msg = f'Generated Code (Iteration {iteration}): \n```javascript\n{generated_js_code}\n```\nResult:\n```json\n {result_text}\n```\n'
                logger.info(f'✅ Skill Code succeeded on iteration {iteration}')
 
                return ActionResult(
@@ -907,19 +945,164 @@ Please generate alternative JavaScript code that avoids this system error:"""
            return ActionResult(error=error_msg)
 
 
+    async def _extract_google_results_rule_based(self, browser_session):
+        """Rule-based extraction of Google search results using JavaScript"""
+        try:
+            cdp_session = await browser_session.get_or_create_cdp_session()
+
+            # JavaScript code to extract Google search results using DOM selectors
+            js_extraction_code = """
+            (function() {
+                try {
+                    const results = [];
+
+                    // Multiple selector strategies for different Google layouts
+                    const selectors = [
+                        'div[data-sokoban-container] div[data-sokoban-feature]', // Standard results
+                        'div.g:not(.g-blk)', // Classic results container
+                        '.tF2Cxc', // Modern result container
+                        'div[data-ved] h3', // Result titles
+                    ];
+
+                    let resultElements = [];
+
+                    // Try each selector until we find results
+                    for (const selector of selectors) {
+                        const elements = document.querySelectorAll(selector);
+                        if (elements.length > 0) {
+                            resultElements = Array.from(elements).slice(0, 10); // Get up to 10 results
+                            break;
+                        }
+                    }
+
+                    // If no results found with specific selectors, try broader search
+                    if (resultElements.length === 0) {
+                        // Look for any divs containing h3 elements (likely search results)
+                        const h3Elements = document.querySelectorAll('h3');
+                        resultElements = Array.from(h3Elements)
+                            .map(h3 => h3.closest('div'))
+                            .filter(div => div && div.querySelector('a[href]'))
+                            .slice(0, 10);
+                    }
+
+                    for (let i = 0; i < Math.min(resultElements.length, 10); i++) {
+                        const element = resultElements[i];
+
+                        // Extract title
+                        let title = '';
+                        const titleSelectors = ['h3', '[role="heading"]', 'a > span', '.LC20lb'];
+                        for (const sel of titleSelectors) {
+                            const titleEl = element.querySelector(sel);
+                            if (titleEl && titleEl.textContent.trim()) {
+                                title = titleEl.textContent.trim();
+                                break;
+                            }
+                        }
+
+                        // Extract URL
+                        let url = '';
+                        const linkSelectors = ['a[href^="http"]', 'a[href^="/url?q="]', 'a[href]'];
+                        for (const sel of linkSelectors) {
+                            const linkEl = element.querySelector(sel);
+                            if (linkEl && linkEl.href) {
+                                url = linkEl.href;
+                                // Clean Google redirect URLs
+                                if (url.includes('/url?q=')) {
+                                    const urlMatch = url.match(/[?&]q=([^&]*)/);
+                                    if (urlMatch) {
+                                        url = decodeURIComponent(urlMatch[1]);
+                                    }
+                                }
+                                break;
+                            }
+                        }
+
+                        // Extract summary/description
+                        let summary = '';
+                        const summarySelectors = [
+                            '.VwiC3b', // Description text
+                            '.yXK7lf', // Snippet text
+                            '[data-content-feature="1"] span',
+                            '.s', // Classic description
+                            'span:not(:has(a))'
+                        ];
+                        for (const sel of summarySelectors) {
+                            const summaryEl = element.querySelector(sel);
+                            if (summaryEl && summaryEl.textContent.trim() && summaryEl.textContent.length > 10) {
+                                summary = summaryEl.textContent.trim();
+                                break;
+                            }
+                        }
+
+                        // Only add if we have at least title or URL
+                        if (title || url) {
+                            results.push({
+                                title: title || 'No title',
+                                url: url || 'No URL',
+                                summary: summary || 'No description available'
+                            });
+                        }
+                    }
+
+                    return JSON.stringify(results);
+
+                } catch (e) {
+                    return JSON.stringify([{
+                        title: 'Error extracting results',
+                        url: window.location.href,
+                        summary: 'JavaScript extraction failed: ' + e.message
+                    }]);
+                }
+            })()
+            """
+
+            # Execute JavaScript to extract results
+            result = await cdp_session.cdp_client.send.Runtime.evaluate(
+                params={'expression': js_extraction_code, 'returnByValue': True, 'awaitPromise': True},
+                session_id=cdp_session.session_id,
+            )
+
+            if result.get('exceptionDetails'):
+                logger.warning(f"JavaScript extraction failed: {result['exceptionDetails']}")
+                return []
+
+            result_data = result.get('result', {})
+            value = result_data.get('value', '[]')
+
+            try:
+                extracted_results = json.loads(value)
+                return extracted_results if isinstance(extracted_results, list) else []
+            except (json.JSONDecodeError, ValueError):
+                logger.warning(f"Failed to parse extraction results: {value}")
+                return []
+
+        except Exception as e:
+            logger.error(f"Rule-based extraction failed: {e}")
+            return []
+
    async def _perform_google_search(self, browser_session, query: str, llm: BaseChatModel):
-        """Helper method to perform Google search and extract top 5 results"""
+        """Helper method to perform Google search and extract top 5 results using rule-based extraction"""
        try:
            # Navigate to Google search
            search_url = f'https://www.google.com/search?q={query}&udm=14'
            await browser_session.navigate_to_url(search_url, new_tab=False)
 
            # Wait a moment for page to load
-            await asyncio.sleep(1)
-
-            # Extract structured content
+            await asyncio.sleep(2)
+
+            # Use rule-based extraction first (much faster than LLM)
+            search_ret_len = 10
+            results = await self._extract_google_results_rule_based(browser_session)
+            if results and len(results) > 0:
+                # Rule-based extraction succeeded
+                logger.debug(f"Rule-based extraction found {len(results)} results for query: {query}")
+                return results[:search_ret_len]  # Return top 6 results
+
+            # Fallback to LLM extraction if rule-based fails
+            logger.warning(f"Rule-based extraction failed for query '{query}', falling back to LLM")
+
            extraction_query = f"""
-Extract the top 5 search results from this Google search page. For each result, provide:
+Extract the top {search_ret_len} search results from this Google search page. For each result, provide:
 - title: The clickable title/headline
 - url: The website URL
 - summary: A brief description of what this result contains
@@ -930,18 +1113,17 @@ Return results as a JSON array: [{{"title": "...", "url": "...", "summary": "...
            results_text = await self._extract_structured_content(browser_session, extraction_query, llm)
 
            # Try to parse JSON results
-            import json
            try:
                results = json.loads(results_text.strip())
                if isinstance(results, list):
-                    return results[:5]  # Ensure max 5 results
+                    return results[:search_ret_len]  # Ensure max 5 results
            except (json.JSONDecodeError, ValueError):
                try:
                    results = repair_json(results_text.strip())
                    if isinstance(results, list):
-                        return results[:5]  # Ensure max 5 results
+                        return results[:search_ret_len]  # Ensure max 5 results
                except Exception as e:
-                    logger.warning(f"Failed to parse JSON from search results: {results_text}")
+                    logger.warning(f"Failed to parse JSON from LLM search results: {results_text}")
 
            # Fallback: return raw text as single result
            current_url = await browser_session.get_current_page_url()
@@ -955,6 +1137,74 @@ Return results as a JSON array: [{{"title": "...", "url": "...", "summary": "...
            logger.error(f"Google search failed for query '{query}': {e}")
            return []
 
+    def _rule_based_deduplication(self, results):
+        """Rule-based deduplication to reduce dataset before LLM processing"""
+        if not results:
+            return []
+
+        deduplicated = []
+        seen_urls = set()
+        seen_titles = set()
+
+        for result in results:
+            url = result.get('url', '').strip()
+            title = result.get('title', '').strip().lower()
+
+            # Skip results with missing essential data
+            if not url or not title or url == 'No URL' or title == 'no title':
+                continue
+
+            # Normalize URL for comparison (remove fragments, query params for deduplication)
+            normalized_url = url.split('#')[0].split('?')[0].lower()
+
+            # Check for duplicate URLs
+            if normalized_url in seen_urls:
+                continue
+
+            # Check for very similar titles (basic similarity)
+            title_normalized = ''.join(c for c in title if c.isalnum()).lower()
+            if len(title_normalized) > 10:  # Only check titles with substantial content
+                similar_found = False
+                for seen_title in seen_titles:
+                    # Simple similarity check: if 80% of characters match
+                    if len(title_normalized) > 0 and len(seen_title) > 0:
+                        common_chars = sum(1 for c in title_normalized if c in seen_title)
+                        similarity = common_chars / max(len(title_normalized), len(seen_title))
+                        if similarity > 0.8:
+                            similar_found = True
+                            break
+
+                if similar_found:
+                    continue
+
+            # Add to deduplicated results
+            seen_urls.add(normalized_url)
+            seen_titles.add(title_normalized)
+            deduplicated.append(result)
+
+        # Sort by relevance indicators (prioritize results with longer summaries, non-generic titles)
+        def relevance_score(result):
+            score = 0
+            title = result.get('title', '')
+            summary = result.get('summary', '')
+
+            # Longer summaries are typically more informative
+            score += min(len(summary), 200) / 10
+
+            # Non-generic titles score higher
+            generic_terms = ['search results', 'no title', 'error', 'loading']
+            if not any(term in title.lower() for term in generic_terms):
+                score += 10
+
+            # Prefer results with actual descriptions
+            if summary and summary != 'No description available' and len(summary) > 20:
+                score += 5
+
+            return score
+
+        deduplicated.sort(key=relevance_score, reverse=True)
+        return deduplicated
+
    async def _extract_structured_content(self, browser_session, query: str, llm: BaseChatModel):
        """Helper method to extract structured content from current page"""
        MAX_CHAR_LIMIT = 30000
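Note that `_rule_based_deduplication` lands in this release with its only call site shown in this diff (the "Step 4.5" block above) still commented out. Its URL rule treats pages as duplicates once query strings and fragments are stripped; a quick standalone check of that normalization (synthetic data):

results = [
    {"title": "Rust Book", "url": "https://doc.rust-lang.org/book/?utm=x", "summary": "Official guide"},
    {"title": "Rust Book", "url": "https://doc.rust-lang.org/book/#intro", "summary": "Official guide"},
]

# Both entries normalize to the same key, so the second would be skipped
keys = {r["url"].split('#')[0].split('?')[0].lower() for r in results}
assert keys == {"https://doc.rust-lang.org/book/"}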
vibesurf-0.1.24.dist-info/METADATA → vibesurf-0.1.26.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vibesurf
-Version: 0.1.24
+Version: 0.1.26
 Summary: VibeSurf: A powerful browser assistant for vibe surfing
 Author: Shao Warm
 License: Apache-2.0
vibesurf-0.1.24.dist-info/RECORD → vibesurf-0.1.26.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
 vibe_surf/__init__.py,sha256=WtduuMFGauMD_9dpk4fnRnLTAP6ka9Lfu0feAFNzLfo,339
-vibe_surf/_version.py,sha256=IV4a2R7tlzuACf6FAyPEbprLKNroeE-n_UPSKi1QJSc,706
-vibe_surf/cli.py,sha256=pbep2dBeQqralZ8AggkH4h2nayBarbdN8lhZxo35gNU,16689
+vibe_surf/_version.py,sha256=Y9o7KiJWiG6n9XbSpMICgNgajFRbL4an-gN1BQc-jwM,706
+vibe_surf/cli.py,sha256=KAmUBsXfS-NkMp3ITxzNXwtFeKVmXJUDZiWqLcIC0BI,16690
 vibe_surf/common.py,sha256=_WWMxen5wFwzUjEShn3yDVC1OBFUiJ6Vccadi6tuG6w,1215
 vibe_surf/logger.py,sha256=k53MFA96QX6t9OfcOf1Zws8PP0OOqjVJfhUD3Do9lKw,3043
 vibe_surf/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -23,12 +23,12 @@ vibe_surf/backend/api/browser.py,sha256=NXedyZG3NIVRIx5O7d9mHwVWX-Q4_KsX5mSgfKt8
 vibe_surf/backend/api/config.py,sha256=vKY6ZnKZeazQP9qqUEiQvP9HoPtJbAzETORuPWZomGw,27272
 vibe_surf/backend/api/files.py,sha256=kJMG9MWECKXwGh64Q6xvAzNjeZGcLhIEnn65HiMZHKE,11762
 vibe_surf/backend/api/models.py,sha256=n_bu8vavvO8bIKA1WUAbaGPFeZKeamMJelDWU3DlFJc,10533
-vibe_surf/backend/api/task.py,sha256=vpQMOn6YBuD_16jzfUajUvBYaydC0jj8Ny3WOJDVuck,14359
+vibe_surf/backend/api/task.py,sha256=CYx8FNN04XM8cH9BjAuMb-E7GUTxi4pl0OsT_1KnDBQ,14360
 vibe_surf/backend/api/voices.py,sha256=YfPCqnR7EAYh2nfMRIpB0xEo6_giTtxrcSeobU3HQHg,17098
 vibe_surf/backend/database/__init__.py,sha256=XhmcscnhgMhUyXML7m4SnuQIqkFpyY_zJ0D3yYa2RqQ,239
 vibe_surf/backend/database/manager.py,sha256=Okmr6yG2aycmatONRMyRbHe6l53RkFIPeMxxPSD3ycY,11884
 vibe_surf/backend/database/models.py,sha256=Z5_RqGyD4ER5bsrYjc2iso9yPo7zfAqxNeVDGtZqotw,8887
-vibe_surf/backend/database/queries.py,sha256=0-RKjbHY3G5Y5_QrTtvl-nHs0KPlygmwm0ZOdbsvINY,41155
+vibe_surf/backend/database/queries.py,sha256=6SsAxTr-ocQ189xQ5m0L3BsgUdkGtmt2TcrXP-JIbrw,41157
 vibe_surf/backend/database/schemas.py,sha256=OPnpRKwYG1Cu8geJ6pajiEDF8x8mRestXnAfI4Gy18w,3402
 vibe_surf/backend/database/migrations/v001_initial_schema.sql,sha256=MC2fa1WHUEhHhdOTxz0qB4RI7JdGRpiGXZ77ytl3LRQ,4345
 vibe_surf/backend/database/migrations/v002_add_agent_mode.sql,sha256=jKnW28HsphUeU9kudEx9QaLnUh8swmmOt-hFsZJay24,251
@@ -36,7 +36,7 @@ vibe_surf/backend/database/migrations/v003_fix_task_status_case.sql,sha256=npzRg
 vibe_surf/backend/database/migrations/v004_add_voice_profiles.sql,sha256=-9arjQBF-OxvFIOwkEl7JJJRDTS_nJ8GNX3T7bJgVq0,1321
 vibe_surf/backend/utils/__init__.py,sha256=V8leMFp7apAglUAoCHPZrNNcRHthSLYIudIJE5qwjb0,184
 vibe_surf/backend/utils/encryption.py,sha256=CjLNh_n0Luhfa-6BB-icfzkiiDqj5b4Gu6MADU3p2eM,3754
-vibe_surf/backend/utils/llm_factory.py,sha256=KF84YYgPaOF0_1P_IF0cAtY1kua0D-8gEP2NoSu2UZM,9033
+vibe_surf/backend/utils/llm_factory.py,sha256=XIJYc9Lh_L2vbwlAe96PrjptlzJtLOjCGNdHEx6fThk,9047
 vibe_surf/browser/__init__.py,sha256=_UToO2fZfSCrfjOcxhn4Qq7ZLbYeyPuUUEmqIva-Yv8,325
 vibe_surf/browser/agen_browser_profile.py,sha256=J06hCBJSJ-zAFVM9yDFz8UpmiLuFyWke1EMekpU45eo,5871
 vibe_surf/browser/agent_browser_session.py,sha256=xV0nHo_TCb7b7QYhIee4cLzH-1rqJswYwH7GEwyQmqc,33980
@@ -85,20 +85,20 @@ vibe_surf/chrome_extension/styles/settings-responsive.css,sha256=jLE0yG15n2aI6_6
 vibe_surf/chrome_extension/styles/settings-utilities.css,sha256=3PuQS2857kg83d5erLbLdo_7J95-qV-qyNWS5M-w1oQ,505
 vibe_surf/chrome_extension/styles/variables.css,sha256=enjyhsa0PeU3b-3uiXa-VkV-1-h2-Ai3m4KpmC2k0rY,2984
 vibe_surf/llm/__init__.py,sha256=_vDVPo6STf343p1SgMQrF5023hicAx0g83pK2Gbk4Ek,601
-vibe_surf/llm/openai_compatible.py,sha256=7e0XC-Mtz8MmgQZHH8tx8H_VXB6MLvMhDy1qKbESmVo,16149
+vibe_surf/llm/openai_compatible.py,sha256=i0a5OLaL6QIlacVyctOG09vKr3KOi8T8Izp1v7xkD5I,16112
 vibe_surf/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vibe_surf/tools/browser_use_tools.py,sha256=tacxKUJL6uOt04f52_iIw1cs-FT-mBgIPmAsIc4Hww0,23730
 vibe_surf/tools/file_system.py,sha256=Tw_6J5QjCahQ3fd26CXziF1zPvRxhYM0889oK4bDhlU,19304
-vibe_surf/tools/finance_tools.py,sha256=pwPSBb0HwCDTdKZNAS5NPE8-rM1Nz57foj9XyKgQmI4,24803
+vibe_surf/tools/finance_tools.py,sha256=E8rmblp57e_cp0tFbdZ7BY3_upNlk4Whk0bYc_SFCJE,27284
 vibe_surf/tools/mcp_client.py,sha256=OeCoTgyx4MoY7JxXndK6pGHIoyFOhf5r7XCbx25y1Ec,2446
 vibe_surf/tools/report_writer_tools.py,sha256=2CyTTXOahTKZo7XwyWDDhJ--1mRA0uTtUWxu_DACAY0,776
 vibe_surf/tools/vibesurf_registry.py,sha256=Z-8d9BrJl3RFMEK0Tw1Q5xNHX2kZGsnIGCTBZ3RM-pw,2159
-vibe_surf/tools/vibesurf_tools.py,sha256=KMf9J_GDo9MbjBruv6-aHi5srR2pvlvW3uegihAMRIc,79994
+vibe_surf/tools/vibesurf_tools.py,sha256=O8y1noWyY8y-j8I7vF4oOaVDybINNXiNXWNwGJJ5xsM,91500
 vibe_surf/tools/views.py,sha256=AEAPzML-lqWJ7dBMjXTl7o-rk4hp5PGaPRqLyilJUl8,7789
 vibe_surf/tools/voice_asr.py,sha256=AJG0yq_Jq-j8ulDlbPhVFfK1jch9_ASesis73iki9II,4702
-vibesurf-0.1.24.dist-info/licenses/LICENSE,sha256=czn6QYya0-jhLnStD9JqnMS-hwP5wRByipkrGTvoXLI,11355
-vibesurf-0.1.24.dist-info/METADATA,sha256=Ck-enMQ77f9ekeLQG9xzNGX3mOuDhqIXiXdA3_Zcq4I,5190
-vibesurf-0.1.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-vibesurf-0.1.24.dist-info/entry_points.txt,sha256=UxqpvMocL-PR33S6vLF2OmXn-kVzM-DneMeZeHcPMM8,48
-vibesurf-0.1.24.dist-info/top_level.txt,sha256=VPZGHqSb6EEqcJ4ZX6bHIuWfon5f6HXl3c7BYpbRqnY,10
-vibesurf-0.1.24.dist-info/RECORD,,
+vibesurf-0.1.26.dist-info/licenses/LICENSE,sha256=czn6QYya0-jhLnStD9JqnMS-hwP5wRByipkrGTvoXLI,11355
+vibesurf-0.1.26.dist-info/METADATA,sha256=8Bdh3-15Hl0KbmB0ghk9KNQiveDnVGnhzZ-4Dv0MVjc,5190
+vibesurf-0.1.26.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+vibesurf-0.1.26.dist-info/entry_points.txt,sha256=UxqpvMocL-PR33S6vLF2OmXn-kVzM-DneMeZeHcPMM8,48
+vibesurf-0.1.26.dist-info/top_level.txt,sha256=VPZGHqSb6EEqcJ4ZX6bHIuWfon5f6HXl3c7BYpbRqnY,10
+vibesurf-0.1.26.dist-info/RECORD,,