vibesurf 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vibesurf might be problematic. Click here for more details.
- vibe_surf/_version.py +2 -2
- vibe_surf/agents/vibe_surf_agent.py +1 -1
- vibe_surf/backend/api/config.py +3 -1
- vibe_surf/backend/utils/llm_factory.py +2 -1
- vibe_surf/llm/openai_compatible.py +34 -10
- vibe_surf/tools/file_system.py +2 -2
- vibe_surf/tools/finance_tools.py +629 -0
- vibe_surf/tools/report_writer_tools.py +2 -1
- vibe_surf/tools/vibesurf_tools.py +348 -29
- vibe_surf/tools/views.py +33 -0
- {vibesurf-0.1.23.dist-info → vibesurf-0.1.25.dist-info}/METADATA +2 -1
- {vibesurf-0.1.23.dist-info → vibesurf-0.1.25.dist-info}/RECORD +16 -15
- {vibesurf-0.1.23.dist-info → vibesurf-0.1.25.dist-info}/WHEEL +0 -0
- {vibesurf-0.1.23.dist-info → vibesurf-0.1.25.dist-info}/entry_points.txt +0 -0
- {vibesurf-0.1.23.dist-info → vibesurf-0.1.25.dist-info}/licenses/LICENSE +0 -0
- {vibesurf-0.1.23.dist-info → vibesurf-0.1.25.dist-info}/top_level.txt +0 -0
|
@@ -8,6 +8,8 @@ import json
|
|
|
8
8
|
import enum
|
|
9
9
|
import base64
|
|
10
10
|
import mimetypes
|
|
11
|
+
import yfinance as yf
|
|
12
|
+
import pprint
|
|
11
13
|
from json_repair import repair_json
|
|
12
14
|
from datetime import datetime
|
|
13
15
|
from typing import Optional, Type, Callable, Dict, Any, Union, Awaitable, TypeVar
|
|
@@ -29,7 +31,8 @@ from browser_use.tools.views import NoParamsAction
|
|
|
29
31
|
from vibe_surf.browser.agent_browser_session import AgentBrowserSession
|
|
30
32
|
from vibe_surf.tools.views import HoverAction, ExtractionAction, FileExtractionAction, BrowserUseAgentExecution, \
|
|
31
33
|
ReportWriterTask, TodoGenerateAction, TodoModifyAction, VibeSurfDoneAction, SkillSearchAction, SkillCrawlAction, \
|
|
32
|
-
SkillSummaryAction, SkillTakeScreenshotAction, SkillDeepResearchAction, SkillCodeAction
|
|
34
|
+
SkillSummaryAction, SkillTakeScreenshotAction, SkillDeepResearchAction, SkillCodeAction, SkillFinanceAction
|
|
35
|
+
from vibe_surf.tools.finance_tools import FinanceDataRetriever, FinanceMarkdownFormatter, FinanceMethod
|
|
33
36
|
from vibe_surf.tools.mcp_client import CustomMCPClient
|
|
34
37
|
from vibe_surf.tools.file_system import CustomFileSystem
|
|
35
38
|
from vibe_surf.browser.browser_manager import BrowserManager
|
|
@@ -193,6 +196,7 @@ class VibeSurfTools:
|
|
|
193
196
|
raise RuntimeError("LLM is required for skill_search")
|
|
194
197
|
|
|
195
198
|
# Step 1: Use LLM to analyze user intent and generate different search tasks
|
|
199
|
+
query_num = 6
|
|
196
200
|
from datetime import datetime
|
|
197
201
|
analysis_prompt = f"""
|
|
198
202
|
Analyze the user query and generate 5 different Google search strategies to comprehensively find relevant information.
|
|
@@ -201,13 +205,13 @@ Current Time: {datetime.now().isoformat()}
|
|
|
201
205
|
|
|
202
206
|
User Query: "{params.query}"
|
|
203
207
|
|
|
204
|
-
Generate
|
|
208
|
+
Generate {query_num} different search queries that approach this topic from different angles. Each search should be:
|
|
205
209
|
1. Specific and concrete (good for Google search)
|
|
206
210
|
2. Different from the others (different perspectives/aspects)
|
|
207
211
|
3. Likely to return valuable, unique information
|
|
208
212
|
|
|
209
|
-
Return your response as a JSON array of
|
|
210
|
-
Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
|
|
213
|
+
Return your response as a JSON array of {query_num} search query strings.
|
|
214
|
+
Example format: ["query 1", "query 2", "query 3", "query 4", "query 5", "query 6"]
|
|
211
215
|
"""
|
|
212
216
|
|
|
213
217
|
from browser_use.llm.messages import SystemMessage, UserMessage
|
|
@@ -222,7 +226,7 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
|
|
|
222
226
|
search_queries = json.loads(response.completion.strip())
|
|
223
227
|
if not isinstance(search_queries, list):
|
|
224
228
|
raise ValueError("Invalid search queries format")
|
|
225
|
-
search_queries = search_queries[:
|
|
229
|
+
search_queries = search_queries[:query_num]
|
|
226
230
|
except (json.JSONDecodeError, ValueError):
|
|
227
231
|
# Fallback to simple queries if parsing fails
|
|
228
232
|
try:
|
|
@@ -255,7 +259,6 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
|
|
|
255
259
|
search_tasks.append(self._perform_google_search(browser_session, query, llm))
|
|
256
260
|
|
|
257
261
|
search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
|
|
258
|
-
|
|
259
262
|
# Step 4: Aggregate and filter results
|
|
260
263
|
all_results = []
|
|
261
264
|
for i, result in enumerate(search_results):
|
|
@@ -265,18 +268,24 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5"]
|
|
|
265
268
|
if result:
|
|
266
269
|
all_results.extend(result)
|
|
267
270
|
|
|
268
|
-
# Step 5:
|
|
269
|
-
if all_results:
|
|
271
|
+
# Step 4.5: Rule-based deduplication to reduce LLM processing load
|
|
272
|
+
# if all_results:
|
|
273
|
+
# deduplicated_results = self._rule_based_deduplication(all_results)
|
|
274
|
+
# logger.info(f"Rule-based deduplication: {len(all_results)} -> {len(deduplicated_results)} results")
|
|
275
|
+
# else:
|
|
276
|
+
# deduplicated_results = []
|
|
277
|
+
|
|
278
|
+
# Step 5: Use LLM only for final ranking and selection (much smaller dataset now)
|
|
279
|
+
if all_results and len(all_results) > 10:
|
|
280
|
+
# Only use LLM if we have more than 10 results to rank
|
|
270
281
|
ranking_prompt = f"""
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
2. Rank by relevance and value to the user
|
|
274
|
-
3. Select the TOP 10 most relevant and valuable results
|
|
282
|
+
Rank these search results for the query "{params.query}" by relevance and value.
|
|
283
|
+
Select the TOP 10 most relevant and valuable results.
|
|
275
284
|
|
|
276
|
-
Search Results:
|
|
285
|
+
Search Results ({len(all_results)} total):
|
|
277
286
|
{json.dumps(all_results, indent=2)}
|
|
278
287
|
|
|
279
|
-
Return the top 10 results as a JSON array
|
|
288
|
+
Return the top 10 results as a JSON array with each result containing:
|
|
280
289
|
- title: string
|
|
281
290
|
- url: string
|
|
282
291
|
- summary: string (brief description of why this result is valuable)
|
|
@@ -286,7 +295,7 @@ Format: [{{"title": "...", "url": "...", "summary": "..."}}, ...]
|
|
|
286
295
|
|
|
287
296
|
ranking_response = await llm.ainvoke([
|
|
288
297
|
SystemMessage(
|
|
289
|
-
content="You are an expert at
|
|
298
|
+
content="You are an expert at ranking search results for relevance and value."),
|
|
290
299
|
UserMessage(content=ranking_prompt)
|
|
291
300
|
])
|
|
292
301
|
|
|
@@ -294,9 +303,21 @@ Format: [{{"title": "...", "url": "...", "summary": "..."}}, ...]
|
|
|
294
303
|
top_results = json.loads(ranking_response.completion.strip())
|
|
295
304
|
if not isinstance(top_results, list):
|
|
296
305
|
raise ValueError("Invalid ranking results format")
|
|
306
|
+
top_results = top_results[:10] # Ensure max 10 results
|
|
297
307
|
except (json.JSONDecodeError, ValueError):
|
|
298
|
-
|
|
299
|
-
|
|
308
|
+
try:
|
|
309
|
+
top_results = repair_json(ranking_response.completion.strip())
|
|
310
|
+
if isinstance(top_results, list):
|
|
311
|
+
top_results = top_results[:10]
|
|
312
|
+
else:
|
|
313
|
+
top_results = all_results[:10]
|
|
314
|
+
except Exception:
|
|
315
|
+
# Fallback to the first 10 aggregated results (rule-based deduplication is currently disabled)
|
|
316
|
+
top_results = all_results[:10]
|
|
317
|
+
elif all_results:
|
|
318
|
+
# If we have 10 or fewer results, skip LLM ranking
|
|
319
|
+
top_results = all_results[:10]
|
|
320
|
+
logger.info(f"Skipping LLM ranking for {len(all_results)} results (≤10)")
|
|
300
321
|
else:
|
|
301
322
|
top_results = []
|
|
302
323
|
|
|
@@ -691,7 +712,7 @@ Please fix the error and generate corrected JavaScript code:"""
|
|
|
691
712
|
elif isinstance(value, (dict, list)):
|
|
692
713
|
# Complex objects - should be serialized by returnByValue
|
|
693
714
|
try:
|
|
694
|
-
result_text = json.dumps(value, ensure_ascii=False)
|
|
715
|
+
result_text = json.dumps(value, ensure_ascii=False, indent=2)
|
|
695
716
|
except (TypeError, ValueError):
|
|
696
717
|
# Fallback for non-serializable objects
|
|
697
718
|
result_text = str(value)
|
|
@@ -726,7 +747,7 @@ The result is empty or not useful. Please generate improved JavaScript code that
|
|
|
726
747
|
result_text = result_text[:30000] + '\n... [Truncated after 30000 characters]'
|
|
727
748
|
|
|
728
749
|
# Success! Return the result
|
|
729
|
-
msg = f'
|
|
750
|
+
msg = f'Generated Code (Iteration {iteration}): \n```javascript\n{generated_js_code}\n```\nResult:\n```json\n {result_text}\n```\n'
|
|
730
751
|
logger.info(f'✅ Skill Code succeeded on iteration {iteration}')
|
|
731
752
|
|
|
732
753
|
return ActionResult(
|
|
@@ -818,19 +839,250 @@ Please generate alternative JavaScript code that avoids this system error:"""
|
|
|
818
839
|
include_extracted_content_only_once=True,
|
|
819
840
|
)
|
|
820
841
|
|
|
842
|
+
@self.registry.action(
    'Skill: Get comprehensive financial data for stocks - retrieve company information, historical prices, news, earnings, dividends, analyst recommendations and other financial data using Yahoo Finance. Available methods include: get_info (company info), get_history (price history), get_news (latest news), get_dividends (dividend history), get_earnings (earnings data), get_recommendations (analyst recommendations), get_balance_sheet (balance sheet data), get_income_stmt (income statement), get_cashflow (cash flow statement), get_fast_info (quick stats), get_institutional_holders (institutional ownership), get_major_holders (major shareholders), get_sustainability (ESG data), get_upgrades_downgrades (analyst upgrades/downgrades), and more. If no methods specified, defaults to get_info.',
    param_model=SkillFinanceAction,
)
async def skill_finance(
    params: SkillFinanceAction,
):
    """
    Skill: Get comprehensive financial data using Yahoo Finance.

    Resolves the requested method names to ``FinanceMethod`` members, fetches
    the data through ``FinanceDataRetriever`` and renders it as markdown with
    ``FinanceMarkdownFormatter``.

    Available methods include:
    - get_info: Company information including sector, industry, market cap, business summary
    - get_history: Historical stock prices and volume data over time periods
    - get_news: Latest news articles about the company
    - get_dividends: Historical dividend payments and yield data
    - get_earnings: Quarterly and annual earnings data and growth trends
    - get_recommendations: Analyst recommendations, price targets, and ratings
    - get_balance_sheet: Company balance sheet data (assets, liabilities, equity)
    - get_income_stmt: Income statement data (revenue, expenses, profit)
    - get_cashflow: Cash flow statement data (operating, investing, financing)
    - get_fast_info: Quick statistics like current price, volume, market cap
    - get_institutional_holders: Institutional ownership and holdings data
    - get_major_holders: Major shareholders and insider ownership percentages
    - get_sustainability: ESG (Environmental, Social, Governance) scores and data
    - get_upgrades_downgrades: Recent analyst upgrades and downgrades
    - get_splits: Historical stock splits and stock split dates
    - get_actions: Corporate actions including dividends and splits
    - get_sec_filings: Recent SEC filings and regulatory documents
    - get_calendar: Upcoming earnings dates and events
    - get_mutualfund_holders: Mutual fund ownership data
    - get_insider_purchases: Recent insider buying activity
    - get_insider_transactions: All insider trading transactions
    - get_shares: Outstanding shares and float data

    Args:
        params: Validated action parameters (symbol, methods, period,
            start_date, end_date, interval, num_news).

    Returns:
        ActionResult whose ``extracted_content`` is the markdown report, or an
        ActionResult with ``error`` set when a method name is invalid or the
        retrieval fails. Exceptions are not propagated to the caller.
    """
    try:
        # Default to get_info if no methods specified
        requested = params.methods or [FinanceMethod.GET_INFO]

        # Normalise EVERY entry (not just the first one) to a FinanceMethod.
        # Calling the enum with an existing member returns that member, so
        # mixed lists of strings and enum members are handled uniformly.
        try:
            methods = [FinanceMethod(method) for method in requested]
        except ValueError:
            available_methods = [method.value for method in FinanceMethod]
            return ActionResult(
                error=f'Invalid method in {requested}. Available methods: {available_methods}'
            )

        # The retriever and the formatter both take plain string method names.
        method_names = [method.value for method in methods]

        # Create data retriever with symbol
        retriever = FinanceDataRetriever(params.symbol)

        # get_finance_data is a plain synchronous call (presumably issuing
        # blocking Yahoo Finance requests - confirm in finance_tools), so run
        # it in a worker thread instead of stalling the event loop.
        financial_data = await asyncio.to_thread(
            retriever.get_finance_data,
            methods=method_names,
            period=getattr(params, 'period', '1y'),
            start_date=getattr(params, 'start_date', None),
            end_date=getattr(params, 'end_date', None),
            interval=getattr(params, 'interval', '1d'),
            num_news=getattr(params, 'num_news', 5),
        )

        # Format as markdown using the static method
        markdown_content = FinanceMarkdownFormatter.format_finance_data(
            symbol=params.symbol,
            results=financial_data,
            methods=method_names,
        )

        logger.info(f'💹 Comprehensive finance data retrieved for {params.symbol} with methods: {method_names}')

        return ActionResult(
            extracted_content=markdown_content,
            include_extracted_content_only_once=True,
            long_term_memory=f'Retrieved comprehensive financial data for {params.symbol} using methods: {", ".join(method_names)}',
        )

    except Exception as e:
        # Tool boundary: report the failure to the agent instead of raising.
        error_msg = f'❌ Failed to retrieve financial data for {params.symbol}: {str(e)}'
        logger.error(error_msg)
        return ActionResult(error=error_msg)
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
async def _extract_google_results_rule_based(self, browser_session):
    """Rule-based extraction of Google search results using JavaScript.

    Evaluates a DOM-scraping script in the page through the browser session's
    CDP client and parses the JSON string it returns.

    Args:
        browser_session: Session object providing ``get_or_create_cdp_session()``.

    Returns:
        A list of ``{"title", "url", "summary"}`` dicts (at most 10), or an
        empty list when the script raises, returns something unparsable, or
        any other error occurs. Exceptions are logged, never propagated.
    """
    try:
        cdp_session = await browser_session.get_or_create_cdp_session()

        # JavaScript code to extract Google search results using DOM selectors
        js_extraction_code = """
(function() {
    try {
        const results = [];

        // Multiple selector strategies for different Google layouts
        const selectors = [
            'div[data-sokoban-container] div[data-sokoban-feature]', // Standard results
            'div.g:not(.g-blk)', // Classic results container
            '.tF2Cxc', // Modern result container
            'div[data-ved] h3', // Result titles
        ];

        let resultElements = [];

        // Try each selector until we find results
        for (const selector of selectors) {
            // A selector may match the <h3> title itself. querySelector() only
            // searches descendants, so title/link lookups on the <h3> would all
            // fail; step up to the enclosing <div> that also holds the link.
            const elements = Array.from(document.querySelectorAll(selector))
                .map(el => {
                    if (el.tagName !== 'H3') {
                        return el;
                    }
                    const container = el.closest('div');
                    return container && container.querySelector('a[href]') ? container : null;
                })
                .filter(el => el !== null);
            if (elements.length > 0) {
                resultElements = elements.slice(0, 10); // Get up to 10 results
                break;
            }
        }

        // If no results found with specific selectors, try broader search
        if (resultElements.length === 0) {
            // Look for any divs containing h3 elements (likely search results)
            const h3Elements = document.querySelectorAll('h3');
            resultElements = Array.from(h3Elements)
                .map(h3 => h3.closest('div'))
                .filter(div => div && div.querySelector('a[href]'))
                .slice(0, 10);
        }

        for (let i = 0; i < Math.min(resultElements.length, 10); i++) {
            const element = resultElements[i];

            // Extract title
            let title = '';
            const titleSelectors = ['h3', '[role="heading"]', 'a > span', '.LC20lb'];
            for (const sel of titleSelectors) {
                const titleEl = element.querySelector(sel);
                if (titleEl && titleEl.textContent.trim()) {
                    title = titleEl.textContent.trim();
                    break;
                }
            }

            // Extract URL
            let url = '';
            const linkSelectors = ['a[href^="http"]', 'a[href^="/url?q="]', 'a[href]'];
            for (const sel of linkSelectors) {
                const linkEl = element.querySelector(sel);
                if (linkEl && linkEl.href) {
                    url = linkEl.href;
                    // Clean Google redirect URLs
                    if (url.includes('/url?q=')) {
                        const urlMatch = url.match(/[?&]q=([^&]*)/);
                        if (urlMatch) {
                            url = decodeURIComponent(urlMatch[1]);
                        }
                    }
                    break;
                }
            }

            // Extract summary/description
            let summary = '';
            const summarySelectors = [
                '.VwiC3b', // Description text
                '.yXK7lf', // Snippet text
                '[data-content-feature="1"] span',
                '.s', // Classic description
                'span:not(:has(a))'
            ];
            for (const sel of summarySelectors) {
                const summaryEl = element.querySelector(sel);
                if (summaryEl && summaryEl.textContent.trim() && summaryEl.textContent.length > 10) {
                    summary = summaryEl.textContent.trim();
                    break;
                }
            }

            // Only add if we have at least title or URL
            if (title || url) {
                results.push({
                    title: title || 'No title',
                    url: url || 'No URL',
                    summary: summary || 'No description available'
                });
            }
        }

        return JSON.stringify(results);

    } catch (e) {
        return JSON.stringify([{
            title: 'Error extracting results',
            url: window.location.href,
            summary: 'JavaScript extraction failed: ' + e.message
        }]);
    }
})()
"""

        # Execute JavaScript to extract results
        result = await cdp_session.cdp_client.send.Runtime.evaluate(
            params={'expression': js_extraction_code, 'returnByValue': True, 'awaitPromise': True},
            session_id=cdp_session.session_id,
        )

        if result.get('exceptionDetails'):
            logger.warning(f"JavaScript extraction failed: {result['exceptionDetails']}")
            return []

        value = result.get('result', {}).get('value', '[]')

        # The script returns a JSON string, but tolerate an already-decoded
        # list and reject anything else explicitly rather than letting
        # json.loads raise a TypeError into the generic handler below.
        if isinstance(value, list):
            return value
        if not isinstance(value, str):
            logger.warning(f"Failed to parse extraction results: {value}")
            return []

        try:
            extracted_results = json.loads(value)
        except (json.JSONDecodeError, ValueError):
            logger.warning(f"Failed to parse extraction results: {value}")
            return []
        return extracted_results if isinstance(extracted_results, list) else []

    except Exception as e:
        # Best-effort helper: the caller falls back to LLM extraction on [].
        logger.error(f"Rule-based extraction failed: {e}")
        return []
|
|
1062
|
+
|
|
821
1063
|
async def _perform_google_search(self, browser_session, query: str, llm: BaseChatModel):
|
|
822
|
-
"""Helper method to perform Google search and extract top 5 results"""
|
|
1064
|
+
"""Helper method to perform Google search and extract top 5 results using rule-based extraction"""
|
|
823
1065
|
try:
|
|
824
1066
|
# Navigate to Google search
|
|
825
1067
|
search_url = f'https://www.google.com/search?q={query}&udm=14'
|
|
826
1068
|
await browser_session.navigate_to_url(search_url, new_tab=False)
|
|
827
1069
|
|
|
828
1070
|
# Wait a moment for page to load
|
|
829
|
-
await asyncio.sleep(
|
|
830
|
-
|
|
831
|
-
#
|
|
1071
|
+
await asyncio.sleep(2)
|
|
1072
|
+
|
|
1073
|
+
# Use rule-based extraction first (much faster than LLM)
|
|
1074
|
+
search_ret_len = 10
|
|
1075
|
+
results = await self._extract_google_results_rule_based(browser_session)
|
|
1076
|
+
if results and len(results) > 0:
|
|
1077
|
+
# Rule-based extraction succeeded
|
|
1078
|
+
logger.info(f"Rule-based extraction found {len(results)} results for query: {query}")
|
|
1079
|
+
return results[:search_ret_len] # Return at most search_ret_len results
|
|
1080
|
+
|
|
1081
|
+
# Fallback to LLM extraction if rule-based fails
|
|
1082
|
+
logger.warning(f"Rule-based extraction failed for query '{query}', falling back to LLM")
|
|
1083
|
+
|
|
832
1084
|
extraction_query = f"""
|
|
833
|
-
Extract the top
|
|
1085
|
+
Extract the top {search_ret_len} search results from this Google search page. For each result, provide:
|
|
834
1086
|
- title: The clickable title/headline
|
|
835
1087
|
- url: The website URL
|
|
836
1088
|
- summary: A brief description of what this result contains
|
|
@@ -841,18 +1093,17 @@ Return results as a JSON array: [{{"title": "...", "url": "...", "summary": "...
|
|
|
841
1093
|
results_text = await self._extract_structured_content(browser_session, extraction_query, llm)
|
|
842
1094
|
|
|
843
1095
|
# Try to parse JSON results
|
|
844
|
-
import json
|
|
845
1096
|
try:
|
|
846
1097
|
results = json.loads(results_text.strip())
|
|
847
1098
|
if isinstance(results, list):
|
|
848
|
-
return results[:
|
|
1099
|
+
return results[:search_ret_len] # Ensure at most search_ret_len results
|
|
849
1100
|
except (json.JSONDecodeError, ValueError):
|
|
850
1101
|
try:
|
|
851
1102
|
results = repair_json(results_text.strip())
|
|
852
1103
|
if isinstance(results, list):
|
|
853
|
-
return results[:
|
|
1104
|
+
return results[:search_ret_len] # Ensure at most search_ret_len results
|
|
854
1105
|
except Exception as e:
|
|
855
|
-
logger.warning(f"Failed to parse JSON from search results: {results_text}")
|
|
1106
|
+
logger.warning(f"Failed to parse JSON from LLM search results: {results_text}")
|
|
856
1107
|
|
|
857
1108
|
# Fallback: return raw text as single result
|
|
858
1109
|
current_url = await browser_session.get_current_page_url()
|
|
@@ -866,6 +1117,74 @@ Return results as a JSON array: [{{"title": "...", "url": "...", "summary": "...
|
|
|
866
1117
|
logger.error(f"Google search failed for query '{query}': {e}")
|
|
867
1118
|
return []
|
|
868
1119
|
|
|
1120
|
+
def _rule_based_deduplication(self, results):
|
|
1121
|
+
"""Rule-based deduplication to reduce dataset before LLM processing"""
|
|
1122
|
+
if not results:
|
|
1123
|
+
return []
|
|
1124
|
+
|
|
1125
|
+
deduplicated = []
|
|
1126
|
+
seen_urls = set()
|
|
1127
|
+
seen_titles = set()
|
|
1128
|
+
|
|
1129
|
+
for result in results:
|
|
1130
|
+
url = result.get('url', '').strip()
|
|
1131
|
+
title = result.get('title', '').strip().lower()
|
|
1132
|
+
|
|
1133
|
+
# Skip results with missing essential data
|
|
1134
|
+
if not url or not title or url == 'No URL' or title == 'no title':
|
|
1135
|
+
continue
|
|
1136
|
+
|
|
1137
|
+
# Normalize URL for comparison (remove fragments, query params for deduplication)
|
|
1138
|
+
normalized_url = url.split('#')[0].split('?')[0].lower()
|
|
1139
|
+
|
|
1140
|
+
# Check for duplicate URLs
|
|
1141
|
+
if normalized_url in seen_urls:
|
|
1142
|
+
continue
|
|
1143
|
+
|
|
1144
|
+
# Check for very similar titles (basic similarity)
|
|
1145
|
+
title_normalized = ''.join(c for c in title if c.isalnum()).lower()
|
|
1146
|
+
if len(title_normalized) > 10: # Only check titles with substantial content
|
|
1147
|
+
similar_found = False
|
|
1148
|
+
for seen_title in seen_titles:
|
|
1149
|
+
# Simple similarity check: if 80% of characters match
|
|
1150
|
+
if len(title_normalized) > 0 and len(seen_title) > 0:
|
|
1151
|
+
common_chars = sum(1 for c in title_normalized if c in seen_title)
|
|
1152
|
+
similarity = common_chars / max(len(title_normalized), len(seen_title))
|
|
1153
|
+
if similarity > 0.8:
|
|
1154
|
+
similar_found = True
|
|
1155
|
+
break
|
|
1156
|
+
|
|
1157
|
+
if similar_found:
|
|
1158
|
+
continue
|
|
1159
|
+
|
|
1160
|
+
# Add to deduplicated results
|
|
1161
|
+
seen_urls.add(normalized_url)
|
|
1162
|
+
seen_titles.add(title_normalized)
|
|
1163
|
+
deduplicated.append(result)
|
|
1164
|
+
|
|
1165
|
+
# Sort by relevance indicators (prioritize results with longer summaries, non-generic titles)
|
|
1166
|
+
def relevance_score(result):
|
|
1167
|
+
score = 0
|
|
1168
|
+
title = result.get('title', '')
|
|
1169
|
+
summary = result.get('summary', '')
|
|
1170
|
+
|
|
1171
|
+
# Longer summaries are typically more informative
|
|
1172
|
+
score += min(len(summary), 200) / 10
|
|
1173
|
+
|
|
1174
|
+
# Non-generic titles score higher
|
|
1175
|
+
generic_terms = ['search results', 'no title', 'error', 'loading']
|
|
1176
|
+
if not any(term in title.lower() for term in generic_terms):
|
|
1177
|
+
score += 10
|
|
1178
|
+
|
|
1179
|
+
# Prefer results with actual descriptions
|
|
1180
|
+
if summary and summary != 'No description available' and len(summary) > 20:
|
|
1181
|
+
score += 5
|
|
1182
|
+
|
|
1183
|
+
return score
|
|
1184
|
+
|
|
1185
|
+
deduplicated.sort(key=relevance_score, reverse=True)
|
|
1186
|
+
return deduplicated
|
|
1187
|
+
|
|
869
1188
|
async def _extract_structured_content(self, browser_session, query: str, llm: BaseChatModel):
|
|
870
1189
|
"""Helper method to extract structured content from current page"""
|
|
871
1190
|
MAX_CHAR_LIMIT = 30000
|
|
@@ -1379,7 +1698,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
|
|
|
1379
1698
|
async def write_file(
|
|
1380
1699
|
file_path: str,
|
|
1381
1700
|
content: str,
|
|
1382
|
-
file_system:
|
|
1701
|
+
file_system: CustomFileSystem,
|
|
1383
1702
|
append: bool = False,
|
|
1384
1703
|
trailing_newline: bool = True,
|
|
1385
1704
|
leading_newline: bool = False,
|
vibe_surf/tools/views.py
CHANGED
|
@@ -178,3 +178,36 @@ class SkillCodeAction(BaseModel):
|
|
|
178
178
|
max_length=4,
|
|
179
179
|
description='Optional 4 character Tab ID to execute code on specific tab',
|
|
180
180
|
)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class SkillFinanceAction(BaseModel):
    """Parameters for skill_finance action"""
    # NOTE: the class docstring above is left untouched on purpose; Pydantic
    # can surface it as the model description in the generated schema.

    # Ticker to query; the only required field.
    symbol: str = Field(
        description='Stock symbol to retrieve financial data for (e.g., AAPL, GOOG, TSLA)',
    )

    # Method names to run; None/empty is treated as get_info by the action.
    methods: list[str] | None = Field(
        description=(
            'List of finance methods to retrieve. '
            'Common methods: get_info (basic company info), get_history (stock price history), '
            'get_news (latest news), get_dividends (dividend history), get_earnings (earnings data), '
            'get_fast_info (quick stats), get_recommendations (analyst recommendations), '
            'get_financials (income statement), get_balance_sheet (balance sheet), '
            'get_cashflow (cash flow). '
            'If empty, defaults to get_info. '
            'Full list available in FinanceMethod enum.'
        ),
        default=None,
    )

    # Look-back window for historical data.
    period: str = Field(
        description='Time period for historical data (1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max)',
        default='1y',
    )

    # Explicit date range, documented as the alternative to `period`.
    start_date: str | None = Field(
        description='Start date for historical data (YYYY-MM-DD format). Use with end_date instead of period.',
        default=None,
    )
    end_date: str | None = Field(
        description='End date for historical data (YYYY-MM-DD format). Use with start_date instead of period.',
        default=None,
    )

    # Sampling granularity for historical data.
    interval: str = Field(
        description='Data interval for historical data (1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo)',
        default='1d',
    )

    # News article count, validated to the inclusive range 1..20.
    num_news: int = Field(
        description='Number of news articles to retrieve when get_news method is selected',
        default=5,
        ge=1,
        le=20,
    )
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vibesurf
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.25
|
|
4
4
|
Summary: VibeSurf: A powerful browser assistant for vibe surfing
|
|
5
5
|
Author: Shao Warm
|
|
6
6
|
License: Apache-2.0
|
|
@@ -43,6 +43,7 @@ Requires-Dist: nanoid>=2.0.0
|
|
|
43
43
|
Requires-Dist: markdownify>=1.2.0
|
|
44
44
|
Requires-Dist: pathvalidate>=3.3.1
|
|
45
45
|
Requires-Dist: dashscope>=1.24.5
|
|
46
|
+
Requires-Dist: yfinance>=0.2.66
|
|
46
47
|
Dynamic: license-file
|
|
47
48
|
|
|
48
49
|
# VibeSurf: A powerful browser assistant for vibe surfing
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
vibe_surf/__init__.py,sha256=WtduuMFGauMD_9dpk4fnRnLTAP6ka9Lfu0feAFNzLfo,339
|
|
2
|
-
vibe_surf/_version.py,sha256=
|
|
2
|
+
vibe_surf/_version.py,sha256=VIORluFSyo8DggJNI3m2ltXngK-bmCHX8hSwlGrwopY,706
|
|
3
3
|
vibe_surf/cli.py,sha256=pbep2dBeQqralZ8AggkH4h2nayBarbdN8lhZxo35gNU,16689
|
|
4
4
|
vibe_surf/common.py,sha256=_WWMxen5wFwzUjEShn3yDVC1OBFUiJ6Vccadi6tuG6w,1215
|
|
5
5
|
vibe_surf/logger.py,sha256=k53MFA96QX6t9OfcOf1Zws8PP0OOqjVJfhUD3Do9lKw,3043
|
|
6
6
|
vibe_surf/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
vibe_surf/agents/browser_use_agent.py,sha256=jeUYV7yk6vyycw6liju_597GdjB3CW_B2wEhn2F0ekk,45957
|
|
8
8
|
vibe_surf/agents/report_writer_agent.py,sha256=pCF2k6VLyO-sSviGBqqIyVD3SLqaZtSqiW3kvNfPY1I,20967
|
|
9
|
-
vibe_surf/agents/vibe_surf_agent.py,sha256=
|
|
9
|
+
vibe_surf/agents/vibe_surf_agent.py,sha256=sTUO4xAiznr7RRzdrRYzXENos9XovicZw8ow2UuJsyI,74286
|
|
10
10
|
vibe_surf/agents/views.py,sha256=yHjNJloa-aofVTGyuRy08tBYP_Y3XLqt1DUWOUmHRng,4825
|
|
11
11
|
vibe_surf/agents/prompts/__init__.py,sha256=l4ieA0D8kLJthyNN85FKLNe4ExBa3stY3l-aImLDRD0,36
|
|
12
12
|
vibe_surf/agents/prompts/report_writer_prompt.py,sha256=sZE8MUT1CDLmRzbnbEQzAvTwJjpITgh2Q8g1_eXmkzE,4454
|
|
@@ -20,7 +20,7 @@ vibe_surf/backend/api/__init__.py,sha256=XxF1jUOORpLYCfFuPrrnUGRnOrr6ClH0_MNPU-4
|
|
|
20
20
|
vibe_surf/backend/api/activity.py,sha256=_cnHusqolt5Hf3KdAf6FK-3sBc-TSaadmb5dJxGI57A,9398
|
|
21
21
|
vibe_surf/backend/api/agent.py,sha256=ISsG3FUIYoUCGcoQAfV3T6mtJSKHxC809p4bqjzjqlU,1199
|
|
22
22
|
vibe_surf/backend/api/browser.py,sha256=NXedyZG3NIVRIx5O7d9mHwVWX-Q4_KsX5mSgfKt8UEA,2122
|
|
23
|
-
vibe_surf/backend/api/config.py,sha256=
|
|
23
|
+
vibe_surf/backend/api/config.py,sha256=vKY6ZnKZeazQP9qqUEiQvP9HoPtJbAzETORuPWZomGw,27272
|
|
24
24
|
vibe_surf/backend/api/files.py,sha256=kJMG9MWECKXwGh64Q6xvAzNjeZGcLhIEnn65HiMZHKE,11762
|
|
25
25
|
vibe_surf/backend/api/models.py,sha256=n_bu8vavvO8bIKA1WUAbaGPFeZKeamMJelDWU3DlFJc,10533
|
|
26
26
|
vibe_surf/backend/api/task.py,sha256=vpQMOn6YBuD_16jzfUajUvBYaydC0jj8Ny3WOJDVuck,14359
|
|
@@ -36,7 +36,7 @@ vibe_surf/backend/database/migrations/v003_fix_task_status_case.sql,sha256=npzRg
|
|
|
36
36
|
vibe_surf/backend/database/migrations/v004_add_voice_profiles.sql,sha256=-9arjQBF-OxvFIOwkEl7JJJRDTS_nJ8GNX3T7bJgVq0,1321
|
|
37
37
|
vibe_surf/backend/utils/__init__.py,sha256=V8leMFp7apAglUAoCHPZrNNcRHthSLYIudIJE5qwjb0,184
|
|
38
38
|
vibe_surf/backend/utils/encryption.py,sha256=CjLNh_n0Luhfa-6BB-icfzkiiDqj5b4Gu6MADU3p2eM,3754
|
|
39
|
-
vibe_surf/backend/utils/llm_factory.py,sha256=
|
|
39
|
+
vibe_surf/backend/utils/llm_factory.py,sha256=KF84YYgPaOF0_1P_IF0cAtY1kua0D-8gEP2NoSu2UZM,9033
|
|
40
40
|
vibe_surf/browser/__init__.py,sha256=_UToO2fZfSCrfjOcxhn4Qq7ZLbYeyPuUUEmqIva-Yv8,325
|
|
41
41
|
vibe_surf/browser/agen_browser_profile.py,sha256=J06hCBJSJ-zAFVM9yDFz8UpmiLuFyWke1EMekpU45eo,5871
|
|
42
42
|
vibe_surf/browser/agent_browser_session.py,sha256=xV0nHo_TCb7b7QYhIee4cLzH-1rqJswYwH7GEwyQmqc,33980
|
|
@@ -85,19 +85,20 @@ vibe_surf/chrome_extension/styles/settings-responsive.css,sha256=jLE0yG15n2aI6_6
|
|
|
85
85
|
vibe_surf/chrome_extension/styles/settings-utilities.css,sha256=3PuQS2857kg83d5erLbLdo_7J95-qV-qyNWS5M-w1oQ,505
|
|
86
86
|
vibe_surf/chrome_extension/styles/variables.css,sha256=enjyhsa0PeU3b-3uiXa-VkV-1-h2-Ai3m4KpmC2k0rY,2984
|
|
87
87
|
vibe_surf/llm/__init__.py,sha256=_vDVPo6STf343p1SgMQrF5023hicAx0g83pK2Gbk4Ek,601
|
|
88
|
-
vibe_surf/llm/openai_compatible.py,sha256=
|
|
88
|
+
vibe_surf/llm/openai_compatible.py,sha256=rZfqjUggvftGGy76HQO3r3sumd2i_3iAuL52JyxkQUY,16113
|
|
89
89
|
vibe_surf/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
90
90
|
vibe_surf/tools/browser_use_tools.py,sha256=tacxKUJL6uOt04f52_iIw1cs-FT-mBgIPmAsIc4Hww0,23730
|
|
91
|
-
vibe_surf/tools/file_system.py,sha256=
|
|
91
|
+
vibe_surf/tools/file_system.py,sha256=Tw_6J5QjCahQ3fd26CXziF1zPvRxhYM0889oK4bDhlU,19304
|
|
92
|
+
vibe_surf/tools/finance_tools.py,sha256=E8rmblp57e_cp0tFbdZ7BY3_upNlk4Whk0bYc_SFCJE,27284
|
|
92
93
|
vibe_surf/tools/mcp_client.py,sha256=OeCoTgyx4MoY7JxXndK6pGHIoyFOhf5r7XCbx25y1Ec,2446
|
|
93
|
-
vibe_surf/tools/report_writer_tools.py,sha256=
|
|
94
|
+
vibe_surf/tools/report_writer_tools.py,sha256=2CyTTXOahTKZo7XwyWDDhJ--1mRA0uTtUWxu_DACAY0,776
|
|
94
95
|
vibe_surf/tools/vibesurf_registry.py,sha256=Z-8d9BrJl3RFMEK0Tw1Q5xNHX2kZGsnIGCTBZ3RM-pw,2159
|
|
95
|
-
vibe_surf/tools/vibesurf_tools.py,sha256=
|
|
96
|
-
vibe_surf/tools/views.py,sha256=
|
|
96
|
+
vibe_surf/tools/vibesurf_tools.py,sha256=USmSqSc03h-FsuzvOcN_S8f3hHVJ-WEx0V5V8RxskoE,90101
|
|
97
|
+
vibe_surf/tools/views.py,sha256=AEAPzML-lqWJ7dBMjXTl7o-rk4hp5PGaPRqLyilJUl8,7789
|
|
97
98
|
vibe_surf/tools/voice_asr.py,sha256=AJG0yq_Jq-j8ulDlbPhVFfK1jch9_ASesis73iki9II,4702
|
|
98
|
-
vibesurf-0.1.
|
|
99
|
-
vibesurf-0.1.
|
|
100
|
-
vibesurf-0.1.
|
|
101
|
-
vibesurf-0.1.
|
|
102
|
-
vibesurf-0.1.
|
|
103
|
-
vibesurf-0.1.
|
|
99
|
+
vibesurf-0.1.25.dist-info/licenses/LICENSE,sha256=czn6QYya0-jhLnStD9JqnMS-hwP5wRByipkrGTvoXLI,11355
|
|
100
|
+
vibesurf-0.1.25.dist-info/METADATA,sha256=cplQA5KwaRfM0Hy8l7TFXjkxsxmF-xyL2WbJWZ7FOUg,5190
|
|
101
|
+
vibesurf-0.1.25.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
102
|
+
vibesurf-0.1.25.dist-info/entry_points.txt,sha256=UxqpvMocL-PR33S6vLF2OmXn-kVzM-DneMeZeHcPMM8,48
|
|
103
|
+
vibesurf-0.1.25.dist-info/top_level.txt,sha256=VPZGHqSb6EEqcJ4ZX6bHIuWfon5f6HXl3c7BYpbRqnY,10
|
|
104
|
+
vibesurf-0.1.25.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|