vibesurf 0.1.32__py3-none-any.whl → 0.1.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of vibesurf might be problematic.
- vibe_surf/_version.py +2 -2
- vibe_surf/agents/browser_use_agent.py +1 -1
- vibe_surf/agents/prompts/vibe_surf_prompt.py +6 -0
- vibe_surf/agents/report_writer_agent.py +50 -0
- vibe_surf/agents/vibe_surf_agent.py +55 -0
- vibe_surf/backend/api/composio.py +952 -0
- vibe_surf/backend/database/migrations/v005_add_composio_integration.sql +33 -0
- vibe_surf/backend/database/migrations/v006_add_credentials_table.sql +26 -0
- vibe_surf/backend/database/models.py +53 -1
- vibe_surf/backend/database/queries.py +312 -2
- vibe_surf/backend/main.py +28 -0
- vibe_surf/backend/shared_state.py +123 -9
- vibe_surf/chrome_extension/scripts/api-client.js +32 -0
- vibe_surf/chrome_extension/scripts/settings-manager.js +954 -1
- vibe_surf/chrome_extension/sidepanel.html +190 -0
- vibe_surf/chrome_extension/styles/settings-integrations.css +927 -0
- vibe_surf/chrome_extension/styles/settings-modal.css +7 -3
- vibe_surf/chrome_extension/styles/settings-responsive.css +37 -5
- vibe_surf/cli.py +98 -3
- vibe_surf/telemetry/__init__.py +60 -0
- vibe_surf/telemetry/service.py +112 -0
- vibe_surf/telemetry/views.py +156 -0
- vibe_surf/tools/composio_client.py +456 -0
- vibe_surf/tools/mcp_client.py +21 -2
- vibe_surf/tools/vibesurf_tools.py +290 -87
- vibe_surf/tools/views.py +16 -0
- vibe_surf/tools/website_api/youtube/client.py +35 -13
- vibe_surf/utils.py +13 -0
- {vibesurf-0.1.32.dist-info → vibesurf-0.1.34.dist-info}/METADATA +11 -9
- {vibesurf-0.1.32.dist-info → vibesurf-0.1.34.dist-info}/RECORD +34 -25
- {vibesurf-0.1.32.dist-info → vibesurf-0.1.34.dist-info}/WHEEL +0 -0
- {vibesurf-0.1.32.dist-info → vibesurf-0.1.34.dist-info}/entry_points.txt +0 -0
- {vibesurf-0.1.32.dist-info → vibesurf-0.1.34.dist-info}/licenses/LICENSE +0 -0
- {vibesurf-0.1.32.dist-info → vibesurf-0.1.34.dist-info}/top_level.txt +0 -0
vibe_surf/tools/vibesurf_tools.py

@@ -32,9 +32,10 @@ from vibe_surf.browser.agent_browser_session import AgentBrowserSession
 from vibe_surf.tools.views import HoverAction, ExtractionAction, FileExtractionAction, BrowserUseAgentExecution, \
     ReportWriterTask, TodoGenerateAction, TodoModifyAction, VibeSurfDoneAction, SkillSearchAction, SkillCrawlAction, \
     SkillSummaryAction, SkillTakeScreenshotAction, SkillDeepResearchAction, SkillCodeAction, SkillFinanceAction, \
-    SkillXhsAction, SkillDouyinAction, SkillYoutubeAction, SkillWeiboAction
+    SkillXhsAction, SkillDouyinAction, SkillYoutubeAction, SkillWeiboAction, GrepContentAction
 from vibe_surf.tools.finance_tools import FinanceDataRetriever, FinanceMarkdownFormatter, FinanceMethod
 from vibe_surf.tools.mcp_client import CustomMCPClient
+from vibe_surf.tools.composio_client import ComposioClient
 from vibe_surf.tools.file_system import CustomFileSystem
 from vibe_surf.browser.browser_manager import BrowserManager
 from vibe_surf.tools.vibesurf_registry import VibeSurfRegistry
@@ -166,7 +167,8 @@ def convert_selector_map_for_llm(selector_map) -> dict:
 
 
 class VibeSurfTools:
-    def __init__(self, exclude_actions: list[str] = [], mcp_server_config: Optional[Dict[str, Any]] = None):
+    def __init__(self, exclude_actions: list[str] = [], mcp_server_config: Optional[Dict[str, Any]] = None,
+                 composio_client: ComposioClient = None):
         self.registry = VibeSurfRegistry(exclude_actions)
         self._register_file_actions()
         self._register_browser_use_agent()
@@ -176,6 +178,7 @@ class VibeSurfTools:
         self._register_skills()
         self.mcp_server_config = mcp_server_config
         self.mcp_clients: Dict[str, MCPClient] = {}
+        self.composio_client: ComposioClient = composio_client
 
     def _register_skills(self):
         @self.registry.action(
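The constructor now accepts an optional Composio client next to the MCP server config. A minimal wiring sketch, assuming only the signatures visible in this diff:

```python
# Minimal wiring sketch based on the signatures in this diff; both clients
# start empty here and can be populated later via the registration methods.
from vibe_surf.tools.composio_client import ComposioClient
from vibe_surf.tools.vibesurf_tools import VibeSurfTools

composio_client = ComposioClient(composio_instance=None)  # instance attached later
tools = VibeSurfTools(
    exclude_actions=[],
    mcp_server_config=None,
    composio_client=composio_client,
)
```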
@@ -290,7 +293,7 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5", "query 6
                     "url": result.get('url', 'No URL'),
                     "summary": result.get('summary', 'No summary available')
                 })
-
+
             ranking_prompt = f"""
 Rank these search results for the query "{params.query}" by relevance and value.
 Select the TOP 10 most relevant and valuable results.
@@ -315,7 +318,8 @@ Format: [index1, index2, index3, ...]
                 if not isinstance(selected_indices, list):
                     raise ValueError("Invalid ranking results format")
                 # Ensure indices are valid and limit to 10
-                valid_indices = [i for i in selected_indices if isinstance(i, int) and 0 <= i < len(all_results)][:10]
+                valid_indices = [i for i in selected_indices if
+                                 isinstance(i, int) and 0 <= i < len(all_results)][:10]
                 if valid_indices:
                     top_results = [all_results[i] for i in valid_indices]
                 else:
@@ -325,7 +329,8 @@ Format: [index1, index2, index3, ...]
                         selected_indices_s = repair_json(ranking_response.completion.strip())
                         selected_indices = json.loads(selected_indices_s)
                         if isinstance(selected_indices, list):
-                            valid_indices = [i for i in selected_indices if isinstance(i, int) and 0 <= i < len(all_results)][:10]
+                            valid_indices = [i for i in selected_indices if
+                                             isinstance(i, int) and 0 <= i < len(all_results)][:10]
                             if valid_indices:
                                 top_results = [all_results[i] for i in valid_indices]
                             else:
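Both ranking paths above now share the same guard: indices returned by the LLM are kept only if they are integers within range, then capped at ten. A standalone run of that filter:

```python
# Standalone run of the index guard shared by both ranking paths above.
all_results = [{"title": f"result {n}"} for n in range(5)]
selected_indices = [3, "2", 0, 99, -1, 4, 1]  # typical messy LLM output

valid_indices = [i for i in selected_indices if
                 isinstance(i, int) and 0 <= i < len(all_results)][:10]
print(valid_indices)  # [3, 0, 4, 1] - strings, negatives, out-of-range dropped

top_results = [all_results[i] for i in valid_indices]
```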
@@ -897,7 +902,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
             try:
                 # Default to get_info if no methods specified
                 methods = params.methods if params.methods else [FinanceMethod.GET_INFO]
-
+
                 # Convert string methods to FinanceMethod enum if needed
                 if methods and isinstance(methods[0], str):
                     try:
@@ -907,13 +912,13 @@ Please generate alternative JavaScript code that avoids this system error:"""
                         return ActionResult(
                             error=f'Invalid method in {methods}. Available methods: {available_methods}'
                         )
-
+
                 # Create data retriever with symbol
                 retriever = FinanceDataRetriever(params.symbol)
-
+
                 # Convert FinanceMethod enum values to strings for the retriever
                 method_strings = [method.value for method in methods]
-
+
                 # Retrieve financial data
                 financial_data = retriever.get_finance_data(
                     methods=method_strings,
@@ -923,29 +928,28 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     interval=getattr(params, 'interval', '1d'),
                     num_news=getattr(params, 'num_news', 5)
                 )
-
+
                 # Format as markdown using the static method
                 markdown_content = FinanceMarkdownFormatter.format_finance_data(
                     symbol=params.symbol,
                     results=financial_data,
                     methods=method_strings
                 )
-
+
                 method_names = [method.value for method in methods]
                 logger.info(f'💹 Comprehensive finance data retrieved for {params.symbol} with methods: {method_names}')
-
+
                 return ActionResult(
                     extracted_content=markdown_content,
                     include_extracted_content_only_once=True,
                     long_term_memory=f'Retrieved comprehensive financial data for {params.symbol} using methods: {", ".join(method_names)}',
                 )
-
+
             except Exception as e:
                 error_msg = f'❌ Failed to retrieve financial data for {params.symbol}: {str(e)}'
                 logger.error(error_msg)
                 return ActionResult(error=error_msg, extracted_content=error_msg)
 
-
         @self.registry.action(
             'Skill: Xiaohongshu API - Access Xiaohongshu (Little Red Book) platform data including search, content details, comments, user profiles, and recommendations. Methods: search_content_by_keyword, fetch_content_details, fetch_all_content_comments, get_user_profile, fetch_all_user_content, get_home_recommendations.',
             param_model=SkillXhsAction,
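The finance handler normalizes its methods parameter across these hunks: default to FinanceMethod.GET_INFO, coerce any strings to the enum, then hand plain string values to the retriever. A toy round trip of that pattern; the real FinanceMethod lives in vibe_surf.tools.finance_tools, only GET_INFO and .value are attested here, and the conversion line is an assumption about code elided between hunks:

```python
# Toy stand-in for FinanceMethod showing the string -> enum -> string round
# trip; the real enum's other members are not shown in this diff, and the
# [FinanceMethod(m) for m in methods] line is an assumption.
from enum import Enum

class FinanceMethod(Enum):
    GET_INFO = "get_info"

methods = ["get_info"]  # params.methods arrives as strings
if methods and isinstance(methods[0], str):
    try:
        methods = [FinanceMethod(m) for m in methods]
    except ValueError:
        available_methods = [m.value for m in FinanceMethod]
        raise ValueError(f'Invalid method in {methods}. Available methods: {available_methods}')

method_strings = [method.value for method in methods]
print(method_strings)  # ['get_info']
```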
@@ -968,11 +972,11 @@ Please generate alternative JavaScript code that avoids this system error:"""
         """
         try:
             from vibe_surf.tools.website_api.xhs.client import XiaoHongShuApiClient
-
+
             # Initialize client
             xhs_client = XiaoHongShuApiClient(browser_session=browser_manager.main_browser_session)
             await xhs_client.setup()
-
+
             # Parse params JSON string
             import json
             from json_repair import repair_json
@@ -980,7 +984,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 method_params = json.loads(params.params)
             except json.JSONDecodeError:
                 method_params = json.loads(repair_json(params.params))
-
+
             # Execute the requested method
             result = None
             if params.method == "search_content_by_keyword":
@@ -997,23 +1001,23 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 result = await xhs_client.get_home_recommendations()
             else:
                 return ActionResult(error=f"Unknown method: {params.method}")
-
+
             # Save result to file
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             filename = f"xhs_{params.method}_{timestamp}.json"
             filepath = file_system.get_dir() / "data" / filename
             filepath.parent.mkdir(exist_ok=True)
-
+
             with open(filepath, "w", encoding="utf-8") as f:
                 json.dump(result, f, ensure_ascii=False, indent=2)
-
+
             # Format result as markdown
             if isinstance(result, list):
                 display_count = min(5, len(result))
                 md_content = f"## Xiaohongshu {params.method.replace('_', ' ').title()}\n\n"
                 md_content += f"Showing {display_count} of {len(result)} results:\n\n"
                 for i, item in enumerate(result[:display_count]):
-                    md_content += f"### Result {i+1}\n"
+                    md_content += f"### Result {i + 1}\n"
                     for key, value in item.items():
                         if not value:
                             continue
@@ -1030,27 +1034,26 @@ Please generate alternative JavaScript code that avoids this system error:"""
                         else:
                             md_content += f"- **{key}**: {value}\n"
                     md_content += "\n"
-
+
             # Add file path to markdown
             relative_path = str(filepath.relative_to(file_system.get_dir()))
             md_content += f"\n> 📁 Full data saved to: [{filename}]({relative_path})\n"
             md_content += f"> 💡 Click the link above to view all results.\n"
-
+
             logger.info(f'📕 Xiaohongshu data retrieved with method: {params.method}')
-
+
             # Close client
             await xhs_client.close()
-
+
             return ActionResult(
                 extracted_content=md_content
             )
-
+
         except Exception as e:
             error_msg = f'❌ Failed to retrieve Xiaohongshu data: {str(e)}'
             logger.error(error_msg)
             return ActionResult(error=error_msg, extracted_content=error_msg)
 
-
         @self.registry.action(
             'Skill: Weibo API - Access Weibo platform data including search, post details, comments, user profiles, hot posts, and trending lists. Methods: search_posts_by_keyword, get_post_detail, get_all_post_comments, get_user_info, get_all_user_posts, get_hot_posts(推荐榜), get_trending_posts(热搜榜).',
             param_model=SkillWeiboAction,
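The Weibo, Douyin, and YouTube handlers that follow repeat the Xiaohongshu shape above: dump the full result to a timestamped JSON file under data/, then render at most five items as markdown linking back to the file. The shared skeleton, reduced to a runnable sketch; base_dir and the sample data are illustrative stand-ins for file_system.get_dir() and the API client's return value:

```python
# Runnable sketch of the shared save-and-summarize pattern; base_dir and
# `result` are illustrative stand-ins, not values from the package.
import json
from datetime import datetime
from pathlib import Path

result = [{"title": "post 1", "likes": 10}, {"title": "post 2", "likes": 3}]
base_dir = Path("/tmp/vibesurf_demo")

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"xhs_search_content_by_keyword_{timestamp}.json"
filepath = base_dir / "data" / filename
filepath.parent.mkdir(parents=True, exist_ok=True)  # real code: mkdir(exist_ok=True)
with open(filepath, "w", encoding="utf-8") as f:
    json.dump(result, f, ensure_ascii=False, indent=2)

display_count = min(5, len(result))
md_content = f"Showing {display_count} of {len(result)} results:\n\n"
for i, item in enumerate(result[:display_count]):
    md_content += f"### Result {i + 1}\n"
    for key, value in item.items():
        if not value:
            continue
        md_content += f"- **{key}**: {value}\n"
    md_content += "\n"
md_content += f"\n> 📁 Full data saved to: [{filename}](data/{filename})\n"
print(md_content)
```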
@@ -1074,11 +1077,11 @@ Please generate alternative JavaScript code that avoids this system error:"""
         """
         try:
             from vibe_surf.tools.website_api.weibo.client import WeiboApiClient
-
+
             # Initialize client
             wb_client = WeiboApiClient(browser_session=browser_manager.main_browser_session)
             await wb_client.setup()
-
+
             # Parse params JSON string
             import json
             from json_repair import repair_json
@@ -1086,7 +1089,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 method_params = json.loads(params.params)
             except json.JSONDecodeError:
                 method_params = json.loads(repair_json(params.params))
-
+
             # Execute the requested method
             result = None
             if params.method == "search_posts_by_keyword":
@@ -1105,13 +1108,13 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 result = await wb_client.get_trending_posts()
             else:
                 return ActionResult(error=f"Unknown method: {params.method}")
-
+
             # Save result to file
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             filename = f"weibo_{params.method}_{timestamp}.json"
             filepath = file_system.get_dir() / "data" / filename
             filepath.parent.mkdir(exist_ok=True)
-
+
             with open(filepath, "w", encoding="utf-8") as f:
                 json.dump(result, f, ensure_ascii=False, indent=2)
             # Format result as markdown
@@ -1120,7 +1123,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 md_content = f"## Weibo {params.method.replace('_', ' ').title()}\n\n"
                 md_content += f"Showing {display_count} of {len(result)} results:\n\n"
                 for i, item in enumerate(result[:display_count]):
-                    md_content += f"### Result {i+1}\n"
+                    md_content += f"### Result {i + 1}\n"
                     for key, value in item.items():
                         if not value:
                             continue
@@ -1137,21 +1140,21 @@ Please generate alternative JavaScript code that avoids this system error:"""
                         else:
                             md_content += f"- **{key}**: {value}\n"
                     md_content += "\n"
-
+
             # Add file path to markdown
             relative_path = str(filepath.relative_to(file_system.get_dir()))
             md_content += f"\n> 📁 Full data saved to: [{filename}]({relative_path})\n"
             md_content += f"> 💡 Click the link above to view all results.\n"
-
+
             logger.info(f'🐦 Weibo data retrieved with method: {params.method}')
-
+
             # Close client
             await wb_client.close()
-
+
             return ActionResult(
                 extracted_content=md_content
             )
-
+
         except Exception as e:
             import traceback
             traceback.print_exc()
@@ -1159,7 +1162,6 @@ Please generate alternative JavaScript code that avoids this system error:"""
             logger.error(error_msg)
             return ActionResult(error=error_msg, extracted_content=error_msg)
 
-
         @self.registry.action(
             'Skill: Douyin API - Access Douyin platform data including search, video details, comments, user profiles, and videos. Methods: search_content_by_keyword, fetch_video_details, fetch_all_video_comments, fetch_user_info, fetch_all_user_videos.',
             param_model=SkillDouyinAction,
@@ -1181,11 +1183,11 @@ Please generate alternative JavaScript code that avoids this system error:"""
         """
         try:
             from vibe_surf.tools.website_api.douyin.client import DouyinApiClient
-
+
             # Initialize client
             dy_client = DouyinApiClient(browser_session=browser_manager.main_browser_session)
             await dy_client.setup()
-
+
             # Parse params JSON string
             import json
             from json_repair import repair_json
@@ -1193,7 +1195,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 method_params = json.loads(params.params)
             except json.JSONDecodeError:
                 method_params = json.loads(repair_json(params.params))
-
+
             # Execute the requested method
             result = None
             if params.method == "search_content_by_keyword":
@@ -1208,23 +1210,23 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 result = await dy_client.fetch_all_user_videos(**method_params)
             else:
                 return ActionResult(error=f"Unknown method: {params.method}")
-
+
             # Save result to file
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             filename = f"douyin_{params.method}_{timestamp}.json"
             filepath = file_system.get_dir() / "data" / filename
             filepath.parent.mkdir(exist_ok=True)
-
+
             with open(filepath, "w", encoding="utf-8") as f:
                 json.dump(result, f, ensure_ascii=False, indent=2)
-
+
             # Format result as markdown
             if isinstance(result, list):
                 display_count = min(5, len(result))
                 md_content = f"## Douyin {params.method.replace('_', ' ').title()}\n\n"
                 md_content += f"Showing {display_count} of {len(result)} results:\n\n"
                 for i, item in enumerate(result[:display_count]):
-                    md_content += f"### Result {i+1}\n"
+                    md_content += f"### Result {i + 1}\n"
                     for key, value in item.items():
                         if not value:
                             continue
@@ -1241,27 +1243,26 @@ Please generate alternative JavaScript code that avoids this system error:"""
                         else:
                             md_content += f"- **{key}**: {value}\n"
                     md_content += "\n"
-
+
             # Add file path to markdown
             relative_path = str(filepath.relative_to(file_system.get_dir()))
             md_content += f"\n> 📁 Full data saved to: [{filename}]({relative_path})\n"
             md_content += f"> 💡 Click the link above to view all results.\n"
-
+
             logger.info(f'🎵 Douyin data retrieved with method: {params.method}')
-
+
             # Close client
             await dy_client.close()
-
+
             return ActionResult(
                 extracted_content=md_content
             )
-
+
         except Exception as e:
             error_msg = f'❌ Failed to retrieve Douyin data: {str(e)}'
             logger.error(error_msg)
             return ActionResult(error=error_msg, extracted_content=error_msg)
 
-
         @self.registry.action(
             """Skill: YouTube API - Access YouTube platform data including search, video details, comments, channel info, trending videos, and video transcripts.
 Methods:
@@ -1296,11 +1297,11 @@ Please generate alternative JavaScript code that avoids this system error:"""
         """
         try:
             from vibe_surf.tools.website_api.youtube.client import YouTubeApiClient
-
+
             # Initialize client
             yt_client = YouTubeApiClient(browser_session=browser_manager.main_browser_session)
             await yt_client.setup()
-
+
             # Parse params JSON string
             import json
             from json_repair import repair_json
@@ -1308,7 +1309,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 method_params = json.loads(params.params)
             except json.JSONDecodeError:
                 method_params = json.loads(repair_json(params.params))
-
+
             # Execute the requested method
             result = None
             if params.method == "search_videos":
@@ -1327,23 +1328,23 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 result = await yt_client.get_video_transcript(**method_params)
             else:
                 return ActionResult(error=f"Unknown method: {params.method}")
-
+
             # Save result to file
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             filename = f"youtube_{params.method}_{timestamp}.json"
             filepath = file_system.get_dir() / "data" / filename
             filepath.parent.mkdir(exist_ok=True)
-
+
             with open(filepath, "w", encoding="utf-8") as f:
                 json.dump(result, f, ensure_ascii=False, indent=2)
-
+
             # Format result as markdown
             if isinstance(result, list):
                 display_count = min(5, len(result))
                 md_content = f"## YouTube {params.method.replace('_', ' ').title()}\n\n"
                 md_content += f"Showing {display_count} of {len(result)} results:\n\n"
                 for i, item in enumerate(result[:display_count]):
-                    md_content += f"### Result {i+1}\n"
+                    md_content += f"### Result {i + 1}\n"
                     for key, value in item.items():
                         if not value:
                             continue
@@ -1360,32 +1361,31 @@ Please generate alternative JavaScript code that avoids this system error:"""
                         else:
                             md_content += f"- **{key}**: {value}\n"
                     md_content += "\n"
-
+
             # Add file path to markdown
             relative_path = str(filepath.relative_to(file_system.get_dir()))
             md_content += f"\n> 📁 Full data saved to: [{filename}]({relative_path})\n"
             md_content += f"> 💡 Click the link above to view all results.\n"
-
+
             logger.info(f'🎬 YouTube data retrieved with method: {params.method}')
-
+
             # Close client
             await yt_client.close()
-
+
             return ActionResult(
                 extracted_content=md_content
             )
-
+
         except Exception as e:
             error_msg = f'❌ Failed to retrieve YouTube data: {str(e)}'
             logger.error(error_msg)
             return ActionResult(error=error_msg, extracted_content=error_msg)
 
-
     async def _extract_google_results_rule_based(self, browser_session):
         """Rule-based extraction of Google search results using JavaScript"""
         try:
             cdp_session = await browser_session.get_or_create_cdp_session()
-
+
             # JavaScript code to extract Google search results using DOM selectors
             js_extraction_code = """
             (function() {
@@ -1491,27 +1491,27 @@ Please generate alternative JavaScript code that avoids this system error:"""
             }
             })()
             """
-
+
             # Execute JavaScript to extract results
             result = await cdp_session.cdp_client.send.Runtime.evaluate(
                 params={'expression': js_extraction_code, 'returnByValue': True, 'awaitPromise': True},
                 session_id=cdp_session.session_id,
             )
-
+
             if result.get('exceptionDetails'):
                 logger.warning(f"JavaScript extraction failed: {result['exceptionDetails']}")
                 return []
-
+
             result_data = result.get('result', {})
             value = result_data.get('value', '[]')
-
+
             try:
                 extracted_results = json.loads(value)
                 return extracted_results if isinstance(extracted_results, list) else []
             except (json.JSONDecodeError, ValueError):
                 logger.warning(f"Failed to parse extraction results: {value}")
                 return []
-
+
         except Exception as e:
             logger.error(f"Rule-based extraction failed: {e}")
             return []
@@ -1533,10 +1533,10 @@ Please generate alternative JavaScript code that avoids this system error:"""
             # Rule-based extraction succeeded
             logger.debug(f"Rule-based extraction found {len(results)} results for query: {query}")
             return results[:search_ret_len]  # Return top 6 results
-
+
         # Fallback to LLM extraction if rule-based fails
         logger.warning(f"Rule-based extraction failed for query '{query}', falling back to LLM")
-
+
         extraction_query = f"""
 Extract the top {search_ret_len} search results from this Google search page. For each result, provide:
 - title: The clickable title/headline
@@ -1577,26 +1577,26 @@ Return results as a JSON array: [{{"title": "...", "url": "...", "summary": "...
         """Rule-based deduplication to reduce dataset before LLM processing"""
         if not results:
             return []
-
+
         deduplicated = []
         seen_urls = set()
         seen_titles = set()
-
+
         for result in results:
             url = result.get('url', '').strip()
             title = result.get('title', '').strip().lower()
-
+
             # Skip results with missing essential data
             if not url or not title or url == 'No URL' or title == 'no title':
                 continue
-
+
             # Normalize URL for comparison (remove fragments, query params for deduplication)
             normalized_url = url.split('#')[0].split('?')[0].lower()
-
+
             # Check for duplicate URLs
             if normalized_url in seen_urls:
                 continue
-
+
             # Check for very similar titles (basic similarity)
             title_normalized = ''.join(c for c in title if c.isalnum()).lower()
             if len(title_normalized) > 10:  # Only check titles with substantial content
@@ -1609,35 +1609,35 @@ Return results as a JSON array: [{{"title": "...", "url": "...", "summary": "...
                     if similarity > 0.8:
                         similar_found = True
                         break
-
+
                 if similar_found:
                     continue
-
+
             # Add to deduplicated results
             seen_urls.add(normalized_url)
             seen_titles.add(title_normalized)
             deduplicated.append(result)
-
+
         # Sort by relevance indicators (prioritize results with longer summaries, non-generic titles)
         def relevance_score(result):
             score = 0
             title = result.get('title', '')
             summary = result.get('summary', '')
-
+
             # Longer summaries are typically more informative
             score += min(len(summary), 200) / 10
-
+
             # Non-generic titles score higher
             generic_terms = ['search results', 'no title', 'error', 'loading']
             if not any(term in title.lower() for term in generic_terms):
                 score += 10
-
+
             # Prefer results with actual descriptions
             if summary and summary != 'No description available' and len(summary) > 20:
                 score += 5
-
+
             return score
-
+
         deduplicated.sort(key=relevance_score, reverse=True)
         return deduplicated
 
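Taken together, the deduplication helper normalizes URLs (dropping fragments and query strings), skips near-duplicate titles, and then sorts by a heuristic relevance score. A self-contained run of the normalization and scoring pieces:

```python
# Self-contained demo of the URL normalization and relevance heuristics above;
# the sample results are illustrative.
results = [
    {"url": "https://example.com/a?ref=1", "title": "Deep Dive", "summary": "A long, detailed description of the topic."},
    {"url": "https://example.com/a#section", "title": "Deep Dive", "summary": "Duplicate of the first result."},
    {"url": "https://example.com/b", "title": "no title", "summary": ""},
]

seen_urls = set()
deduplicated = []
for r in results:
    url, title = r["url"].strip(), r["title"].strip().lower()
    if not url or not title or title == "no title":
        continue
    normalized = url.split("#")[0].split("?")[0].lower()  # drop fragment and query
    if normalized in seen_urls:
        continue
    seen_urls.add(normalized)
    deduplicated.append(r)

def relevance_score(r):
    score = min(len(r.get("summary", "")), 200) / 10  # longer summaries win
    if not any(t in r.get("title", "").lower() for t in ["search results", "no title", "error", "loading"]):
        score += 10  # non-generic title
    if r.get("summary") and len(r["summary"]) > 20:
        score += 5  # has a real description
    return score

deduplicated.sort(key=relevance_score, reverse=True)
print([r["url"] for r in deduplicated])  # only the first example.com/a variant survives
```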
@@ -2239,6 +2239,143 @@ You will be given a query and the markdown of a webpage that has been filtered t
                 long_term_memory=result,
             )
 
+        @self.registry.action(
+            'Grep content from file - search for query or keywords and return surrounding context (simulates Linux grep command). For images, uses OCR to extract text first then performs grep search.',
+            param_model=GrepContentAction,
+        )
+        async def grep_content_from_file(
+                params: GrepContentAction,
+                page_extraction_llm: BaseChatModel,
+                file_system: CustomFileSystem,
+        ):
+            try:
+                # Get file path
+                file_path = params.file_path
+                full_file_path = file_path
+                # Check if file exists
+                if not os.path.exists(full_file_path):
+                    full_file_path = os.path.join(str(file_system.get_dir()), file_path)
+
+                # Determine if file is an image based on MIME type
+                mime_type, _ = mimetypes.guess_type(file_path)
+                is_image = mime_type and mime_type.startswith('image/')
+
+                if is_image:
+                    # Handle image files with LLM vision for OCR
+                    try:
+                        # Read image file and encode to base64
+                        with open(full_file_path, 'rb') as image_file:
+                            image_data = image_file.read()
+                        image_base64 = base64.b64encode(image_data).decode('utf-8')
+
+                        # Create content parts for OCR
+                        content_parts: list[ContentPartTextParam | ContentPartImageParam] = [
+                            ContentPartTextParam(
+                                text="Please extract all text content from this image for search purposes. Return only the extracted text, no additional explanations.")
+                        ]
+
+                        # Add the image
+                        content_parts.append(
+                            ContentPartImageParam(
+                                image_url=ImageURL(
+                                    url=f'data:{mime_type};base64,{image_base64}',
+                                    media_type=mime_type,
+                                    detail='high',
+                                ),
+                            )
+                        )
+
+                        # Create user message and invoke LLM for OCR
+                        user_message = UserMessage(content=content_parts, cache=True)
+                        response = await asyncio.wait_for(
+                            page_extraction_llm.ainvoke([user_message]),
+                            timeout=120.0,
+                        )
+
+                        file_content = response.completion
+
+                    except Exception as e:
+                        raise Exception(f'Failed to process image file {file_path} for OCR: {str(e)}')
+
+                else:
+                    # Handle non-image files by reading content
+                    try:
+                        file_content = await file_system.read_file(full_file_path, external_file=True)
+                    except Exception as e:
+                        raise Exception(f'Failed to read file {file_path}: {str(e)}')
+
+                # Perform grep search
+                search_query = params.query.lower()
+                context_chars = params.context_chars
+
+                # Find all matches with context
+                matches = []
+                content_lower = file_content.lower()
+                search_start = 0
+
+                while True:
+                    match_pos = content_lower.find(search_query, search_start)
+                    if match_pos == -1:
+                        break
+
+                    # Calculate context boundaries
+                    start_pos = max(0, match_pos - context_chars)
+                    end_pos = min(len(file_content), match_pos + len(search_query) + context_chars)
+
+                    # Extract context with the match
+                    context_before = file_content[start_pos:match_pos]
+                    matched_text = file_content[match_pos:match_pos + len(search_query)]
+                    context_after = file_content[match_pos + len(search_query):end_pos]
+
+                    # Add ellipsis if truncated
+                    if start_pos > 0:
+                        context_before = "..." + context_before
+                    if end_pos < len(file_content):
+                        context_after = context_after + "..."
+
+                    matches.append({
+                        'context_before': context_before,
+                        'matched_text': matched_text,
+                        'context_after': context_after,
+                        'position': match_pos
+                    })
+
+                    search_start = match_pos + 1
+
+                # Format results
+                if not matches:
+                    extracted_content = f'File: {file_path}\nQuery: "{params.query}"\nResult: No matches found'
+                else:
+                    result_text = f'File: {file_path}\nQuery: "{params.query}"\nFound {len(matches)} match(es):\n\n'
+
+                    for i, match in enumerate(matches, 1):
+                        result_text += f"Match {i} (position: {match['position']}):\n"
+                        result_text += f"{match['context_before']}[{match['matched_text']}]{match['context_after']}\n\n"
+
+                    extracted_content = result_text.strip()
+
+                # Handle memory storage
+                if len(extracted_content) < 1000:
+                    memory = extracted_content
+                    include_extracted_content_only_once = False
+                else:
+                    save_result = await file_system.save_extracted_content(extracted_content)
+                    memory = (
+                        f'Grep search completed in file {file_path} for query: {params.query}\nFound {len(matches)} match(es)\nContent saved to file system: {save_result}'
+                    )
+                    include_extracted_content_only_once = True
+
+                logger.info(f'🔍 Grep search completed in file: {file_path}, found {len(matches)} match(es)')
+                return ActionResult(
+                    extracted_content=extracted_content,
+                    include_extracted_content_only_once=include_extracted_content_only_once,
+                    long_term_memory=memory,
+                )
+
+            except Exception as e:
+                logger.debug(f'Error grep searching content from file: {e}')
+                raise RuntimeError(str(e))
+
         @self.registry.action(
             'Create a directory within the FileSystem.'
         )
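The core of the new grep_content_from_file action is a case-insensitive sliding search that returns each hit with a window of surrounding characters. The matching loop, lifted out of the action for a quick standalone check:

```python
# Standalone check of the context-window matching loop from grep_content_from_file;
# the sample text and window size are illustrative.
file_content = "Grep is a search tool. Use grep to find text. GREP everywhere."
search_query = "grep"
context_chars = 10

matches, search_start = [], 0
content_lower = file_content.lower()
while True:
    match_pos = content_lower.find(search_query, search_start)
    if match_pos == -1:
        break
    start = max(0, match_pos - context_chars)
    end = min(len(file_content), match_pos + len(search_query) + context_chars)
    matches.append((match_pos, file_content[start:end]))
    search_start = match_pos + 1

for pos, snippet in matches:
    print(f"Match at {pos}: ...{snippet}...")
# Finds "Grep", "grep", and "GREP": the search is case-insensitive, but the
# original casing is preserved in the extracted context.
```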
@@ -2334,6 +2471,72 @@ You will be given a query and the markdown of a webpage that has been filtered t
         self.mcp_clients.clear()
         logger.info('All MCP clients unregistered and disconnected')
 
+    async def register_composio_clients(self, composio_instance: Optional[Any] = None,
+                                        toolkit_tools_dict: Optional[Dict[str, Any]] = None):
+        """
+        Register Composio tools to the registry.
+
+        Args:
+            composio_instance: Composio instance (optional, can be None initially)
+            toolkit_tools_dict: Dict of toolkit_slug -> tools list
+        """
+        try:
+            # Initialize Composio client if not exists
+            if self.composio_client is None:
+                self.composio_client = ComposioClient(composio_instance=composio_instance)
+            else:
+                # Update the composio instance
+                self.composio_client.update_composio_instance(composio_instance)
+
+            # Register tools if we have both instance and toolkit tools
+            if composio_instance and toolkit_tools_dict:
+                await self.composio_client.register_to_tools(
+                    tools=self,
+                    toolkit_tools_dict=toolkit_tools_dict,
+                    prefix="cpo."
+                )
+                logger.info(f'Successfully registered Composio tools from {len(toolkit_tools_dict)} toolkits')
+            elif not composio_instance:
+                logger.info("Composio client initialized without instance - will register tools later")
+            elif not toolkit_tools_dict:
+                logger.info("Composio client initialized without toolkit tools - will register tools later")
+
+        except Exception as e:
+            logger.error(f'Failed to register Composio clients: {str(e)}')
+
+    async def unregister_composio_clients(self):
+        """
+        Unregister all Composio tools from the registry.
+        """
+        try:
+            if self.composio_client:
+                self.composio_client.unregister_all_tools(self)
+                logger.info('All Composio tools unregistered')
+
+        except Exception as e:
+            logger.error(f'Failed to unregister Composio clients: {str(e)}')
+
+    async def update_composio_tools(self, composio_instance: Optional[Any] = None,
+                                    toolkit_tools_dict: Optional[Dict[str, Any]] = None):
+        """
+        Update Composio tools by unregistering old ones and registering new ones.
+
+        Args:
+            composio_instance: Composio instance
+            toolkit_tools_dict: Dict of toolkit_slug -> tools list
+        """
+        try:
+            # Unregister existing tools
+            await self.unregister_composio_clients()
+
+            # Register new tools
+            await self.register_composio_clients(composio_instance, toolkit_tools_dict)
+
+            logger.info('Composio tools updated successfully')
+
+        except Exception as e:
+            logger.error(f'Failed to update Composio tools: {str(e)}')
+
     @time_execution_sync('--act')
     async def act(
         self,