vibesurf 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. vibe_surf/_version.py +2 -2
  2. vibe_surf/agents/browser_use_agent.py +1 -1
  3. vibe_surf/agents/prompts/vibe_surf_prompt.py +6 -0
  4. vibe_surf/agents/report_writer_agent.py +50 -0
  5. vibe_surf/agents/vibe_surf_agent.py +56 -1
  6. vibe_surf/backend/api/composio.py +952 -0
  7. vibe_surf/backend/database/migrations/v005_add_composio_integration.sql +33 -0
  8. vibe_surf/backend/database/migrations/v006_add_credentials_table.sql +26 -0
  9. vibe_surf/backend/database/models.py +53 -1
  10. vibe_surf/backend/database/queries.py +312 -2
  11. vibe_surf/backend/main.py +28 -0
  12. vibe_surf/backend/shared_state.py +123 -9
  13. vibe_surf/chrome_extension/scripts/api-client.js +32 -0
  14. vibe_surf/chrome_extension/scripts/settings-manager.js +954 -1
  15. vibe_surf/chrome_extension/sidepanel.html +190 -0
  16. vibe_surf/chrome_extension/styles/settings-integrations.css +927 -0
  17. vibe_surf/chrome_extension/styles/settings-modal.css +7 -3
  18. vibe_surf/chrome_extension/styles/settings-responsive.css +37 -5
  19. vibe_surf/cli.py +98 -3
  20. vibe_surf/telemetry/__init__.py +60 -0
  21. vibe_surf/telemetry/service.py +112 -0
  22. vibe_surf/telemetry/views.py +156 -0
  23. vibe_surf/tools/browser_use_tools.py +90 -90
  24. vibe_surf/tools/composio_client.py +456 -0
  25. vibe_surf/tools/mcp_client.py +21 -2
  26. vibe_surf/tools/vibesurf_tools.py +290 -87
  27. vibe_surf/tools/views.py +16 -0
  28. vibe_surf/tools/website_api/youtube/client.py +35 -13
  29. vibe_surf/utils.py +13 -0
  30. {vibesurf-0.1.31.dist-info → vibesurf-0.1.33.dist-info}/METADATA +11 -9
  31. {vibesurf-0.1.31.dist-info → vibesurf-0.1.33.dist-info}/RECORD +35 -26
  32. {vibesurf-0.1.31.dist-info → vibesurf-0.1.33.dist-info}/WHEEL +0 -0
  33. {vibesurf-0.1.31.dist-info → vibesurf-0.1.33.dist-info}/entry_points.txt +0 -0
  34. {vibesurf-0.1.31.dist-info → vibesurf-0.1.33.dist-info}/licenses/LICENSE +0 -0
  35. {vibesurf-0.1.31.dist-info → vibesurf-0.1.33.dist-info}/top_level.txt +0 -0
vibe_surf/tools/vibesurf_tools.py
@@ -32,9 +32,10 @@ from vibe_surf.browser.agent_browser_session import AgentBrowserSession
 from vibe_surf.tools.views import HoverAction, ExtractionAction, FileExtractionAction, BrowserUseAgentExecution, \
     ReportWriterTask, TodoGenerateAction, TodoModifyAction, VibeSurfDoneAction, SkillSearchAction, SkillCrawlAction, \
     SkillSummaryAction, SkillTakeScreenshotAction, SkillDeepResearchAction, SkillCodeAction, SkillFinanceAction, \
-    SkillXhsAction, SkillDouyinAction, SkillYoutubeAction, SkillWeiboAction
+    SkillXhsAction, SkillDouyinAction, SkillYoutubeAction, SkillWeiboAction, GrepContentAction
 from vibe_surf.tools.finance_tools import FinanceDataRetriever, FinanceMarkdownFormatter, FinanceMethod
 from vibe_surf.tools.mcp_client import CustomMCPClient
+from vibe_surf.tools.composio_client import ComposioClient
 from vibe_surf.tools.file_system import CustomFileSystem
 from vibe_surf.browser.browser_manager import BrowserManager
 from vibe_surf.tools.vibesurf_registry import VibeSurfRegistry
@@ -166,7 +167,8 @@ def convert_selector_map_for_llm(selector_map) -> dict:
 
 
 class VibeSurfTools:
-    def __init__(self, exclude_actions: list[str] = [], mcp_server_config: Optional[Dict[str, Any]] = None):
+    def __init__(self, exclude_actions: list[str] = [], mcp_server_config: Optional[Dict[str, Any]] = None,
+                 composio_client: ComposioClient = None):
         self.registry = VibeSurfRegistry(exclude_actions)
         self._register_file_actions()
         self._register_browser_use_agent()
@@ -176,6 +178,7 @@ class VibeSurfTools:
         self._register_skills()
         self.mcp_server_config = mcp_server_config
         self.mcp_clients: Dict[str, MCPClient] = {}
+        self.composio_client: ComposioClient = composio_client
 
     def _register_skills(self):
         @self.registry.action(
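
The net effect of the two hunks above is that a Composio client can now be injected when the toolset is built. A minimal wiring sketch, assuming only what this diff shows (`VibeSurfTools`, `ComposioClient`, and the parameter names are from the hunks; passing `composio_instance=None` to defer attachment is inferred from `register_composio_clients()` later in this diff):

```python
from vibe_surf.tools.vibesurf_tools import VibeSurfTools
from vibe_surf.tools.composio_client import ComposioClient

# Assumption: ComposioClient(composio_instance=...) matches the constructor
# call used in register_composio_clients() below; None defers registration.
composio_client = ComposioClient(composio_instance=None)

tools = VibeSurfTools(
    exclude_actions=[],              # keep all registered actions
    mcp_server_config=None,          # optional MCP servers, unchanged from 0.1.31
    composio_client=composio_client, # new in 0.1.33
)
```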
@@ -290,7 +293,7 @@ Example format: ["query 1", "query 2", "query 3", "query 4", "query 5", "query 6
                         "url": result.get('url', 'No URL'),
                         "summary": result.get('summary', 'No summary available')
                     })
-
+
                 ranking_prompt = f"""
 Rank these search results for the query "{params.query}" by relevance and value.
 Select the TOP 10 most relevant and valuable results.
@@ -315,7 +318,8 @@ Format: [index1, index2, index3, ...]
                 if not isinstance(selected_indices, list):
                     raise ValueError("Invalid ranking results format")
                 # Ensure indices are valid and limit to 10
-                valid_indices = [i for i in selected_indices if isinstance(i, int) and 0 <= i < len(all_results)][:10]
+                valid_indices = [i for i in selected_indices if
+                                 isinstance(i, int) and 0 <= i < len(all_results)][:10]
                 if valid_indices:
                     top_results = [all_results[i] for i in valid_indices]
                 else:
@@ -325,7 +329,8 @@ Format: [index1, index2, index3, ...]
                 selected_indices_s = repair_json(ranking_response.completion.strip())
                 selected_indices = json.loads(selected_indices_s)
                 if isinstance(selected_indices, list):
-                    valid_indices = [i for i in selected_indices if isinstance(i, int) and 0 <= i < len(all_results)][:10]
+                    valid_indices = [i for i in selected_indices if
+                                     isinstance(i, int) and 0 <= i < len(all_results)][:10]
                     if valid_indices:
                         top_results = [all_results[i] for i in valid_indices]
                     else:
@@ -897,7 +902,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
             try:
                 # Default to get_info if no methods specified
                 methods = params.methods if params.methods else [FinanceMethod.GET_INFO]
-
+
                 # Convert string methods to FinanceMethod enum if needed
                 if methods and isinstance(methods[0], str):
                     try:
@@ -907,13 +912,13 @@ Please generate alternative JavaScript code that avoids this system error:"""
                         return ActionResult(
                             error=f'Invalid method in {methods}. Available methods: {available_methods}'
                         )
-
+
                 # Create data retriever with symbol
                 retriever = FinanceDataRetriever(params.symbol)
-
+
                 # Convert FinanceMethod enum values to strings for the retriever
                 method_strings = [method.value for method in methods]
-
+
                 # Retrieve financial data
                 financial_data = retriever.get_finance_data(
                     methods=method_strings,
@@ -923,29 +928,28 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     interval=getattr(params, 'interval', '1d'),
                     num_news=getattr(params, 'num_news', 5)
                 )
-
+
                 # Format as markdown using the static method
                 markdown_content = FinanceMarkdownFormatter.format_finance_data(
                     symbol=params.symbol,
                     results=financial_data,
                     methods=method_strings
                 )
-
+
                 method_names = [method.value for method in methods]
                 logger.info(f'💹 Comprehensive finance data retrieved for {params.symbol} with methods: {method_names}')
-
+
                 return ActionResult(
                     extracted_content=markdown_content,
                     include_extracted_content_only_once=True,
                     long_term_memory=f'Retrieved comprehensive financial data for {params.symbol} using methods: {", ".join(method_names)}',
                 )
-
+
             except Exception as e:
                 error_msg = f'❌ Failed to retrieve financial data for {params.symbol}: {str(e)}'
                 logger.error(error_msg)
                 return ActionResult(error=error_msg, extracted_content=error_msg)
 
-
         @self.registry.action(
             'Skill: Xiaohongshu API - Access Xiaohongshu (Little Red Book) platform data including search, content details, comments, user profiles, and recommendations. Methods: search_content_by_keyword, fetch_content_details, fetch_all_content_comments, get_user_profile, fetch_all_user_content, get_home_recommendations.',
             param_model=SkillXhsAction,
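
The finance hunks above are whitespace-only, but they outline the retrieval flow the skill wraps. A condensed sketch of that flow using only the calls visible in the hunks (the symbol is illustrative, and any keyword arguments that fall between the two hunks are omitted):

```python
from vibe_surf.tools.finance_tools import (
    FinanceDataRetriever,
    FinanceMarkdownFormatter,
    FinanceMethod,
)

methods = [FinanceMethod.GET_INFO]          # the action's default when none are given
method_strings = [m.value for m in methods]

retriever = FinanceDataRetriever("AAPL")    # illustrative symbol
financial_data = retriever.get_finance_data(
    methods=method_strings,
    interval="1d",                          # the action reads these via getattr() defaults
    num_news=5,
)
markdown_content = FinanceMarkdownFormatter.format_finance_data(
    symbol="AAPL",
    results=financial_data,
    methods=method_strings,
)
```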
@@ -968,11 +972,11 @@ Please generate alternative JavaScript code that avoids this system error:"""
             """
             try:
                 from vibe_surf.tools.website_api.xhs.client import XiaoHongShuApiClient
-
+
                 # Initialize client
                 xhs_client = XiaoHongShuApiClient(browser_session=browser_manager.main_browser_session)
                 await xhs_client.setup()
-
+
                 # Parse params JSON string
                 import json
                 from json_repair import repair_json
@@ -980,7 +984,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     method_params = json.loads(params.params)
                 except json.JSONDecodeError:
                     method_params = json.loads(repair_json(params.params))
-
+
                 # Execute the requested method
                 result = None
                 if params.method == "search_content_by_keyword":
@@ -997,23 +1001,23 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     result = await xhs_client.get_home_recommendations()
                 else:
                     return ActionResult(error=f"Unknown method: {params.method}")
-
+
                 # Save result to file
                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 filename = f"xhs_{params.method}_{timestamp}.json"
                 filepath = file_system.get_dir() / "data" / filename
                 filepath.parent.mkdir(exist_ok=True)
-
+
                 with open(filepath, "w", encoding="utf-8") as f:
                     json.dump(result, f, ensure_ascii=False, indent=2)
-
+
                 # Format result as markdown
                 if isinstance(result, list):
                     display_count = min(5, len(result))
                     md_content = f"## Xiaohongshu {params.method.replace('_', ' ').title()}\n\n"
                     md_content += f"Showing {display_count} of {len(result)} results:\n\n"
                     for i, item in enumerate(result[:display_count]):
-                        md_content += f"### Result {i+1}\n"
+                        md_content += f"### Result {i + 1}\n"
                         for key, value in item.items():
                             if not value:
                                 continue
@@ -1030,27 +1034,26 @@ Please generate alternative JavaScript code that avoids this system error:"""
                             else:
                                 md_content += f"- **{key}**: {value}\n"
                         md_content += "\n"
-
+
                 # Add file path to markdown
                 relative_path = str(filepath.relative_to(file_system.get_dir()))
                 md_content += f"\n> 📁 Full data saved to: [{filename}]({relative_path})\n"
                 md_content += f"> 💡 Click the link above to view all results.\n"
-
+
                 logger.info(f'📕 Xiaohongshu data retrieved with method: {params.method}')
-
+
                 # Close client
                 await xhs_client.close()
-
+
                 return ActionResult(
                     extracted_content=md_content
                 )
-
+
             except Exception as e:
                 error_msg = f'❌ Failed to retrieve Xiaohongshu data: {str(e)}'
                 logger.error(error_msg)
                 return ActionResult(error=error_msg, extracted_content=error_msg)
 
-
         @self.registry.action(
             'Skill: Weibo API - Access Weibo platform data including search, post details, comments, user profiles, hot posts, and trending lists. Methods: search_posts_by_keyword, get_post_detail, get_all_post_comments, get_user_info, get_all_user_posts, get_hot_posts(推荐榜), get_trending_posts(热搜榜).',
             param_model=SkillWeiboAction,
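
The Xiaohongshu hunks above (and the Weibo, Douyin, and YouTube hunks that follow) all share one save-and-summarize pattern: dump the raw result to `data/<platform>_<method>_<timestamp>.json`, then render the first five items as markdown with a link to the full file. A hypothetical helper distilling that pattern; the real actions inline this logic and also special-case nested dict/list values, which is elided between the hunks:

```python
import json
from datetime import datetime
from pathlib import Path

def save_and_summarize(result: list, platform: str, method: str, base_dir: Path) -> str:
    """Condensed sketch of the repeated skill-result handling in this diff."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filepath = base_dir / "data" / f"{platform}_{method}_{timestamp}.json"
    filepath.parent.mkdir(exist_ok=True)
    filepath.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")

    display_count = min(5, len(result))
    md = f"## {platform.title()} {method.replace('_', ' ').title()}\n\n"
    md += f"Showing {display_count} of {len(result)} results:\n\n"
    for i, item in enumerate(result[:display_count]):
        md += f"### Result {i + 1}\n"
        for key, value in item.items():
            if value:  # the actions skip empty values
                md += f"- **{key}**: {value}\n"
        md += "\n"
    relative_path = filepath.relative_to(base_dir)
    md += f"\n> 📁 Full data saved to: [{filepath.name}]({relative_path})\n"
    return md
```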
@@ -1074,11 +1077,11 @@ Please generate alternative JavaScript code that avoids this system error:"""
             """
             try:
                 from vibe_surf.tools.website_api.weibo.client import WeiboApiClient
-
+
                 # Initialize client
                 wb_client = WeiboApiClient(browser_session=browser_manager.main_browser_session)
                 await wb_client.setup()
-
+
                 # Parse params JSON string
                 import json
                 from json_repair import repair_json
@@ -1086,7 +1089,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     method_params = json.loads(params.params)
                 except json.JSONDecodeError:
                     method_params = json.loads(repair_json(params.params))
-
+
                 # Execute the requested method
                 result = None
                 if params.method == "search_posts_by_keyword":
@@ -1105,13 +1108,13 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     result = await wb_client.get_trending_posts()
                 else:
                     return ActionResult(error=f"Unknown method: {params.method}")
-
+
                 # Save result to file
                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 filename = f"weibo_{params.method}_{timestamp}.json"
                 filepath = file_system.get_dir() / "data" / filename
                 filepath.parent.mkdir(exist_ok=True)
-
+
                 with open(filepath, "w", encoding="utf-8") as f:
                     json.dump(result, f, ensure_ascii=False, indent=2)
                 # Format result as markdown
@@ -1120,7 +1123,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     md_content = f"## Weibo {params.method.replace('_', ' ').title()}\n\n"
                     md_content += f"Showing {display_count} of {len(result)} results:\n\n"
                     for i, item in enumerate(result[:display_count]):
-                        md_content += f"### Result {i+1}\n"
+                        md_content += f"### Result {i + 1}\n"
                         for key, value in item.items():
                             if not value:
                                 continue
@@ -1137,21 +1140,21 @@ Please generate alternative JavaScript code that avoids this system error:"""
                             else:
                                 md_content += f"- **{key}**: {value}\n"
                         md_content += "\n"
-
+
                 # Add file path to markdown
                 relative_path = str(filepath.relative_to(file_system.get_dir()))
                 md_content += f"\n> 📁 Full data saved to: [{filename}]({relative_path})\n"
                 md_content += f"> 💡 Click the link above to view all results.\n"
-
+
                 logger.info(f'🐦 Weibo data retrieved with method: {params.method}')
-
+
                 # Close client
                 await wb_client.close()
-
+
                 return ActionResult(
                     extracted_content=md_content
                 )
-
+
             except Exception as e:
                 import traceback
                 traceback.print_exc()
@@ -1159,7 +1162,6 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 logger.error(error_msg)
                 return ActionResult(error=error_msg, extracted_content=error_msg)
 
-
         @self.registry.action(
             'Skill: Douyin API - Access Douyin platform data including search, video details, comments, user profiles, and videos. Methods: search_content_by_keyword, fetch_video_details, fetch_all_video_comments, fetch_user_info, fetch_all_user_videos.',
             param_model=SkillDouyinAction,
@@ -1181,11 +1183,11 @@ Please generate alternative JavaScript code that avoids this system error:"""
             """
             try:
                 from vibe_surf.tools.website_api.douyin.client import DouyinApiClient
-
+
                 # Initialize client
                 dy_client = DouyinApiClient(browser_session=browser_manager.main_browser_session)
                 await dy_client.setup()
-
+
                 # Parse params JSON string
                 import json
                 from json_repair import repair_json
@@ -1193,7 +1195,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     method_params = json.loads(params.params)
                 except json.JSONDecodeError:
                     method_params = json.loads(repair_json(params.params))
-
+
                 # Execute the requested method
                 result = None
                 if params.method == "search_content_by_keyword":
@@ -1208,23 +1210,23 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     result = await dy_client.fetch_all_user_videos(**method_params)
                 else:
                     return ActionResult(error=f"Unknown method: {params.method}")
-
+
                 # Save result to file
                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 filename = f"douyin_{params.method}_{timestamp}.json"
                 filepath = file_system.get_dir() / "data" / filename
                 filepath.parent.mkdir(exist_ok=True)
-
+
                 with open(filepath, "w", encoding="utf-8") as f:
                     json.dump(result, f, ensure_ascii=False, indent=2)
-
+
                 # Format result as markdown
                 if isinstance(result, list):
                     display_count = min(5, len(result))
                     md_content = f"## Douyin {params.method.replace('_', ' ').title()}\n\n"
                     md_content += f"Showing {display_count} of {len(result)} results:\n\n"
                     for i, item in enumerate(result[:display_count]):
-                        md_content += f"### Result {i+1}\n"
+                        md_content += f"### Result {i + 1}\n"
                         for key, value in item.items():
                             if not value:
                                 continue
@@ -1241,27 +1243,26 @@ Please generate alternative JavaScript code that avoids this system error:"""
                             else:
                                 md_content += f"- **{key}**: {value}\n"
                         md_content += "\n"
-
+
                 # Add file path to markdown
                 relative_path = str(filepath.relative_to(file_system.get_dir()))
                 md_content += f"\n> 📁 Full data saved to: [{filename}]({relative_path})\n"
                 md_content += f"> 💡 Click the link above to view all results.\n"
-
+
                 logger.info(f'🎵 Douyin data retrieved with method: {params.method}')
-
+
                 # Close client
                 await dy_client.close()
-
+
                 return ActionResult(
                     extracted_content=md_content
                 )
-
+
             except Exception as e:
                 error_msg = f'❌ Failed to retrieve Douyin data: {str(e)}'
                 logger.error(error_msg)
                 return ActionResult(error=error_msg, extracted_content=error_msg)
 
-
         @self.registry.action(
             """Skill: YouTube API - Access YouTube platform data including search, video details, comments, channel info, trending videos, and video transcripts.
 Methods:
@@ -1296,11 +1297,11 @@ Please generate alternative JavaScript code that avoids this system error:"""
             """
             try:
                 from vibe_surf.tools.website_api.youtube.client import YouTubeApiClient
-
+
                 # Initialize client
                 yt_client = YouTubeApiClient(browser_session=browser_manager.main_browser_session)
                 await yt_client.setup()
-
+
                 # Parse params JSON string
                 import json
                 from json_repair import repair_json
@@ -1308,7 +1309,7 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     method_params = json.loads(params.params)
                 except json.JSONDecodeError:
                     method_params = json.loads(repair_json(params.params))
-
+
                 # Execute the requested method
                 result = None
                 if params.method == "search_videos":
@@ -1327,23 +1328,23 @@ Please generate alternative JavaScript code that avoids this system error:"""
                     result = await yt_client.get_video_transcript(**method_params)
                 else:
                     return ActionResult(error=f"Unknown method: {params.method}")
-
+
                 # Save result to file
                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 filename = f"youtube_{params.method}_{timestamp}.json"
                 filepath = file_system.get_dir() / "data" / filename
                 filepath.parent.mkdir(exist_ok=True)
-
+
                 with open(filepath, "w", encoding="utf-8") as f:
                     json.dump(result, f, ensure_ascii=False, indent=2)
-
+
                 # Format result as markdown
                 if isinstance(result, list):
                     display_count = min(5, len(result))
                     md_content = f"## YouTube {params.method.replace('_', ' ').title()}\n\n"
                     md_content += f"Showing {display_count} of {len(result)} results:\n\n"
                     for i, item in enumerate(result[:display_count]):
-                        md_content += f"### Result {i+1}\n"
+                        md_content += f"### Result {i + 1}\n"
                         for key, value in item.items():
                             if not value:
                                 continue
@@ -1360,32 +1361,31 @@ Please generate alternative JavaScript code that avoids this system error:"""
                             else:
                                 md_content += f"- **{key}**: {value}\n"
                         md_content += "\n"
-
+
                 # Add file path to markdown
                 relative_path = str(filepath.relative_to(file_system.get_dir()))
                 md_content += f"\n> 📁 Full data saved to: [{filename}]({relative_path})\n"
                 md_content += f"> 💡 Click the link above to view all results.\n"
-
+
                 logger.info(f'🎬 YouTube data retrieved with method: {params.method}')
-
+
                 # Close client
                 await yt_client.close()
-
+
                 return ActionResult(
                     extracted_content=md_content
                 )
-
+
             except Exception as e:
                 error_msg = f'❌ Failed to retrieve YouTube data: {str(e)}'
                 logger.error(error_msg)
                 return ActionResult(error=error_msg, extracted_content=error_msg)
 
-
     async def _extract_google_results_rule_based(self, browser_session):
         """Rule-based extraction of Google search results using JavaScript"""
         try:
             cdp_session = await browser_session.get_or_create_cdp_session()
-
+
             # JavaScript code to extract Google search results using DOM selectors
             js_extraction_code = """
             (function() {
@@ -1491,27 +1491,27 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 }
             })()
             """
-
+
             # Execute JavaScript to extract results
             result = await cdp_session.cdp_client.send.Runtime.evaluate(
                 params={'expression': js_extraction_code, 'returnByValue': True, 'awaitPromise': True},
                 session_id=cdp_session.session_id,
             )
-
+
             if result.get('exceptionDetails'):
                 logger.warning(f"JavaScript extraction failed: {result['exceptionDetails']}")
                 return []
-
+
             result_data = result.get('result', {})
             value = result_data.get('value', '[]')
-
+
             try:
                 extracted_results = json.loads(value)
                 return extracted_results if isinstance(extracted_results, list) else []
             except (json.JSONDecodeError, ValueError):
                 logger.warning(f"Failed to parse extraction results: {value}")
                 return []
-
+
         except Exception as e:
             logger.error(f"Rule-based extraction failed: {e}")
             return []
@@ -1533,10 +1533,10 @@ Please generate alternative JavaScript code that avoids this system error:"""
                 # Rule-based extraction succeeded
                 logger.debug(f"Rule-based extraction found {len(results)} results for query: {query}")
                 return results[:search_ret_len]  # Return top 6 results
-
+
             # Fallback to LLM extraction if rule-based fails
             logger.warning(f"Rule-based extraction failed for query '{query}', falling back to LLM")
-
+
             extraction_query = f"""
 Extract the top {search_ret_len} search results from this Google search page. For each result, provide:
 - title: The clickable title/headline
@@ -1577,26 +1577,26 @@ Return results as a JSON array: [{{"title": "...", "url": "...", "summary": "...
         """Rule-based deduplication to reduce dataset before LLM processing"""
         if not results:
             return []
-
+
         deduplicated = []
         seen_urls = set()
         seen_titles = set()
-
+
         for result in results:
             url = result.get('url', '').strip()
             title = result.get('title', '').strip().lower()
-
+
             # Skip results with missing essential data
             if not url or not title or url == 'No URL' or title == 'no title':
                 continue
-
+
             # Normalize URL for comparison (remove fragments, query params for deduplication)
            normalized_url = url.split('#')[0].split('?')[0].lower()
-
+
            # Check for duplicate URLs
            if normalized_url in seen_urls:
                continue
-
+
            # Check for very similar titles (basic similarity)
            title_normalized = ''.join(c for c in title if c.isalnum()).lower()
            if len(title_normalized) > 10:  # Only check titles with substantial content
@@ -1609,35 +1609,35 @@ Return results as a JSON array: [{{"title": "...", "url": "...", "summary": "...
                 if similarity > 0.8:
                     similar_found = True
                     break
-
+
             if similar_found:
                 continue
-
+
             # Add to deduplicated results
             seen_urls.add(normalized_url)
             seen_titles.add(title_normalized)
             deduplicated.append(result)
-
+
         # Sort by relevance indicators (prioritize results with longer summaries, non-generic titles)
         def relevance_score(result):
             score = 0
             title = result.get('title', '')
             summary = result.get('summary', '')
-
+
             # Longer summaries are typically more informative
             score += min(len(summary), 200) / 10
-
+
             # Non-generic titles score higher
             generic_terms = ['search results', 'no title', 'error', 'loading']
             if not any(term in title.lower() for term in generic_terms):
                 score += 10
-
+
             # Prefer results with actual descriptions
             if summary and summary != 'No description available' and len(summary) > 20:
                 score += 5
-
+
             return score
-
+
         deduplicated.sort(key=relevance_score, reverse=True)
         return deduplicated
 
@@ -2239,6 +2239,143 @@ You will be given a query and the markdown of a webpage that has been filtered t
                 long_term_memory=result,
             )
 
+        @self.registry.action(
+            'Grep content from file - search for query or keywords and return surrounding context (simulates Linux grep command). For images, uses OCR to extract text first then performs grep search.',
+            param_model=GrepContentAction,
+        )
+        async def grep_content_from_file(
+                params: GrepContentAction,
+                page_extraction_llm: BaseChatModel,
+                file_system: CustomFileSystem,
+        ):
+            try:
+                # Get file path
+                file_path = params.file_path
+                full_file_path = file_path
+                # Check if file exists
+                if not os.path.exists(full_file_path):
+                    full_file_path = os.path.join(str(file_system.get_dir()), file_path)
+
+                # Determine if file is an image based on MIME type
+                mime_type, _ = mimetypes.guess_type(file_path)
+                is_image = mime_type and mime_type.startswith('image/')
+
+                if is_image:
+                    # Handle image files with LLM vision for OCR
+                    try:
+                        # Read image file and encode to base64
+                        with open(full_file_path, 'rb') as image_file:
+                            image_data = image_file.read()
+                            image_base64 = base64.b64encode(image_data).decode('utf-8')
+
+                        # Create content parts for OCR
+                        content_parts: list[ContentPartTextParam | ContentPartImageParam] = [
+                            ContentPartTextParam(
+                                text="Please extract all text content from this image for search purposes. Return only the extracted text, no additional explanations.")
+                        ]
+
+                        # Add the image
+                        content_parts.append(
+                            ContentPartImageParam(
+                                image_url=ImageURL(
+                                    url=f'data:{mime_type};base64,{image_base64}',
+                                    media_type=mime_type,
+                                    detail='high',
+                                ),
+                            )
+                        )
+
+                        # Create user message and invoke LLM for OCR
+                        user_message = UserMessage(content=content_parts, cache=True)
+                        response = await asyncio.wait_for(
+                            page_extraction_llm.ainvoke([user_message]),
+                            timeout=120.0,
+                        )
+
+                        file_content = response.completion
+
+                    except Exception as e:
+                        raise Exception(f'Failed to process image file {file_path} for OCR: {str(e)}')
+
+                else:
+                    # Handle non-image files by reading content
+                    try:
+                        file_content = await file_system.read_file(full_file_path, external_file=True)
+                    except Exception as e:
+                        raise Exception(f'Failed to read file {file_path}: {str(e)}')
+
+                # Perform grep search
+                search_query = params.query.lower()
+                context_chars = params.context_chars
+
+                # Find all matches with context
+                matches = []
+                content_lower = file_content.lower()
+                search_start = 0
+
+                while True:
+                    match_pos = content_lower.find(search_query, search_start)
+                    if match_pos == -1:
+                        break
+
+                    # Calculate context boundaries
+                    start_pos = max(0, match_pos - context_chars)
+                    end_pos = min(len(file_content), match_pos + len(search_query) + context_chars)
+
+                    # Extract context with the match
+                    context_before = file_content[start_pos:match_pos]
+                    matched_text = file_content[match_pos:match_pos + len(search_query)]
+                    context_after = file_content[match_pos + len(search_query):end_pos]
+
+                    # Add ellipsis if truncated
+                    if start_pos > 0:
+                        context_before = "..." + context_before
+                    if end_pos < len(file_content):
+                        context_after = context_after + "..."
+
+                    matches.append({
+                        'context_before': context_before,
+                        'matched_text': matched_text,
+                        'context_after': context_after,
+                        'position': match_pos
+                    })
+
+                    search_start = match_pos + 1
+
+                # Format results
+                if not matches:
+                    extracted_content = f'File: {file_path}\nQuery: "{params.query}"\nResult: No matches found'
+                else:
+                    result_text = f'File: {file_path}\nQuery: "{params.query}"\nFound {len(matches)} match(es):\n\n'
+
+                    for i, match in enumerate(matches, 1):
+                        result_text += f"Match {i} (position: {match['position']}):\n"
+                        result_text += f"{match['context_before']}[{match['matched_text']}]{match['context_after']}\n\n"
+
+                    extracted_content = result_text.strip()
+
+                # Handle memory storage
+                if len(extracted_content) < 1000:
+                    memory = extracted_content
+                    include_extracted_content_only_once = False
+                else:
+                    save_result = await file_system.save_extracted_content(extracted_content)
+                    memory = (
+                        f'Grep search completed in file {file_path} for query: {params.query}\nFound {len(matches)} match(es)\nContent saved to file system: {save_result}'
+                    )
+                    include_extracted_content_only_once = True
+
+                logger.info(f'🔍 Grep search completed in file: {file_path}, found {len(matches)} match(es)')
+                return ActionResult(
+                    extracted_content=extracted_content,
+                    include_extracted_content_only_once=include_extracted_content_only_once,
+                    long_term_memory=memory,
+                )
+
+            except Exception as e:
+                logger.debug(f'Error grep searching content from file: {e}')
+                raise RuntimeError(str(e))
+
         @self.registry.action(
             'Create a directory within the FileSystem.'
         )
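
A worked example of the context-window slicing inside `grep_content_from_file`, using a small `context_chars` value (the `query`/`file_path`/`context_chars` fields of `GrepContentAction` appear in this diff; the sample text is illustrative):

```python
content = "error: failed to open file; retrying once before giving up"
query = "file"
context_chars = 10

pos = content.lower().find(query.lower())                  # 22
start = max(0, pos - context_chars)                        # 12
end = min(len(content), pos + len(query) + context_chars)  # 36

# Ellipses are added only when the window is truncated, as in the action.
before = ("..." if start > 0 else "") + content[start:pos]
matched = content[pos:pos + len(query)]
after = content[pos + len(query):end] + ("..." if end < len(content) else "")
print(f"{before}[{matched}]{after}")
# -> ...d to open [file]; retrying...
```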
@@ -2334,6 +2471,72 @@ You will be given a query and the markdown of a webpage that has been filtered t
         self.mcp_clients.clear()
         logger.info('All MCP clients unregistered and disconnected')
 
+    async def register_composio_clients(self, composio_instance: Optional[Any] = None,
+                                        toolkit_tools_dict: Optional[Dict[str, Any]] = None):
+        """
+        Register Composio tools to the registry.
+
+        Args:
+            composio_instance: Composio instance (optional, can be None initially)
+            toolkit_tools_dict: Dict of toolkit_slug -> tools list
+        """
+        try:
+            # Initialize Composio client if not exists
+            if self.composio_client is None:
+                self.composio_client = ComposioClient(composio_instance=composio_instance)
+            else:
+                # Update the composio instance
+                self.composio_client.update_composio_instance(composio_instance)
+
+            # Register tools if we have both instance and toolkit tools
+            if composio_instance and toolkit_tools_dict:
+                await self.composio_client.register_to_tools(
+                    tools=self,
+                    toolkit_tools_dict=toolkit_tools_dict,
+                    prefix="cpo."
+                )
+                logger.info(f'Successfully registered Composio tools from {len(toolkit_tools_dict)} toolkits')
+            elif not composio_instance:
+                logger.info("Composio client initialized without instance - will register tools later")
+            elif not toolkit_tools_dict:
+                logger.info("Composio client initialized without toolkit tools - will register tools later")
+
+        except Exception as e:
+            logger.error(f'Failed to register Composio clients: {str(e)}')
+
+    async def unregister_composio_clients(self):
+        """
+        Unregister all Composio tools from the registry.
+        """
+        try:
+            if self.composio_client:
+                self.composio_client.unregister_all_tools(self)
+                logger.info('All Composio tools unregistered')
+
+        except Exception as e:
+            logger.error(f'Failed to unregister Composio clients: {str(e)}')
+
+    async def update_composio_tools(self, composio_instance: Optional[Any] = None,
+                                    toolkit_tools_dict: Optional[Dict[str, Any]] = None):
+        """
+        Update Composio tools by unregistering old ones and registering new ones.
+
+        Args:
+            composio_instance: Composio instance
+            toolkit_tools_dict: Dict of toolkit_slug -> tools list
+        """
+        try:
+            # Unregister existing tools
+            await self.unregister_composio_clients()
+
+            # Register new tools
+            await self.register_composio_clients(composio_instance, toolkit_tools_dict)
+
+            logger.info('Composio tools updated successfully')
+
+        except Exception as e:
+            logger.error(f'Failed to update Composio tools: {str(e)}')
+
     @time_execution_sync('--act')
     async def act(
             self,
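
Taken together, the three new methods give `VibeSurfTools` a register/update/unregister lifecycle for Composio tools. A hypothetical usage sketch using only the methods added in this diff; the `composio` instance and the `toolkit_slug -> tools list` mapping are assumptions about the caller, which in 0.1.33 would live in the backend's shared_state:

```python
tools = VibeSurfTools()

# Initial registration; registered actions get the "cpo." prefix.
await tools.register_composio_clients(
    composio_instance=composio,                   # assumed: a configured Composio SDK instance
    toolkit_tools_dict={"github": github_tools},  # assumed shape: toolkit_slug -> tools list
)

# When the user changes connected toolkits (e.g., via the extension's new
# integrations settings), swap the registered tools in one call:
await tools.update_composio_tools(
    composio_instance=composio,
    toolkit_tools_dict={"gmail": gmail_tools},
)

# Teardown:
await tools.unregister_composio_clients()
```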