tooluniverse 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of tooluniverse might be problematic.

Files changed (150)
  1. tooluniverse/__init__.py +57 -1
  2. tooluniverse/blast_tool.py +132 -0
  3. tooluniverse/boltz_tool.py +2 -2
  4. tooluniverse/cbioportal_tool.py +42 -0
  5. tooluniverse/clinvar_tool.py +268 -74
  6. tooluniverse/compose_scripts/tool_discover.py +1941 -443
  7. tooluniverse/data/agentic_tools.json +0 -370
  8. tooluniverse/data/alphafold_tools.json +6 -6
  9. tooluniverse/data/blast_tools.json +112 -0
  10. tooluniverse/data/cbioportal_tools.json +87 -0
  11. tooluniverse/data/clinvar_tools.json +235 -0
  12. tooluniverse/data/compose_tools.json +0 -89
  13. tooluniverse/data/dbsnp_tools.json +275 -0
  14. tooluniverse/data/emdb_tools.json +61 -0
  15. tooluniverse/data/ensembl_tools.json +259 -0
  16. tooluniverse/data/file_download_tools.json +275 -0
  17. tooluniverse/data/geo_tools.json +200 -48
  18. tooluniverse/data/gnomad_tools.json +109 -0
  19. tooluniverse/data/gtopdb_tools.json +68 -0
  20. tooluniverse/data/gwas_tools.json +32 -0
  21. tooluniverse/data/interpro_tools.json +199 -0
  22. tooluniverse/data/jaspar_tools.json +70 -0
  23. tooluniverse/data/kegg_tools.json +356 -0
  24. tooluniverse/data/mpd_tools.json +87 -0
  25. tooluniverse/data/ols_tools.json +314 -0
  26. tooluniverse/data/package_discovery_tools.json +64 -0
  27. tooluniverse/data/packages/categorized_tools.txt +0 -1
  28. tooluniverse/data/packages/machine_learning_tools.json +0 -47
  29. tooluniverse/data/paleobiology_tools.json +91 -0
  30. tooluniverse/data/pride_tools.json +62 -0
  31. tooluniverse/data/pypi_package_inspector_tools.json +158 -0
  32. tooluniverse/data/python_executor_tools.json +341 -0
  33. tooluniverse/data/regulomedb_tools.json +50 -0
  34. tooluniverse/data/remap_tools.json +89 -0
  35. tooluniverse/data/screen_tools.json +89 -0
  36. tooluniverse/data/tool_discovery_agents.json +428 -0
  37. tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
  38. tooluniverse/data/uniprot_tools.json +77 -0
  39. tooluniverse/data/web_search_tools.json +250 -0
  40. tooluniverse/data/worms_tools.json +55 -0
  41. tooluniverse/dbsnp_tool.py +196 -58
  42. tooluniverse/default_config.py +35 -2
  43. tooluniverse/emdb_tool.py +30 -0
  44. tooluniverse/ensembl_tool.py +140 -47
  45. tooluniverse/execute_function.py +74 -14
  46. tooluniverse/file_download_tool.py +269 -0
  47. tooluniverse/geo_tool.py +81 -28
  48. tooluniverse/gnomad_tool.py +100 -52
  49. tooluniverse/gtopdb_tool.py +41 -0
  50. tooluniverse/interpro_tool.py +72 -0
  51. tooluniverse/jaspar_tool.py +30 -0
  52. tooluniverse/kegg_tool.py +230 -0
  53. tooluniverse/mpd_tool.py +42 -0
  54. tooluniverse/ncbi_eutils_tool.py +96 -0
  55. tooluniverse/ols_tool.py +435 -0
  56. tooluniverse/package_discovery_tool.py +217 -0
  57. tooluniverse/paleobiology_tool.py +30 -0
  58. tooluniverse/pride_tool.py +30 -0
  59. tooluniverse/pypi_package_inspector_tool.py +593 -0
  60. tooluniverse/python_executor_tool.py +711 -0
  61. tooluniverse/regulomedb_tool.py +30 -0
  62. tooluniverse/remap_tool.py +44 -0
  63. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +1 -1
  64. tooluniverse/screen_tool.py +44 -0
  65. tooluniverse/smcp_server.py +3 -3
  66. tooluniverse/tool_finder_embedding.py +3 -1
  67. tooluniverse/tool_finder_keyword.py +3 -1
  68. tooluniverse/tool_finder_llm.py +6 -2
  69. tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
  70. tooluniverse/tools/BLAST_protein_search.py +63 -0
  71. tooluniverse/tools/ClinVar_search_variants.py +26 -15
  72. tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
  73. tooluniverse/tools/EMDB_get_structure.py +46 -0
  74. tooluniverse/tools/GtoPdb_get_targets.py +52 -0
  75. tooluniverse/tools/InterPro_get_domain_details.py +46 -0
  76. tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
  77. tooluniverse/tools/InterPro_search_domains.py +52 -0
  78. tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
  79. tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
  80. tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
  81. tooluniverse/tools/PackageAnalyzer.py +55 -0
  82. tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
  83. tooluniverse/tools/PyPIPackageInspector.py +59 -0
  84. tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
  85. tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
  86. tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
  87. tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
  88. tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
  89. tooluniverse/tools/ToolDiscover.py +11 -11
  90. tooluniverse/tools/UniProt_id_mapping.py +63 -0
  91. tooluniverse/tools/UniProt_search.py +63 -0
  92. tooluniverse/tools/UnifiedToolGenerator.py +59 -0
  93. tooluniverse/tools/WoRMS_search_species.py +49 -0
  94. tooluniverse/tools/XMLToolOptimizer.py +55 -0
  95. tooluniverse/tools/__init__.py +119 -29
  96. tooluniverse/tools/alphafold_get_annotations.py +3 -3
  97. tooluniverse/tools/alphafold_get_prediction.py +3 -3
  98. tooluniverse/tools/alphafold_get_summary.py +3 -3
  99. tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
  100. tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
  101. tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
  102. tooluniverse/tools/clinvar_get_variant_details.py +49 -0
  103. tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
  104. tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
  105. tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
  106. tooluniverse/tools/download_binary_file.py +66 -0
  107. tooluniverse/tools/download_file.py +71 -0
  108. tooluniverse/tools/download_text_content.py +55 -0
  109. tooluniverse/tools/dynamic_package_discovery.py +59 -0
  110. tooluniverse/tools/ensembl_get_sequence.py +52 -0
  111. tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
  112. tooluniverse/tools/ensembl_lookup_gene.py +46 -0
  113. tooluniverse/tools/geo_get_dataset_info.py +46 -0
  114. tooluniverse/tools/geo_get_sample_info.py +46 -0
  115. tooluniverse/tools/geo_search_datasets.py +67 -0
  116. tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
  117. tooluniverse/tools/kegg_find_genes.py +52 -0
  118. tooluniverse/tools/kegg_get_gene_info.py +46 -0
  119. tooluniverse/tools/kegg_get_pathway_info.py +46 -0
  120. tooluniverse/tools/kegg_list_organisms.py +44 -0
  121. tooluniverse/tools/kegg_search_pathway.py +46 -0
  122. tooluniverse/tools/ols_find_similar_terms.py +63 -0
  123. tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
  124. tooluniverse/tools/ols_get_term_ancestors.py +67 -0
  125. tooluniverse/tools/ols_get_term_children.py +67 -0
  126. tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
  127. tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
  128. tooluniverse/tools/ols_search_terms.py +71 -0
  129. tooluniverse/tools/python_code_executor.py +79 -0
  130. tooluniverse/tools/python_script_runner.py +79 -0
  131. tooluniverse/tools/web_api_documentation_search.py +63 -0
  132. tooluniverse/tools/web_search.py +71 -0
  133. tooluniverse/uniprot_tool.py +219 -16
  134. tooluniverse/url_tool.py +18 -0
  135. tooluniverse/utils.py +2 -2
  136. tooluniverse/web_search_tool.py +229 -0
  137. tooluniverse/worms_tool.py +64 -0
  138. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +3 -2
  139. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +143 -54
  140. tooluniverse/data/genomics_tools.json +0 -174
  141. tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
  142. tooluniverse/tools/ToolImplementationGenerator.py +0 -67
  143. tooluniverse/tools/ToolOptimizer.py +0 -59
  144. tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
  145. tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
  146. tooluniverse/ucsc_tool.py +0 -60
  147. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
  148. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
  149. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
  150. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
tooluniverse/uniprot_tool.py CHANGED
@@ -1,3 +1,4 @@
+import time
 import requests
 from typing import Any, Dict
 from .base_tool import BaseTool
@@ -22,7 +23,7 @@ class UniProtRESTTool(BaseTool):
         """Custom data extraction with support for filtering"""

         # Handle specific UniProt extraction patterns
-        if extract_path == "comments[?(@.commentType=='FUNCTION')].texts[*].value":
+        if extract_path == ("comments[?(@.commentType==" "'FUNCTION')].texts[*].value"):
             # Extract function comments
             result = []
             for comment in data.get("comments", []):
@@ -32,41 +33,40 @@ class UniProtRESTTool(BaseTool):
                             result.append(text["value"])
             return result

-        elif (
-            extract_path
-            == "comments[?(@.commentType=='SUBCELLULAR LOCATION')].subcellularLocations[*].location.value"
+        elif extract_path == (
+            "comments[?(@.commentType=="
+            "'SUBCELLULAR LOCATION')].subcellularLocations[*].location.value"
         ):
             # Extract subcellular locations
             result = []
             for comment in data.get("comments", []):
                 if comment.get("commentType") == "SUBCELLULAR LOCATION":
                     for location in comment.get("subcellularLocations", []):
-                        if "location" in location and "value" in location["location"]:
+                        if "location" in location and ("value" in location["location"]):
                             result.append(location["location"]["value"])
             return result

         elif extract_path == "features[?(@.type=='VARIANT')]":
-            # Extract variant features (correct type is "Natural variant")
+            # Extract variant features
             result = []
             for feature in data.get("features", []):
                 if feature.get("type") == "Natural variant":
                     result.append(feature)
             return result

-        elif (
-            extract_path
-            == "features[?(@.type=='MODIFIED RESIDUE' || @.type=='SIGNAL')]"
+        elif extract_path == (
+            "features[?(@.type=='MODIFIED RESIDUE' || " "@.type=='SIGNAL')]"
         ):
-            # Extract PTM and signal features (correct types are "Modified residue" and "Signal")
+            # Extract PTM and signal features
             result = []
             for feature in data.get("features", []):
                 if feature.get("type") in ["Modified residue", "Signal"]:
                     result.append(feature)
             return result

-        elif (
-            extract_path
-            == "comments[?(@.commentType=='ALTERNATIVE PRODUCTS')].isoforms[*].isoformIds[*]"
+        elif extract_path == (
+            "comments[?(@.commentType=="
+            "'ALTERNATIVE PRODUCTS')].isoforms[*].isoformIds[*]"
         ):
             # Extract isoform IDs
             result = []
@@ -97,17 +97,220 @@ class UniProtRESTTool(BaseTool):
             return {"error": "jsonpath_ng library is required for data extraction"}
         except Exception as e:
             return {
-                "error": f"Failed to extract UniProt fields using JSONPath '{extract_path}': {e}"
+                "error": (
+                    f"Failed to extract UniProt fields using "
+                    f"JSONPath '{extract_path}': {e}"
+                )
             }

+    def _handle_search(self, arguments: Dict[str, Any]) -> Any:
+        """Handle search queries with flexible parameters"""
+        query = arguments.get("query", "")
+        organism = arguments.get("organism", "")
+        limit = min(arguments.get("limit", 25), 500)
+        fields = arguments.get("fields")
+
+        # Build query string
+        query_parts = [query]
+        if organism:
+            # Support common organism names
+            organism_map = {
+                "human": "9606",
+                "mouse": "10090",
+                "rat": "10116",
+                "yeast": "559292",
+            }
+            taxon_id = organism_map.get(organism.lower(), organism)
+            query_parts.append(f"organism_id:{taxon_id}")
+
+        full_query = " AND ".join(query_parts)
+
+        # Build parameters
+        params = {"query": full_query, "size": str(limit), "format": "json"}
+
+        # Add fields parameter if specified
+        if fields and isinstance(fields, list):
+            params["fields"] = ",".join(fields)
+
+        url = "https://rest.uniprot.org/uniprotkb/search"
+
+        try:
+            resp = requests.get(url, params=params, timeout=self.timeout)
+            resp.raise_for_status()
+            data = resp.json()
+
+            # Extract results
+            results = data.get("results", [])
+            formatted_results = []
+
+            for entry in results:
+                formatted_entry = {
+                    "accession": entry.get("primaryAccession", ""),
+                    "id": entry.get("uniProtkbId", ""),
+                    "protein_name": "",
+                    "gene_names": [],
+                    "organism": "",
+                    "length": 0,
+                }
+
+                # Extract protein name
+                protein_desc = entry.get("proteinDescription", {})
+                rec_name = protein_desc.get("recommendedName", {})
+                if rec_name:
+                    full_name = rec_name.get("fullName", {})
+                    if full_name:
+                        formatted_entry["protein_name"] = full_name.get("value", "")
+
+                # Extract gene names
+                genes = entry.get("genes", [])
+                for gene in genes:
+                    gene_name = gene.get("geneName", {})
+                    if gene_name:
+                        formatted_entry["gene_names"].append(gene_name.get("value", ""))
+
+                # Extract organism
+                organism_info = entry.get("organism", {})
+                formatted_entry["organism"] = organism_info.get("scientificName", "")
+
+                # Extract sequence length
+                sequence = entry.get("sequence", {})
+                formatted_entry["length"] = sequence.get("length", 0)
+
+                formatted_results.append(formatted_entry)
+
+            return {
+                "total_results": data.get("resultsFound", len(results)),
+                "returned": len(results),
+                "results": formatted_results,
+            }
+
+        except requests.exceptions.Timeout:
+            return {"error": "Request to UniProt API timed out"}
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Request to UniProt API failed: {e}"}
+        except ValueError as e:
+            return {"error": f"Failed to parse JSON response: {e}"}
+
+    def _handle_id_mapping(self, arguments: Dict[str, Any]) -> Any:
+        """Handle ID mapping requests"""
+
+        ids = arguments.get("ids", [])
+        from_db = arguments.get("from_db", "")
+        to_db = arguments.get("to_db", "UniProtKB")
+        max_wait_time = arguments.get("max_wait_time", 30)
+
+        # Normalize IDs to list
+        if isinstance(ids, str):
+            ids = [ids]
+
+        # Normalize database names
+        db_mapping = {
+            "Ensembl": "Ensembl",
+            "Gene_Name": "Gene_Name",
+            "RefSeq_Protein": "RefSeq_Protein_ID",
+            "PDB": "PDB_ID",
+            "EMBL": "EMBL_ID",
+            "UniProtKB": "UniProtKB_AC-ID",
+        }
+        from_db_normalized = db_mapping.get(from_db, from_db)
+        to_db_normalized = db_mapping.get(to_db, to_db)
+
+        # Step 1: Submit mapping job
+        submit_url = "https://rest.uniprot.org/idmapping/run"
+        payload = {"ids": ids, "from": from_db_normalized, "to": to_db_normalized}
+
+        try:
+            resp = requests.post(submit_url, json=payload, timeout=self.timeout)
+            resp.raise_for_status()
+            job_data = resp.json()
+            job_id = job_data.get("jobId")
+
+            if not job_id:
+                return {"error": "Failed to get job ID from UniProt ID mapping"}
+
+            # Step 2: Poll for job completion
+            status_url = f"https://rest.uniprot.org/idmapping/status/{job_id}"
+            results_url = f"https://rest.uniprot.org/idmapping/results/{job_id}"
+
+            start_time = time.time()
+            while time.time() - start_time < max_wait_time:
+                status_resp = requests.get(status_url, timeout=self.timeout)
+                status_data = status_resp.json()
+
+                if status_data.get("status") == "FINISHED":
+                    # Step 3: Retrieve results
+                    results_resp = requests.get(results_url, timeout=self.timeout)
+                    results_data = results_resp.json()
+
+                    # Format results
+                    formatted_results = []
+                    failed = []
+
+                    # Extract mappings
+                    results = results_data.get("results", [])
+                    for result in results:
+                        from_value = result.get("from", "")
+                        to_values = result.get("to", {}).get("results", [])
+
+                        if to_values:
+                            for to_item in to_values:
+                                to_info = to_item.get("to", {})
+                                gene_names = to_info.get("geneNames", [])
+                                gene_name = ""
+                                if gene_names:
+                                    gene_name = gene_names[0].get("value", "")
+
+                                formatted_results.append(
+                                    {
+                                        "from": from_value,
+                                        "to": {
+                                            "accession": to_info.get(
+                                                "primaryAccession", ""
+                                            ),
+                                            "id": to_info.get("uniProtkbId", ""),
+                                            "gene_name": gene_name,
+                                        },
+                                    }
+                                )
+                        else:
+                            failed.append(from_value)
+
+                    return {
+                        "mapped_count": len(formatted_results),
+                        "results": formatted_results,
+                        "failed": list(set(failed)) if failed else [],
+                    }
+                elif status_data.get("status") == "FAILED":
+                    return {"error": "ID mapping job failed"}
+
+                time.sleep(1)  # Wait 1 second before next poll
+
+            return {"error": (f"ID mapping timed out after {max_wait_time} seconds")}
+
+        except requests.exceptions.Timeout:
+            return {"error": "Request to UniProt API timed out"}
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Request to UniProt API failed: {e}"}
+        except ValueError as e:
+            return {"error": f"Failed to parse JSON response: {e}"}
+
     def run(self, arguments: Dict[str, Any]) -> Any:
-        # Build URL
+        # Check if this is a search request
+        search_type = self.tool_config.get("fields", {}).get("search_type")
+        mapping_type = self.tool_config.get("fields", {}).get("mapping_type")
+
+        if search_type == "search":
+            return self._handle_search(arguments)
+        elif mapping_type == "async":
+            return self._handle_id_mapping(arguments)
+
+        # Build URL for standard accession-based queries
         url = self._build_url(arguments)
         try:
             resp = requests.get(url, timeout=self.timeout)
             if resp.status_code != 200:
                 return {
-                    "error": f"UniProt API returned status code: {resp.status_code}",
+                    "error": (f"UniProt API returned status code: {resp.status_code}"),
                     "detail": resp.text,
                 }
             data = resp.json()
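For orientation, the sketch below condenses the submit/poll/fetch flow that the new _handle_id_mapping method follows into a standalone function. The endpoint URLs and the one-second polling interval are taken from the diff above; the helper name map_ids, its defaults, and the example gene symbol are illustrative only, not part of the package.

import time
import requests

def map_ids(ids, from_db="Gene_Name", to_db="UniProtKB_AC-ID", max_wait=30):
    # Step 1: submit the mapping job
    job = requests.post(
        "https://rest.uniprot.org/idmapping/run",
        json={"ids": ids, "from": from_db, "to": to_db},
        timeout=30,
    ).json()
    job_id = job["jobId"]
    # Step 2: poll until the job reports FINISHED (or we give up)
    deadline = time.time() + max_wait
    while time.time() < deadline:
        status = requests.get(
            f"https://rest.uniprot.org/idmapping/status/{job_id}", timeout=30
        ).json()
        if status.get("status") == "FINISHED":
            # Step 3: fetch the mapped identifiers
            return requests.get(
                f"https://rest.uniprot.org/idmapping/results/{job_id}", timeout=30
            ).json()
        time.sleep(1)  # same polling cadence as the tool
    raise TimeoutError(f"ID mapping timed out after {max_wait} seconds")

# Example (hypothetical input): map_ids(["TP53"]) maps a gene name to UniProtKB accessions.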
tooluniverse/url_tool.py CHANGED
@@ -188,6 +188,24 @@ class URLToPDFTextTool(BaseTool):

         timeout = arguments.get("timeout", 30)

+        # First, check if the URL returns HTML or a downloadable file
+        try:
+            resp = requests.head(url, timeout=timeout, allow_redirects=True)
+            content_type = resp.headers.get("Content-Type", "").lower()
+            # If it's not HTML, handle it as a simple text download
+            is_html = "text/html" in content_type or "application/xhtml" in content_type
+            if not is_html:
+                # Download the file directly and return its text content
+                resp = requests.get(url, timeout=timeout, allow_redirects=True)
+                if resp.status_code != 200:
+                    return {"error": f"HTTP {resp.status_code}"}
+                text = resp.text
+                if not text.strip():
+                    return {"error": "File appears to be empty or binary."}
+                return {self.return_key: text.strip()}
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Failed to check content type: {e}"}
+
         # Ensure browsers are installed (auto-install if needed)
         ensure_error = self._ensure_playwright_browsers(
             browsers=("chromium",), with_deps=False
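The hunk above makes URLToPDFTextTool sniff the Content-Type with a HEAD request before starting a headless browser, and short-circuits to a plain download for non-HTML URLs. Here is a minimal sketch of the same pattern outside the class (the function name is hypothetical; note that some servers answer HEAD requests inaccurately, so production code may want a GET fallback):

import requests

def fetch_text_if_not_html(url, timeout=30):
    # HEAD first: a cheap way to learn the Content-Type without a browser
    head = requests.head(url, timeout=timeout, allow_redirects=True)
    content_type = head.headers.get("Content-Type", "").lower()
    if "text/html" in content_type or "application/xhtml" in content_type:
        return None  # defer to the Playwright-based HTML path
    # Not HTML: download directly and return the text body
    resp = requests.get(url, timeout=timeout, allow_redirects=True)
    resp.raise_for_status()
    return resp.text.strip() or None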
tooluniverse/utils.py CHANGED
@@ -117,7 +117,7 @@ def yaml_to_dict(yaml_file_path):
        dict: Dictionary representation of the YAML file content.
    """
    try:
-        with open(yaml_file_path, "r") as file:
+        with open(yaml_file_path, "r", encoding="utf-8") as file:
            yaml_dict = yaml.safe_load(file)
            return yaml_dict
    except FileNotFoundError:
@@ -136,7 +136,7 @@ def read_json_list(file_path):
    Returns
        list: A list of dictionaries containing the JSON objects.
    """
-    with open(file_path, "r") as file:
+    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
        return data
tooluniverse/web_search_tool.py ADDED
@@ -0,0 +1,229 @@
+"""
+Web search tools for ToolUniverse using DDGS (Dux Distributed Global Search).
+
+This module provides web search capabilities using the ddgs library,
+which supports multiple search engines including DuckDuckGo, Google, Bing, etc.
+"""
+
+import time
+from typing import Dict, Any, List
+from ddgs import DDGS
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("WebSearchTool")
+class WebSearchTool(BaseTool):
+    """
+    Web search tool using DDGS library.
+
+    This tool performs web searches using the DDGS library which supports
+    multiple search engines including Google, Bing, Brave, Yahoo, DuckDuckGo, etc.
+    """
+
+    def __init__(self, tool_config: Dict[str, Any]):
+        super().__init__(tool_config)
+        # DDGS instance will be created per request to avoid session issues
+
+    def _search_with_ddgs(
+        self,
+        query: str,
+        max_results: int = 10,
+        backend: str = "auto",
+        region: str = "us-en",
+        safesearch: str = "moderate",
+    ) -> List[Dict[str, Any]]:
+        """
+        Perform a web search using DDGS library and return formatted results.
+
+        Args:
+            query: Search query string
+            max_results: Maximum number of results to return
+            backend: Search engine backend (auto, google, bing, brave, etc.)
+            region: Search region (e.g., 'us-en', 'cn-zh')
+            safesearch: Safe search level ('on', 'moderate', 'off')
+
+        Returns:
+            List of search results with title, url, and snippet
+        """
+        try:
+            # Create DDGS instance
+            ddgs = DDGS()
+
+            # Perform search using DDGS
+            search_results = list(
+                ddgs.text(
+                    query=query,
+                    max_results=max_results,
+                    backend=backend,
+                    region=region,
+                    safesearch=safesearch,
+                )
+            )
+
+            # Convert DDGS results to our expected format
+            results = []
+            for i, result in enumerate(search_results):
+                results.append(
+                    {
+                        "title": result.get("title", ""),
+                        "url": result.get("href", ""),
+                        "snippet": result.get("body", ""),
+                        "rank": i + 1,
+                    }
+                )
+
+            return results
+
+        except Exception as e:
+            return [
+                {
+                    "title": "Search Error",
+                    "url": "",
+                    "snippet": f"Failed to perform search: {str(e)}",
+                    "rank": 0,
+                }
+            ]
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Execute web search using DDGS.
+
+        Args:
+            arguments: Dictionary containing:
+                - query: Search query string
+                - max_results: Maximum number of results (default: 10)
+                - search_type: Type of search (default: 'general')
+                - backend: Search engine backend (default: 'auto')
+                - region: Search region (default: 'us-en')
+                - safesearch: Safe search level (default: 'moderate')
+
+        Returns:
+            Dictionary containing search results
+        """
+        try:
+            query = arguments.get("query", "").strip()
+            max_results = int(arguments.get("max_results", 10))
+            search_type = arguments.get("search_type", "general")
+            backend = arguments.get("backend", "auto")
+            region = arguments.get("region", "us-en")
+            safesearch = arguments.get("safesearch", "moderate")
+
+            if not query:
+                return {
+                    "status": "error",
+                    "error": "Query parameter is required",
+                    "results": [],
+                }
+
+            # Validate max_results
+            max_results = max(1, min(max_results, 50))  # Limit between 1-50
+
+            # Modify query based on search type
+            if search_type == "api_documentation":
+                query = f"{query} API documentation python library"
+            elif search_type == "python_packages":
+                query = f"{query} python package pypi"
+            elif search_type == "github":
+                query = f"{query} site:github.com"
+
+            # Perform search using DDGS
+            results = self._search_with_ddgs(
+                query=query,
+                max_results=max_results,
+                backend=backend,
+                region=region,
+                safesearch=safesearch,
+            )
+
+            # Add rate limiting to be respectful
+            time.sleep(0.5)
+
+            return {
+                "status": "success",
+                "query": query,
+                "search_type": search_type,
+                "total_results": len(results),
+                "results": results,
+            }
+
+        except Exception as e:
+            return {"status": "error", "error": str(e), "results": []}
+
+
+@register_tool("WebAPIDocumentationSearchTool")
+class WebAPIDocumentationSearchTool(WebSearchTool):
+    """
+    Specialized web search tool for API documentation and Python libraries.
+
+    This tool is optimized for finding API documentation, Python packages,
+    and technical resources using DDGS with multiple search engines.
+    """
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Execute API documentation focused search.
+
+        Args:
+            arguments: Dictionary containing:
+                - query: Search query string
+                - max_results: Maximum number of results (default: 10)
+                - focus: Focus area ('api_docs', 'python_packages', etc.)
+                - backend: Search engine backend (default: 'auto')
+
+        Returns:
+            Dictionary containing search results
+        """
+        try:
+            query = arguments.get("query", "").strip()
+            focus = arguments.get("focus", "api_docs")
+            backend = arguments.get("backend", "auto")
+
+            if not query:
+                return {
+                    "status": "error",
+                    "error": "Query parameter is required",
+                    "results": [],
+                }
+
+            # Modify query based on focus
+            if focus == "api_docs":
+                enhanced_query = f'"{query}" API documentation official docs'
+            elif focus == "python_packages":
+                enhanced_query = f'"{query}" python package pypi install pip'
+            elif focus == "github_repos":
+                enhanced_query = f'"{query}" github repository source code'
+            else:
+                enhanced_query = f'"{query}" documentation API reference'
+
+            # Use parent class search with enhanced query
+            arguments["query"] = enhanced_query
+            arguments["search_type"] = "api_documentation"
+            arguments["backend"] = backend
+
+            result = super().run(arguments)
+
+            # Add focus-specific metadata
+            if result["status"] == "success":
+                result["focus"] = focus
+                result["enhanced_query"] = enhanced_query
+
+                # Filter results for better relevance
+                if focus == "python_packages":
+                    result["results"] = [
+                        r
+                        for r in result["results"]
+                        if (
+                            "pypi.org" in r.get("url", "")
+                            or "python" in r.get("title", "").lower()
+                        )
+                    ]
+                elif focus == "github_repos":
+                    result["results"] = [
+                        r for r in result["results"] if "github.com" in r.get("url", "")
+                    ]
+
+            return result
+
+        except Exception as e:
+            return {"status": "error", "error": str(e), "results": []}
tooluniverse/worms_tool.py ADDED
@@ -0,0 +1,64 @@
+import requests
+import urllib.parse
+from typing import Any, Dict
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("WoRMSRESTTool")
+class WoRMSRESTTool(BaseTool):
+    def __init__(self, tool_config: Dict):
+        super().__init__(tool_config)
+        self.base_url = "https://www.marinespecies.org/rest"
+        self.session = requests.Session()
+        self.session.headers.update({"Accept": "application/json"})
+        self.timeout = 30
+
+    def _build_url(self, args: Dict[str, Any]) -> str:
+        url = self.tool_config["fields"]["endpoint"]
+        for k, v in args.items():
+            url = url.replace(f"{{{k}}}", str(v))
+        return url
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        try:
+            # Build URL with proper encoding
+            query = arguments.get("query", "")
+            if not query:
+                return {"status": "error", "error": "Query parameter is required"}
+
+            # URL encode the query
+            encoded_query = urllib.parse.quote(query)
+            url = (
+                f"https://www.marinespecies.org/rest/AphiaRecordsByName/{encoded_query}"
+            )
+
+            response = self.session.get(url, timeout=self.timeout)
+            response.raise_for_status()
+
+            # Check if response is empty
+            if not response.text.strip():
+                return {
+                    "status": "success",
+                    "data": [],
+                    "url": url,
+                    "message": "No results found for this query",
+                }
+
+            data = response.json()
+
+            # WoRMS returns array of species, extract first few results
+            if isinstance(data, list) and len(data) > 0:
+                # Limit results to first 5 for better performance
+                limited_data = data[:5]
+                return {
+                    "status": "success",
+                    "data": limited_data,
+                    "url": url,
+                    "count": len(limited_data),
+                    "total_found": len(data),
+                }
+            else:
+                return {"status": "success", "data": data, "url": url}
+        except Exception as e:
+            return {"status": "error", "error": f"WoRMS API error: {str(e)}"}
{tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tooluniverse
-Version: 1.0.10
+Version: 1.0.11
 Summary: A comprehensive collection of scientific tools for Agentic AI, offering integration with the ToolUniverse SDK and MCP Server to support advanced scientific workflows.
 Author-email: Shanghua Gao <shanghuagao@gmail.com>
 Project-URL: Homepage, https://github.com/mims-harvard/ToolUniverse
@@ -29,7 +29,7 @@ Requires-Dist: sentence-transformers>=5.1.0
 Requires-Dist: fitz>=0.0.1.dev2
 Requires-Dist: pandas>=2.2.3
 Requires-Dist: admet-ai>=1.2.0
-Requires-Dist: setuptools>=70.0.0
+Requires-Dist: setuptools<81.0.0,>=70.0.0
 Requires-Dist: pdfplumber>=0.11.0
 Requires-Dist: playwright>=1.55.0
 Requires-Dist: faiss-cpu>=1.12.0
@@ -39,6 +39,7 @@ Requires-Dist: aiohttp
 Requires-Dist: beautifulsoup4>=4.12.0
 Requires-Dist: markitdown[all]>=0.1.0
 Requires-Dist: psutil>=5.9.0
+Requires-Dist: ddgs>=9.0.0
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0; extra == "dev"
 Requires-Dist: pytest-cov>=4.0; extra == "dev"