tooluniverse 1.0.9.1__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tooluniverse has been flagged as potentially problematic.

Files changed (191)
  1. tooluniverse/__init__.py +57 -1
  2. tooluniverse/admetai_tool.py +1 -1
  3. tooluniverse/agentic_tool.py +65 -17
  4. tooluniverse/base_tool.py +19 -8
  5. tooluniverse/blast_tool.py +132 -0
  6. tooluniverse/boltz_tool.py +3 -3
  7. tooluniverse/cache/result_cache_manager.py +167 -12
  8. tooluniverse/cbioportal_tool.py +42 -0
  9. tooluniverse/clinvar_tool.py +268 -74
  10. tooluniverse/compose_scripts/drug_safety_analyzer.py +1 -1
  11. tooluniverse/compose_scripts/multi_agent_literature_search.py +1 -1
  12. tooluniverse/compose_scripts/output_summarizer.py +4 -4
  13. tooluniverse/compose_scripts/tool_discover.py +1941 -443
  14. tooluniverse/compose_scripts/tool_graph_composer.py +1 -1
  15. tooluniverse/compose_scripts/tool_metadata_generator.py +1 -1
  16. tooluniverse/compose_tool.py +9 -9
  17. tooluniverse/core_tool.py +2 -2
  18. tooluniverse/ctg_tool.py +4 -4
  19. tooluniverse/custom_tool.py +1 -1
  20. tooluniverse/data/agentic_tools.json +0 -370
  21. tooluniverse/data/alphafold_tools.json +6 -6
  22. tooluniverse/data/blast_tools.json +112 -0
  23. tooluniverse/data/cbioportal_tools.json +87 -0
  24. tooluniverse/data/clinvar_tools.json +235 -0
  25. tooluniverse/data/compose_tools.json +0 -89
  26. tooluniverse/data/dbsnp_tools.json +275 -0
  27. tooluniverse/data/emdb_tools.json +61 -0
  28. tooluniverse/data/ensembl_tools.json +259 -0
  29. tooluniverse/data/file_download_tools.json +275 -0
  30. tooluniverse/data/geo_tools.json +200 -48
  31. tooluniverse/data/gnomad_tools.json +109 -0
  32. tooluniverse/data/gtopdb_tools.json +68 -0
  33. tooluniverse/data/gwas_tools.json +32 -0
  34. tooluniverse/data/interpro_tools.json +199 -0
  35. tooluniverse/data/jaspar_tools.json +70 -0
  36. tooluniverse/data/kegg_tools.json +356 -0
  37. tooluniverse/data/mpd_tools.json +87 -0
  38. tooluniverse/data/ols_tools.json +314 -0
  39. tooluniverse/data/package_discovery_tools.json +64 -0
  40. tooluniverse/data/packages/categorized_tools.txt +0 -1
  41. tooluniverse/data/packages/machine_learning_tools.json +0 -47
  42. tooluniverse/data/paleobiology_tools.json +91 -0
  43. tooluniverse/data/pride_tools.json +62 -0
  44. tooluniverse/data/pypi_package_inspector_tools.json +158 -0
  45. tooluniverse/data/python_executor_tools.json +341 -0
  46. tooluniverse/data/regulomedb_tools.json +50 -0
  47. tooluniverse/data/remap_tools.json +89 -0
  48. tooluniverse/data/screen_tools.json +89 -0
  49. tooluniverse/data/tool_discovery_agents.json +428 -0
  50. tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
  51. tooluniverse/data/uniprot_tools.json +77 -0
  52. tooluniverse/data/web_search_tools.json +250 -0
  53. tooluniverse/data/worms_tools.json +55 -0
  54. tooluniverse/dataset_tool.py +2 -2
  55. tooluniverse/dbsnp_tool.py +196 -58
  56. tooluniverse/default_config.py +36 -3
  57. tooluniverse/emdb_tool.py +30 -0
  58. tooluniverse/enrichr_tool.py +14 -14
  59. tooluniverse/ensembl_tool.py +140 -47
  60. tooluniverse/execute_function.py +594 -29
  61. tooluniverse/extended_hooks.py +4 -4
  62. tooluniverse/file_download_tool.py +269 -0
  63. tooluniverse/gene_ontology_tool.py +1 -1
  64. tooluniverse/generate_tools.py +3 -3
  65. tooluniverse/geo_tool.py +81 -28
  66. tooluniverse/gnomad_tool.py +100 -52
  67. tooluniverse/gtopdb_tool.py +41 -0
  68. tooluniverse/humanbase_tool.py +10 -10
  69. tooluniverse/interpro_tool.py +72 -0
  70. tooluniverse/jaspar_tool.py +30 -0
  71. tooluniverse/kegg_tool.py +230 -0
  72. tooluniverse/logging_config.py +2 -2
  73. tooluniverse/mcp_client_tool.py +57 -129
  74. tooluniverse/mcp_integration.py +52 -49
  75. tooluniverse/mcp_tool_registry.py +147 -528
  76. tooluniverse/mpd_tool.py +42 -0
  77. tooluniverse/ncbi_eutils_tool.py +96 -0
  78. tooluniverse/ols_tool.py +435 -0
  79. tooluniverse/openalex_tool.py +8 -8
  80. tooluniverse/openfda_tool.py +2 -2
  81. tooluniverse/output_hook.py +15 -15
  82. tooluniverse/package_discovery_tool.py +217 -0
  83. tooluniverse/package_tool.py +1 -1
  84. tooluniverse/paleobiology_tool.py +30 -0
  85. tooluniverse/pmc_tool.py +2 -2
  86. tooluniverse/pride_tool.py +30 -0
  87. tooluniverse/pypi_package_inspector_tool.py +593 -0
  88. tooluniverse/python_executor_tool.py +711 -0
  89. tooluniverse/regulomedb_tool.py +30 -0
  90. tooluniverse/remap_tool.py +44 -0
  91. tooluniverse/remote/boltz/boltz_mcp_server.py +1 -1
  92. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +3 -3
  93. tooluniverse/remote/immune_compass/compass_tool.py +3 -3
  94. tooluniverse/remote/pinnacle/pinnacle_tool.py +2 -2
  95. tooluniverse/remote/transcriptformer/transcriptformer_tool.py +3 -3
  96. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +3 -3
  97. tooluniverse/remote_tool.py +4 -4
  98. tooluniverse/screen_tool.py +44 -0
  99. tooluniverse/scripts/filter_tool_files.py +2 -2
  100. tooluniverse/smcp.py +93 -12
  101. tooluniverse/smcp_server.py +100 -21
  102. tooluniverse/space/__init__.py +46 -0
  103. tooluniverse/space/loader.py +133 -0
  104. tooluniverse/space/validator.py +353 -0
  105. tooluniverse/tool_finder_embedding.py +5 -3
  106. tooluniverse/tool_finder_keyword.py +12 -10
  107. tooluniverse/tool_finder_llm.py +12 -8
  108. tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
  109. tooluniverse/tools/BLAST_protein_search.py +63 -0
  110. tooluniverse/tools/ClinVar_search_variants.py +26 -15
  111. tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
  112. tooluniverse/tools/EMDB_get_structure.py +46 -0
  113. tooluniverse/tools/GtoPdb_get_targets.py +52 -0
  114. tooluniverse/tools/InterPro_get_domain_details.py +46 -0
  115. tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
  116. tooluniverse/tools/InterPro_search_domains.py +52 -0
  117. tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
  118. tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
  119. tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
  120. tooluniverse/tools/PackageAnalyzer.py +55 -0
  121. tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
  122. tooluniverse/tools/PyPIPackageInspector.py +59 -0
  123. tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
  124. tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
  125. tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
  126. tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
  127. tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
  128. tooluniverse/tools/ToolDiscover.py +11 -11
  129. tooluniverse/tools/UniProt_id_mapping.py +63 -0
  130. tooluniverse/tools/UniProt_search.py +63 -0
  131. tooluniverse/tools/UnifiedToolGenerator.py +59 -0
  132. tooluniverse/tools/WoRMS_search_species.py +49 -0
  133. tooluniverse/tools/XMLToolOptimizer.py +55 -0
  134. tooluniverse/tools/__init__.py +119 -29
  135. tooluniverse/tools/_shared_client.py +3 -3
  136. tooluniverse/tools/alphafold_get_annotations.py +3 -3
  137. tooluniverse/tools/alphafold_get_prediction.py +3 -3
  138. tooluniverse/tools/alphafold_get_summary.py +3 -3
  139. tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
  140. tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
  141. tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
  142. tooluniverse/tools/clinvar_get_variant_details.py +49 -0
  143. tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
  144. tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
  145. tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
  146. tooluniverse/tools/download_binary_file.py +66 -0
  147. tooluniverse/tools/download_file.py +71 -0
  148. tooluniverse/tools/download_text_content.py +55 -0
  149. tooluniverse/tools/dynamic_package_discovery.py +59 -0
  150. tooluniverse/tools/ensembl_get_sequence.py +52 -0
  151. tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
  152. tooluniverse/tools/ensembl_lookup_gene.py +46 -0
  153. tooluniverse/tools/geo_get_dataset_info.py +46 -0
  154. tooluniverse/tools/geo_get_sample_info.py +46 -0
  155. tooluniverse/tools/geo_search_datasets.py +67 -0
  156. tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
  157. tooluniverse/tools/kegg_find_genes.py +52 -0
  158. tooluniverse/tools/kegg_get_gene_info.py +46 -0
  159. tooluniverse/tools/kegg_get_pathway_info.py +46 -0
  160. tooluniverse/tools/kegg_list_organisms.py +44 -0
  161. tooluniverse/tools/kegg_search_pathway.py +46 -0
  162. tooluniverse/tools/ols_find_similar_terms.py +63 -0
  163. tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
  164. tooluniverse/tools/ols_get_term_ancestors.py +67 -0
  165. tooluniverse/tools/ols_get_term_children.py +67 -0
  166. tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
  167. tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
  168. tooluniverse/tools/ols_search_terms.py +71 -0
  169. tooluniverse/tools/python_code_executor.py +79 -0
  170. tooluniverse/tools/python_script_runner.py +79 -0
  171. tooluniverse/tools/web_api_documentation_search.py +63 -0
  172. tooluniverse/tools/web_search.py +71 -0
  173. tooluniverse/uniprot_tool.py +219 -16
  174. tooluniverse/url_tool.py +19 -1
  175. tooluniverse/uspto_tool.py +1 -1
  176. tooluniverse/utils.py +12 -12
  177. tooluniverse/web_search_tool.py +229 -0
  178. tooluniverse/worms_tool.py +64 -0
  179. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +8 -3
  180. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +184 -92
  181. tooluniverse/data/genomics_tools.json +0 -174
  182. tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
  183. tooluniverse/tools/ToolImplementationGenerator.py +0 -67
  184. tooluniverse/tools/ToolOptimizer.py +0 -59
  185. tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
  186. tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
  187. tooluniverse/ucsc_tool.py +0 -60
  188. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
  189. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
  190. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
  191. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
tooluniverse/uniprot_tool.py CHANGED
@@ -1,3 +1,4 @@
+ import time
  import requests
  from typing import Any, Dict
  from .base_tool import BaseTool
@@ -22,7 +23,7 @@ class UniProtRESTTool(BaseTool):
  """Custom data extraction with support for filtering"""

  # Handle specific UniProt extraction patterns
- if extract_path == "comments[?(@.commentType=='FUNCTION')].texts[*].value":
+ if extract_path == ("comments[?(@.commentType==" "'FUNCTION')].texts[*].value"):
  # Extract function comments
  result = []
  for comment in data.get("comments", []):
@@ -32,41 +33,40 @@ class UniProtRESTTool(BaseTool):
  result.append(text["value"])
  return result

- elif (
- extract_path
- == "comments[?(@.commentType=='SUBCELLULAR LOCATION')].subcellularLocations[*].location.value"
+ elif extract_path == (
+ "comments[?(@.commentType=="
+ "'SUBCELLULAR LOCATION')].subcellularLocations[*].location.value"
  ):
  # Extract subcellular locations
  result = []
  for comment in data.get("comments", []):
  if comment.get("commentType") == "SUBCELLULAR LOCATION":
  for location in comment.get("subcellularLocations", []):
- if "location" in location and "value" in location["location"]:
+ if "location" in location and ("value" in location["location"]):
  result.append(location["location"]["value"])
  return result

  elif extract_path == "features[?(@.type=='VARIANT')]":
- # Extract variant features (correct type is "Natural variant")
+ # Extract variant features
  result = []
  for feature in data.get("features", []):
  if feature.get("type") == "Natural variant":
  result.append(feature)
  return result

- elif (
- extract_path
- == "features[?(@.type=='MODIFIED RESIDUE' || @.type=='SIGNAL')]"
+ elif extract_path == (
+ "features[?(@.type=='MODIFIED RESIDUE' || " "@.type=='SIGNAL')]"
  ):
- # Extract PTM and signal features (correct types are "Modified residue" and "Signal")
+ # Extract PTM and signal features
  result = []
  for feature in data.get("features", []):
  if feature.get("type") in ["Modified residue", "Signal"]:
  result.append(feature)
  return result

- elif (
- extract_path
- == "comments[?(@.commentType=='ALTERNATIVE PRODUCTS')].isoforms[*].isoformIds[*]"
+ elif extract_path == (
+ "comments[?(@.commentType=="
+ "'ALTERNATIVE PRODUCTS')].isoforms[*].isoformIds[*]"
  ):
  # Extract isoform IDs
  result = []
@@ -97,17 +97,220 @@ class UniProtRESTTool(BaseTool):
  return {"error": "jsonpath_ng library is required for data extraction"}
  except Exception as e:
  return {
- "error": f"Failed to extract UniProt fields using JSONPath '{extract_path}': {e}"
+ "error": (
+ f"Failed to extract UniProt fields using "
+ f"JSONPath '{extract_path}': {e}"
+ )
  }

+ def _handle_search(self, arguments: Dict[str, Any]) -> Any:
+ """Handle search queries with flexible parameters"""
+ query = arguments.get("query", "")
+ organism = arguments.get("organism", "")
+ limit = min(arguments.get("limit", 25), 500)
+ fields = arguments.get("fields")
+
+ # Build query string
+ query_parts = [query]
+ if organism:
+ # Support common organism names
+ organism_map = {
+ "human": "9606",
+ "mouse": "10090",
+ "rat": "10116",
+ "yeast": "559292",
+ }
+ taxon_id = organism_map.get(organism.lower(), organism)
+ query_parts.append(f"organism_id:{taxon_id}")
+
+ full_query = " AND ".join(query_parts)
+
+ # Build parameters
+ params = {"query": full_query, "size": str(limit), "format": "json"}
+
+ # Add fields parameter if specified
+ if fields and isinstance(fields, list):
+ params["fields"] = ",".join(fields)
+
+ url = "https://rest.uniprot.org/uniprotkb/search"
+
+ try:
+ resp = requests.get(url, params=params, timeout=self.timeout)
+ resp.raise_for_status()
+ data = resp.json()
+
+ # Extract results
+ results = data.get("results", [])
+ formatted_results = []
+
+ for entry in results:
+ formatted_entry = {
+ "accession": entry.get("primaryAccession", ""),
+ "id": entry.get("uniProtkbId", ""),
+ "protein_name": "",
+ "gene_names": [],
+ "organism": "",
+ "length": 0,
+ }
+
+ # Extract protein name
+ protein_desc = entry.get("proteinDescription", {})
+ rec_name = protein_desc.get("recommendedName", {})
+ if rec_name:
+ full_name = rec_name.get("fullName", {})
+ if full_name:
+ formatted_entry["protein_name"] = full_name.get("value", "")
+
+ # Extract gene names
+ genes = entry.get("genes", [])
+ for gene in genes:
+ gene_name = gene.get("geneName", {})
+ if gene_name:
+ formatted_entry["gene_names"].append(gene_name.get("value", ""))
+
+ # Extract organism
+ organism_info = entry.get("organism", {})
+ formatted_entry["organism"] = organism_info.get("scientificName", "")
+
+ # Extract sequence length
+ sequence = entry.get("sequence", {})
+ formatted_entry["length"] = sequence.get("length", 0)
+
+ formatted_results.append(formatted_entry)
+
+ return {
+ "total_results": data.get("resultsFound", len(results)),
+ "returned": len(results),
+ "results": formatted_results,
+ }
+
+ except requests.exceptions.Timeout:
+ return {"error": "Request to UniProt API timed out"}
+ except requests.exceptions.RequestException as e:
+ return {"error": f"Request to UniProt API failed: {e}"}
+ except ValueError as e:
+ return {"error": f"Failed to parse JSON response: {e}"}
+
+ def _handle_id_mapping(self, arguments: Dict[str, Any]) -> Any:
+ """Handle ID mapping requests"""
+
+ ids = arguments.get("ids", [])
+ from_db = arguments.get("from_db", "")
+ to_db = arguments.get("to_db", "UniProtKB")
+ max_wait_time = arguments.get("max_wait_time", 30)
+
+ # Normalize IDs to list
+ if isinstance(ids, str):
+ ids = [ids]
+
+ # Normalize database names
+ db_mapping = {
+ "Ensembl": "Ensembl",
+ "Gene_Name": "Gene_Name",
+ "RefSeq_Protein": "RefSeq_Protein_ID",
+ "PDB": "PDB_ID",
+ "EMBL": "EMBL_ID",
+ "UniProtKB": "UniProtKB_AC-ID",
+ }
+ from_db_normalized = db_mapping.get(from_db, from_db)
+ to_db_normalized = db_mapping.get(to_db, to_db)
+
+ # Step 1: Submit mapping job
+ submit_url = "https://rest.uniprot.org/idmapping/run"
+ payload = {"ids": ids, "from": from_db_normalized, "to": to_db_normalized}
+
+ try:
+ resp = requests.post(submit_url, json=payload, timeout=self.timeout)
+ resp.raise_for_status()
+ job_data = resp.json()
+ job_id = job_data.get("jobId")
+
+ if not job_id:
+ return {"error": "Failed to get job ID from UniProt ID mapping"}
+
+ # Step 2: Poll for job completion
+ status_url = f"https://rest.uniprot.org/idmapping/status/{job_id}"
+ results_url = f"https://rest.uniprot.org/idmapping/results/{job_id}"
+
+ start_time = time.time()
+ while time.time() - start_time < max_wait_time:
+ status_resp = requests.get(status_url, timeout=self.timeout)
+ status_data = status_resp.json()
+
+ if status_data.get("status") == "FINISHED":
+ # Step 3: Retrieve results
+ results_resp = requests.get(results_url, timeout=self.timeout)
+ results_data = results_resp.json()
+
+ # Format results
+ formatted_results = []
+ failed = []
+
+ # Extract mappings
+ results = results_data.get("results", [])
+ for result in results:
+ from_value = result.get("from", "")
+ to_values = result.get("to", {}).get("results", [])
+
+ if to_values:
+ for to_item in to_values:
+ to_info = to_item.get("to", {})
+ gene_names = to_info.get("geneNames", [])
+ gene_name = ""
+ if gene_names:
+ gene_name = gene_names[0].get("value", "")
+
+ formatted_results.append(
+ {
+ "from": from_value,
+ "to": {
+ "accession": to_info.get(
+ "primaryAccession", ""
+ ),
+ "id": to_info.get("uniProtkbId", ""),
+ "gene_name": gene_name,
+ },
+ }
+ )
+ else:
+ failed.append(from_value)
+
+ return {
+ "mapped_count": len(formatted_results),
+ "results": formatted_results,
+ "failed": list(set(failed)) if failed else [],
+ }
+ elif status_data.get("status") == "FAILED":
+ return {"error": "ID mapping job failed"}
+
+ time.sleep(1)  # Wait 1 second before next poll
+
+ return {"error": (f"ID mapping timed out after {max_wait_time} seconds")}
+
+ except requests.exceptions.Timeout:
+ return {"error": "Request to UniProt API timed out"}
+ except requests.exceptions.RequestException as e:
+ return {"error": f"Request to UniProt API failed: {e}"}
+ except ValueError as e:
+ return {"error": f"Failed to parse JSON response: {e}"}
+
  def run(self, arguments: Dict[str, Any]) -> Any:
- # Build URL
+ # Check if this is a search request
+ search_type = self.tool_config.get("fields", {}).get("search_type")
+ mapping_type = self.tool_config.get("fields", {}).get("mapping_type")
+
+ if search_type == "search":
+ return self._handle_search(arguments)
+ elif mapping_type == "async":
+ return self._handle_id_mapping(arguments)
+
+ # Build URL for standard accession-based queries
  url = self._build_url(arguments)
  try:
  resp = requests.get(url, timeout=self.timeout)
  if resp.status_code != 200:
  return {
- "error": f"UniProt API returned status code: {resp.status_code}",
+ "error": (f"UniProt API returned status code: {resp.status_code}"),
  "detail": resp.text,
  }
  data = resp.json()
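
The reworked run() now dispatches on two tool-config fields before falling back to the original accession-based URL path. A minimal sketch of how the two new modes might be invoked; the pared-down config dicts here are hypothetical and show only the keys the dispatch code reads (a real ToolUniverse config carries more metadata, including whatever populates self.timeout):

    # Illustrative only: configs below are hypothetical, not shipped tool definitions.
    from tooluniverse.uniprot_tool import UniProtRESTTool

    search_tool = UniProtRESTTool({"fields": {"search_type": "search"}})
    hits = search_tool.run({"query": "insulin", "organism": "human", "limit": 5})
    # -> {"total_results": ..., "returned": ..., "results": [{"accession": ..., ...}]}

    mapping_tool = UniProtRESTTool({"fields": {"mapping_type": "async"}})
    mapped = mapping_tool.run({
        "ids": ["ENSG00000141510"],   # Ensembl gene ID for TP53
        "from_db": "Ensembl",
        "to_db": "UniProtKB",
        "max_wait_time": 30,          # seconds to poll the idmapping job
    })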
tooluniverse/url_tool.py CHANGED
@@ -94,7 +94,7 @@ class URLToPDFTextTool(BaseTool):
  """
  Ensure Playwright browser binaries are installed.

- Returns:
+ Returns
  None on success, or an error string on failure.
  """
  # Allow user to skip auto-install via env var
@@ -188,6 +188,24 @@ class URLToPDFTextTool(BaseTool):

  timeout = arguments.get("timeout", 30)

+ # First, check if the URL returns HTML or a downloadable file
+ try:
+ resp = requests.head(url, timeout=timeout, allow_redirects=True)
+ content_type = resp.headers.get("Content-Type", "").lower()
+ # If it's not HTML, handle it as a simple text download
+ is_html = "text/html" in content_type or "application/xhtml" in content_type
+ if not is_html:
+ # Download the file directly and return its text content
+ resp = requests.get(url, timeout=timeout, allow_redirects=True)
+ if resp.status_code != 200:
+ return {"error": f"HTTP {resp.status_code}"}
+ text = resp.text
+ if not text.strip():
+ return {"error": "File appears to be empty or binary."}
+ return {self.return_key: text.strip()}
+ except requests.exceptions.RequestException as e:
+ return {"error": f"Failed to check content type: {e}"}
+
  # Ensure browsers are installed (auto-install if needed)
  ensure_error = self._ensure_playwright_browsers(
  browsers=("chromium",), with_deps=False
tooluniverse/uspto_tool.py CHANGED
@@ -132,7 +132,7 @@ class USPTOOpenDataPortalTool(BaseTool):
  Args:
  arguments: A dictionary of arguments for the tool, matching the parameters in the tool definition.

- Returns:
+ Returns
  The result of the API call, either as a dictionary (for JSON) or a string (for CSV).
  """
  endpoint = self.tool_config.get("api_endpoint")
tooluniverse/utils.py CHANGED
@@ -113,11 +113,11 @@ def yaml_to_dict(yaml_file_path):
  Args:
  yaml_file_path (str): Path to the YAML file.

- Returns:
+ Returns
  dict: Dictionary representation of the YAML file content.
  """
  try:
- with open(yaml_file_path, "r") as file:
+ with open(yaml_file_path, "r", encoding="utf-8") as file:
  yaml_dict = yaml.safe_load(file)
  return yaml_dict
  except FileNotFoundError:
@@ -130,13 +130,13 @@ def read_json_list(file_path):
  """
  Reads a list of JSON objects from a file.

- Parameters:
+ Parameters
  file_path (str): The path to the JSON file.

- Returns:
+ Returns
  list: A list of dictionaries containing the JSON objects.
  """
- with open(file_path, "r") as file:
+ with open(file_path, "r", encoding="utf-8") as file:
  data = json.load(file)
  return data

@@ -355,7 +355,7 @@ def format_error_response(
  tool_name (str, optional): Name of the tool that failed
  context (Dict[str, Any], optional): Additional context about the error

- Returns:
+ Returns
  Dict[str, Any]: Standardized error response
  """
  from .exceptions import ToolError
@@ -391,7 +391,7 @@ def get_parameter_schema(tool_config: Dict[str, Any]) -> Dict[str, Any]:
  Args:
  tool_config (Dict[str, Any]): Tool configuration dictionary

- Returns:
+ Returns
  Dict[str, Any]: Parameter schema dictionary
  """
  return tool_config.get("parameter", {})
@@ -404,7 +404,7 @@ def validate_query(query: Dict[str, Any]) -> bool:
  Args:
  query (Dict[str, Any]): The query dictionary to validate

- Returns:
+ Returns
  bool: True if query is valid, False otherwise
  """
  if not isinstance(query, dict):
@@ -427,7 +427,7 @@ def normalize_gene_symbol(gene_symbol: str) -> str:
  Args:
  gene_symbol (str): The gene symbol to normalize

- Returns:
+ Returns
  str: Normalized gene symbol
  """
  if not isinstance(gene_symbol, str):
@@ -454,7 +454,7 @@ def format_api_response(
  response_data (Any): The response data to format
  format_type (str): The desired output format ('json', 'pretty', 'minimal')

- Returns:
+ Returns
  Union[str, Dict[str, Any]]: Formatted response
  """
  if format_type == "json":
@@ -493,7 +493,7 @@ def validate_hook_config(config: Dict[str, Any]) -> bool:
  Args:
  config (Dict[str, Any]): Hook configuration to validate

- Returns:
+ Returns
  bool: True if configuration is valid, False otherwise
  """
  try:
@@ -561,7 +561,7 @@ def validate_hook_conditions(conditions: Dict[str, Any]) -> bool:
  Args:
  conditions (Dict[str, Any]): Hook conditions to validate

- Returns:
+ Returns
  bool: True if conditions are valid, False otherwise
  """
  try:
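
The repeated open(..., encoding="utf-8") change in utils.py guards against platform-dependent defaults: without an explicit encoding, open() uses locale.getpreferredencoding(), which is often cp1252 on Windows and can mangle non-ASCII tool data or raise UnicodeDecodeError. A quick self-contained illustration of why the explicit encoding makes the read deterministic:

    import json, os, tempfile

    payload = [{"name": "β-catenin"}]  # non-ASCII content
    path = os.path.join(tempfile.mkdtemp(), "tools.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False)

    # Reading this file without encoding= on a cp1252 locale would mangle "β";
    # pinning UTF-8 gives the same bytes-to-text mapping on every platform.
    with open(path, "r", encoding="utf-8") as f:
        assert json.load(f) == payload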
tooluniverse/web_search_tool.py ADDED
@@ -0,0 +1,229 @@
+ """
+ Web search tools for ToolUniverse using DDGS (Dux Distributed Global Search).
+
+ This module provides web search capabilities using the ddgs library,
+ which supports multiple search engines including DuckDuckGo, Google, Bing, etc.
+ """
+
+ import time
+ from typing import Dict, Any, List
+ from ddgs import DDGS
+ from .base_tool import BaseTool
+ from .tool_registry import register_tool
+
+
+ @register_tool("WebSearchTool")
+ class WebSearchTool(BaseTool):
+ """
+ Web search tool using DDGS library.
+
+ This tool performs web searches using the DDGS library which supports
+ multiple search engines including Google, Bing, Brave, Yahoo, DuckDuckGo, etc.
+ """
+
+ def __init__(self, tool_config: Dict[str, Any]):
+ super().__init__(tool_config)
+ # DDGS instance will be created per request to avoid session issues
+
+ def _search_with_ddgs(
+ self,
+ query: str,
+ max_results: int = 10,
+ backend: str = "auto",
+ region: str = "us-en",
+ safesearch: str = "moderate",
+ ) -> List[Dict[str, Any]]:
+ """
+ Perform a web search using DDGS library and return formatted results.
+
+ Args:
+ query: Search query string
+ max_results: Maximum number of results to return
+ backend: Search engine backend (auto, google, bing, brave, etc.)
+ region: Search region (e.g., 'us-en', 'cn-zh')
+ safesearch: Safe search level ('on', 'moderate', 'off')
+
+ Returns:
+ List of search results with title, url, and snippet
+ """
+ try:
+ # Create DDGS instance
+ ddgs = DDGS()
+
+ # Perform search using DDGS
+ search_results = list(
+ ddgs.text(
+ query=query,
+ max_results=max_results,
+ backend=backend,
+ region=region,
+ safesearch=safesearch,
+ )
+ )
+
+ # Convert DDGS results to our expected format
+ results = []
+ for i, result in enumerate(search_results):
+ results.append(
+ {
+ "title": result.get("title", ""),
+ "url": result.get("href", ""),
+ "snippet": result.get("body", ""),
+ "rank": i + 1,
+ }
+ )
+
+ return results
+
+ except Exception as e:
+ return [
+ {
+ "title": "Search Error",
+ "url": "",
+ "snippet": f"Failed to perform search: {str(e)}",
+ "rank": 0,
+ }
+ ]
+
+ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Execute web search using DDGS.
+
+ Args:
+ arguments: Dictionary containing:
+ - query: Search query string
+ - max_results: Maximum number of results (default: 10)
+ - search_type: Type of search (default: 'general')
+ - backend: Search engine backend (default: 'auto')
+ - region: Search region (default: 'us-en')
+ - safesearch: Safe search level (default: 'moderate')
+
+ Returns:
+ Dictionary containing search results
+ """
+ try:
+ query = arguments.get("query", "").strip()
+ max_results = int(arguments.get("max_results", 10))
+ search_type = arguments.get("search_type", "general")
+ backend = arguments.get("backend", "auto")
+ region = arguments.get("region", "us-en")
+ safesearch = arguments.get("safesearch", "moderate")
+
+ if not query:
+ return {
+ "status": "error",
+ "error": "Query parameter is required",
+ "results": [],
+ }
+
+ # Validate max_results
+ max_results = max(1, min(max_results, 50))  # Limit between 1-50
+
+ # Modify query based on search type
+ if search_type == "api_documentation":
+ query = f"{query} API documentation python library"
+ elif search_type == "python_packages":
+ query = f"{query} python package pypi"
+ elif search_type == "github":
+ query = f"{query} site:github.com"
+
+ # Perform search using DDGS
+ results = self._search_with_ddgs(
+ query=query,
+ max_results=max_results,
+ backend=backend,
+ region=region,
+ safesearch=safesearch,
+ )
+
+ # Add rate limiting to be respectful
+ time.sleep(0.5)
+
+ return {
+ "status": "success",
+ "query": query,
+ "search_type": search_type,
+ "total_results": len(results),
+ "results": results,
+ }
+
+ except Exception as e:
+ return {"status": "error", "error": str(e), "results": []}
+
+
+ @register_tool("WebAPIDocumentationSearchTool")
+ class WebAPIDocumentationSearchTool(WebSearchTool):
+ """
+ Specialized web search tool for API documentation and Python libraries.
+
+ This tool is optimized for finding API documentation, Python packages,
+ and technical resources using DDGS with multiple search engines.
+ """
+
+ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Execute API documentation focused search.
+
+ Args:
+ arguments: Dictionary containing:
+ - query: Search query string
+ - max_results: Maximum number of results (default: 10)
+ - focus: Focus area ('api_docs', 'python_packages', etc.)
+ - backend: Search engine backend (default: 'auto')
+
+ Returns:
+ Dictionary containing search results
+ """
+ try:
+ query = arguments.get("query", "").strip()
+ focus = arguments.get("focus", "api_docs")
+ backend = arguments.get("backend", "auto")
+
+ if not query:
+ return {
+ "status": "error",
+ "error": "Query parameter is required",
+ "results": [],
+ }
+
+ # Modify query based on focus
+ if focus == "api_docs":
+ enhanced_query = f'"{query}" API documentation official docs'
+ elif focus == "python_packages":
+ enhanced_query = f'"{query}" python package pypi install pip'
+ elif focus == "github_repos":
+ enhanced_query = f'"{query}" github repository source code'
+ else:
+ enhanced_query = f'"{query}" documentation API reference'
+
+ # Use parent class search with enhanced query
+ arguments["query"] = enhanced_query
+ arguments["search_type"] = "api_documentation"
+ arguments["backend"] = backend
+
+ result = super().run(arguments)
+
+ # Add focus-specific metadata
+ if result["status"] == "success":
+ result["focus"] = focus
+ result["enhanced_query"] = enhanced_query
+
+ # Filter results for better relevance
+ if focus == "python_packages":
+ result["results"] = [
+ r
+ for r in result["results"]
+ if (
+ "pypi.org" in r.get("url", "")
+ or "python" in r.get("title", "").lower()
+ )
+ ]
+ elif focus == "github_repos":
+ result["results"] = [
+ r for r in result["results"] if "github.com" in r.get("url", "")
+ ]
+
+ return result
+
+ except Exception as e:
+ return {"status": "error", "error": str(e), "results": []}