tooluniverse 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tooluniverse might be problematic.
- tooluniverse/__init__.py +57 -1
- tooluniverse/blast_tool.py +132 -0
- tooluniverse/boltz_tool.py +2 -2
- tooluniverse/cbioportal_tool.py +42 -0
- tooluniverse/clinvar_tool.py +268 -74
- tooluniverse/compose_scripts/tool_discover.py +1941 -443
- tooluniverse/data/agentic_tools.json +0 -370
- tooluniverse/data/alphafold_tools.json +6 -6
- tooluniverse/data/blast_tools.json +112 -0
- tooluniverse/data/cbioportal_tools.json +87 -0
- tooluniverse/data/clinvar_tools.json +235 -0
- tooluniverse/data/compose_tools.json +0 -89
- tooluniverse/data/dbsnp_tools.json +275 -0
- tooluniverse/data/emdb_tools.json +61 -0
- tooluniverse/data/ensembl_tools.json +259 -0
- tooluniverse/data/file_download_tools.json +275 -0
- tooluniverse/data/geo_tools.json +200 -48
- tooluniverse/data/gnomad_tools.json +109 -0
- tooluniverse/data/gtopdb_tools.json +68 -0
- tooluniverse/data/gwas_tools.json +32 -0
- tooluniverse/data/interpro_tools.json +199 -0
- tooluniverse/data/jaspar_tools.json +70 -0
- tooluniverse/data/kegg_tools.json +356 -0
- tooluniverse/data/mpd_tools.json +87 -0
- tooluniverse/data/ols_tools.json +314 -0
- tooluniverse/data/package_discovery_tools.json +64 -0
- tooluniverse/data/packages/categorized_tools.txt +0 -1
- tooluniverse/data/packages/machine_learning_tools.json +0 -47
- tooluniverse/data/paleobiology_tools.json +91 -0
- tooluniverse/data/pride_tools.json +62 -0
- tooluniverse/data/pypi_package_inspector_tools.json +158 -0
- tooluniverse/data/python_executor_tools.json +341 -0
- tooluniverse/data/regulomedb_tools.json +50 -0
- tooluniverse/data/remap_tools.json +89 -0
- tooluniverse/data/screen_tools.json +89 -0
- tooluniverse/data/tool_discovery_agents.json +428 -0
- tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
- tooluniverse/data/uniprot_tools.json +77 -0
- tooluniverse/data/web_search_tools.json +250 -0
- tooluniverse/data/worms_tools.json +55 -0
- tooluniverse/dbsnp_tool.py +196 -58
- tooluniverse/default_config.py +35 -2
- tooluniverse/emdb_tool.py +30 -0
- tooluniverse/ensembl_tool.py +140 -47
- tooluniverse/execute_function.py +74 -14
- tooluniverse/file_download_tool.py +269 -0
- tooluniverse/geo_tool.py +81 -28
- tooluniverse/gnomad_tool.py +100 -52
- tooluniverse/gtopdb_tool.py +41 -0
- tooluniverse/interpro_tool.py +72 -0
- tooluniverse/jaspar_tool.py +30 -0
- tooluniverse/kegg_tool.py +230 -0
- tooluniverse/mpd_tool.py +42 -0
- tooluniverse/ncbi_eutils_tool.py +96 -0
- tooluniverse/ols_tool.py +435 -0
- tooluniverse/package_discovery_tool.py +217 -0
- tooluniverse/paleobiology_tool.py +30 -0
- tooluniverse/pride_tool.py +30 -0
- tooluniverse/pypi_package_inspector_tool.py +593 -0
- tooluniverse/python_executor_tool.py +711 -0
- tooluniverse/regulomedb_tool.py +30 -0
- tooluniverse/remap_tool.py +44 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +1 -1
- tooluniverse/screen_tool.py +44 -0
- tooluniverse/smcp_server.py +3 -3
- tooluniverse/tool_finder_embedding.py +3 -1
- tooluniverse/tool_finder_keyword.py +3 -1
- tooluniverse/tool_finder_llm.py +6 -2
- tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
- tooluniverse/tools/BLAST_protein_search.py +63 -0
- tooluniverse/tools/ClinVar_search_variants.py +26 -15
- tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
- tooluniverse/tools/EMDB_get_structure.py +46 -0
- tooluniverse/tools/GtoPdb_get_targets.py +52 -0
- tooluniverse/tools/InterPro_get_domain_details.py +46 -0
- tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
- tooluniverse/tools/InterPro_search_domains.py +52 -0
- tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
- tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
- tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
- tooluniverse/tools/PackageAnalyzer.py +55 -0
- tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
- tooluniverse/tools/PyPIPackageInspector.py +59 -0
- tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
- tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
- tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
- tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
- tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
- tooluniverse/tools/ToolDiscover.py +11 -11
- tooluniverse/tools/UniProt_id_mapping.py +63 -0
- tooluniverse/tools/UniProt_search.py +63 -0
- tooluniverse/tools/UnifiedToolGenerator.py +59 -0
- tooluniverse/tools/WoRMS_search_species.py +49 -0
- tooluniverse/tools/XMLToolOptimizer.py +55 -0
- tooluniverse/tools/__init__.py +119 -29
- tooluniverse/tools/alphafold_get_annotations.py +3 -3
- tooluniverse/tools/alphafold_get_prediction.py +3 -3
- tooluniverse/tools/alphafold_get_summary.py +3 -3
- tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
- tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
- tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
- tooluniverse/tools/clinvar_get_variant_details.py +49 -0
- tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
- tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
- tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
- tooluniverse/tools/download_binary_file.py +66 -0
- tooluniverse/tools/download_file.py +71 -0
- tooluniverse/tools/download_text_content.py +55 -0
- tooluniverse/tools/dynamic_package_discovery.py +59 -0
- tooluniverse/tools/ensembl_get_sequence.py +52 -0
- tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
- tooluniverse/tools/ensembl_lookup_gene.py +46 -0
- tooluniverse/tools/geo_get_dataset_info.py +46 -0
- tooluniverse/tools/geo_get_sample_info.py +46 -0
- tooluniverse/tools/geo_search_datasets.py +67 -0
- tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
- tooluniverse/tools/kegg_find_genes.py +52 -0
- tooluniverse/tools/kegg_get_gene_info.py +46 -0
- tooluniverse/tools/kegg_get_pathway_info.py +46 -0
- tooluniverse/tools/kegg_list_organisms.py +44 -0
- tooluniverse/tools/kegg_search_pathway.py +46 -0
- tooluniverse/tools/ols_find_similar_terms.py +63 -0
- tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
- tooluniverse/tools/ols_get_term_ancestors.py +67 -0
- tooluniverse/tools/ols_get_term_children.py +67 -0
- tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
- tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
- tooluniverse/tools/ols_search_terms.py +71 -0
- tooluniverse/tools/python_code_executor.py +79 -0
- tooluniverse/tools/python_script_runner.py +79 -0
- tooluniverse/tools/web_api_documentation_search.py +63 -0
- tooluniverse/tools/web_search.py +71 -0
- tooluniverse/uniprot_tool.py +219 -16
- tooluniverse/url_tool.py +18 -0
- tooluniverse/utils.py +2 -2
- tooluniverse/web_search_tool.py +229 -0
- tooluniverse/worms_tool.py +64 -0
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +3 -2
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +143 -54
- tooluniverse/data/genomics_tools.json +0 -174
- tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
- tooluniverse/tools/ToolImplementationGenerator.py +0 -67
- tooluniverse/tools/ToolOptimizer.py +0 -59
- tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
- tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
- tooluniverse/ucsc_tool.py +0 -60
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
tooluniverse/uniprot_tool.py
CHANGED
@@ -1,3 +1,4 @@
+import time
 import requests
 from typing import Any, Dict
 from .base_tool import BaseTool
@@ -22,7 +23,7 @@ class UniProtRESTTool(BaseTool):
         """Custom data extraction with support for filtering"""

         # Handle specific UniProt extraction patterns
-        if extract_path == "comments[?(@.commentType=='FUNCTION')].texts[*].value":
+        if extract_path == ("comments[?(@.commentType==" "'FUNCTION')].texts[*].value"):
             # Extract function comments
             result = []
             for comment in data.get("comments", []):
@@ -32,41 +33,40 @@ class UniProtRESTTool(BaseTool):
                             result.append(text["value"])
             return result

-        elif (
-            extract_path
-            == "comments[?(@.commentType=='SUBCELLULAR LOCATION')].subcellularLocations[*].location.value"
+        elif extract_path == (
+            "comments[?(@.commentType=="
+            "'SUBCELLULAR LOCATION')].subcellularLocations[*].location.value"
         ):
             # Extract subcellular locations
             result = []
             for comment in data.get("comments", []):
                 if comment.get("commentType") == "SUBCELLULAR LOCATION":
                     for location in comment.get("subcellularLocations", []):
-                        if "location" in location and "value" in location["location"]:
+                        if "location" in location and ("value" in location["location"]):
                             result.append(location["location"]["value"])
             return result

         elif extract_path == "features[?(@.type=='VARIANT')]":
-            # Extract variant features
+            # Extract variant features
             result = []
             for feature in data.get("features", []):
                 if feature.get("type") == "Natural variant":
                     result.append(feature)
             return result

-        elif (
-            extract_path
-            == "features[?(@.type=='MODIFIED RESIDUE' || @.type=='SIGNAL')]"
+        elif extract_path == (
+            "features[?(@.type=='MODIFIED RESIDUE' || " "@.type=='SIGNAL')]"
         ):
-            # Extract PTM and signal features
+            # Extract PTM and signal features
             result = []
             for feature in data.get("features", []):
                 if feature.get("type") in ["Modified residue", "Signal"]:
                     result.append(feature)
             return result

-        elif (
-            extract_path
-            == "comments[?(@.commentType=='ALTERNATIVE PRODUCTS')].isoforms[*].isoformIds[*]"
+        elif extract_path == (
+            "comments[?(@.commentType=="
+            "'ALTERNATIVE PRODUCTS')].isoforms[*].isoformIds[*]"
         ):
             # Extract isoform IDs
             result = []
@@ -97,17 +97,220 @@ class UniProtRESTTool(BaseTool):
             return {"error": "jsonpath_ng library is required for data extraction"}
         except Exception as e:
             return {
-                "error": f"Failed to extract UniProt fields using JSONPath '{extract_path}': {e}"
+                "error": (
+                    f"Failed to extract UniProt fields using "
+                    f"JSONPath '{extract_path}': {e}"
+                )
             }

+    def _handle_search(self, arguments: Dict[str, Any]) -> Any:
+        """Handle search queries with flexible parameters"""
+        query = arguments.get("query", "")
+        organism = arguments.get("organism", "")
+        limit = min(arguments.get("limit", 25), 500)
+        fields = arguments.get("fields")
+
+        # Build query string
+        query_parts = [query]
+        if organism:
+            # Support common organism names
+            organism_map = {
+                "human": "9606",
+                "mouse": "10090",
+                "rat": "10116",
+                "yeast": "559292",
+            }
+            taxon_id = organism_map.get(organism.lower(), organism)
+            query_parts.append(f"organism_id:{taxon_id}")
+
+        full_query = " AND ".join(query_parts)
+
+        # Build parameters
+        params = {"query": full_query, "size": str(limit), "format": "json"}
+
+        # Add fields parameter if specified
+        if fields and isinstance(fields, list):
+            params["fields"] = ",".join(fields)
+
+        url = "https://rest.uniprot.org/uniprotkb/search"
+
+        try:
+            resp = requests.get(url, params=params, timeout=self.timeout)
+            resp.raise_for_status()
+            data = resp.json()
+
+            # Extract results
+            results = data.get("results", [])
+            formatted_results = []
+
+            for entry in results:
+                formatted_entry = {
+                    "accession": entry.get("primaryAccession", ""),
+                    "id": entry.get("uniProtkbId", ""),
+                    "protein_name": "",
+                    "gene_names": [],
+                    "organism": "",
+                    "length": 0,
+                }
+
+                # Extract protein name
+                protein_desc = entry.get("proteinDescription", {})
+                rec_name = protein_desc.get("recommendedName", {})
+                if rec_name:
+                    full_name = rec_name.get("fullName", {})
+                    if full_name:
+                        formatted_entry["protein_name"] = full_name.get("value", "")
+
+                # Extract gene names
+                genes = entry.get("genes", [])
+                for gene in genes:
+                    gene_name = gene.get("geneName", {})
+                    if gene_name:
+                        formatted_entry["gene_names"].append(gene_name.get("value", ""))
+
+                # Extract organism
+                organism_info = entry.get("organism", {})
+                formatted_entry["organism"] = organism_info.get("scientificName", "")
+
+                # Extract sequence length
+                sequence = entry.get("sequence", {})
+                formatted_entry["length"] = sequence.get("length", 0)
+
+                formatted_results.append(formatted_entry)
+
+            return {
+                "total_results": data.get("resultsFound", len(results)),
+                "returned": len(results),
+                "results": formatted_results,
+            }
+
+        except requests.exceptions.Timeout:
+            return {"error": "Request to UniProt API timed out"}
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Request to UniProt API failed: {e}"}
+        except ValueError as e:
+            return {"error": f"Failed to parse JSON response: {e}"}
+
+    def _handle_id_mapping(self, arguments: Dict[str, Any]) -> Any:
+        """Handle ID mapping requests"""
+
+        ids = arguments.get("ids", [])
+        from_db = arguments.get("from_db", "")
+        to_db = arguments.get("to_db", "UniProtKB")
+        max_wait_time = arguments.get("max_wait_time", 30)
+
+        # Normalize IDs to list
+        if isinstance(ids, str):
+            ids = [ids]
+
+        # Normalize database names
+        db_mapping = {
+            "Ensembl": "Ensembl",
+            "Gene_Name": "Gene_Name",
+            "RefSeq_Protein": "RefSeq_Protein_ID",
+            "PDB": "PDB_ID",
+            "EMBL": "EMBL_ID",
+            "UniProtKB": "UniProtKB_AC-ID",
+        }
+        from_db_normalized = db_mapping.get(from_db, from_db)
+        to_db_normalized = db_mapping.get(to_db, to_db)
+
+        # Step 1: Submit mapping job
+        submit_url = "https://rest.uniprot.org/idmapping/run"
+        payload = {"ids": ids, "from": from_db_normalized, "to": to_db_normalized}
+
+        try:
+            resp = requests.post(submit_url, json=payload, timeout=self.timeout)
+            resp.raise_for_status()
+            job_data = resp.json()
+            job_id = job_data.get("jobId")
+
+            if not job_id:
+                return {"error": "Failed to get job ID from UniProt ID mapping"}
+
+            # Step 2: Poll for job completion
+            status_url = f"https://rest.uniprot.org/idmapping/status/{job_id}"
+            results_url = f"https://rest.uniprot.org/idmapping/results/{job_id}"
+
+            start_time = time.time()
+            while time.time() - start_time < max_wait_time:
+                status_resp = requests.get(status_url, timeout=self.timeout)
+                status_data = status_resp.json()
+
+                if status_data.get("status") == "FINISHED":
+                    # Step 3: Retrieve results
+                    results_resp = requests.get(results_url, timeout=self.timeout)
+                    results_data = results_resp.json()
+
+                    # Format results
+                    formatted_results = []
+                    failed = []
+
+                    # Extract mappings
+                    results = results_data.get("results", [])
+                    for result in results:
+                        from_value = result.get("from", "")
+                        to_values = result.get("to", {}).get("results", [])
+
+                        if to_values:
+                            for to_item in to_values:
+                                to_info = to_item.get("to", {})
+                                gene_names = to_info.get("geneNames", [])
+                                gene_name = ""
+                                if gene_names:
+                                    gene_name = gene_names[0].get("value", "")
+
+                                formatted_results.append(
+                                    {
+                                        "from": from_value,
+                                        "to": {
+                                            "accession": to_info.get(
+                                                "primaryAccession", ""
+                                            ),
+                                            "id": to_info.get("uniProtkbId", ""),
+                                            "gene_name": gene_name,
+                                        },
+                                    }
+                                )
+                        else:
+                            failed.append(from_value)
+
+                    return {
+                        "mapped_count": len(formatted_results),
+                        "results": formatted_results,
+                        "failed": list(set(failed)) if failed else [],
+                    }
+                elif status_data.get("status") == "FAILED":
+                    return {"error": "ID mapping job failed"}
+
+                time.sleep(1)  # Wait 1 second before next poll
+
+            return {"error": (f"ID mapping timed out after {max_wait_time} seconds")}
+
+        except requests.exceptions.Timeout:
+            return {"error": "Request to UniProt API timed out"}
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Request to UniProt API failed: {e}"}
+        except ValueError as e:
+            return {"error": f"Failed to parse JSON response: {e}"}
+
     def run(self, arguments: Dict[str, Any]) -> Any:
-        #
+        # Check if this is a search request
+        search_type = self.tool_config.get("fields", {}).get("search_type")
+        mapping_type = self.tool_config.get("fields", {}).get("mapping_type")
+
+        if search_type == "search":
+            return self._handle_search(arguments)
+        elif mapping_type == "async":
+            return self._handle_id_mapping(arguments)
+
+        # Build URL for standard accession-based queries
         url = self._build_url(arguments)
         try:
             resp = requests.get(url, timeout=self.timeout)
             if resp.status_code != 200:
                 return {
-                    "error": f"UniProt API returned status code: {resp.status_code}",
+                    "error": (f"UniProt API returned status code: {resp.status_code}"),
                     "detail": resp.text,
                 }
             data = resp.json()
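The new code paths wrap two public UniProt REST flows visible above: a synchronous query against https://rest.uniprot.org/uniprotkb/search and the asynchronous ID-mapping run → status → results cycle. A minimal standalone sketch of the same flows, outside ToolUniverse (the query string, the accession P05067, and the 30-second wait budget are illustrative; UniProt's documented examples submit the mapping job as form fields, whereas the tool above posts a JSON body):

import time
import requests

# Synchronous search, mirroring _handle_search's request shape.
params = {"query": "insulin AND organism_id:9606", "size": "5", "format": "json"}
resp = requests.get("https://rest.uniprot.org/uniprotkb/search", params=params, timeout=30)
resp.raise_for_status()
for entry in resp.json().get("results", []):
    print(entry.get("primaryAccession"), entry.get("uniProtkbId"))

# Asynchronous ID mapping, mirroring _handle_id_mapping: submit, poll, fetch.
job = requests.post(
    "https://rest.uniprot.org/idmapping/run",
    data={"ids": "P05067", "from": "UniProtKB_AC-ID", "to": "Gene_Name"},  # form fields; illustrative ID
    timeout=30,
).json()
job_id = job["jobId"]

deadline = time.time() + 30  # illustrative wait budget, like max_wait_time above
while time.time() < deadline:
    status = requests.get(f"https://rest.uniprot.org/idmapping/status/{job_id}", timeout=30).json()
    # The field name follows the tool's own check; the live API may report "jobStatus" instead.
    if status.get("status") == "FINISHED" or status.get("jobStatus") == "FINISHED":
        results = requests.get(f"https://rest.uniprot.org/idmapping/results/{job_id}", timeout=30).json()
        print(results.get("results", []))
        break
    time.sleep(1)  # poll once per second, as the tool does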
tooluniverse/url_tool.py
CHANGED
@@ -188,6 +188,24 @@ class URLToPDFTextTool(BaseTool):

         timeout = arguments.get("timeout", 30)

+        # First, check if the URL returns HTML or a downloadable file
+        try:
+            resp = requests.head(url, timeout=timeout, allow_redirects=True)
+            content_type = resp.headers.get("Content-Type", "").lower()
+            # If it's not HTML, handle it as a simple text download
+            is_html = "text/html" in content_type or "application/xhtml" in content_type
+            if not is_html:
+                # Download the file directly and return its text content
+                resp = requests.get(url, timeout=timeout, allow_redirects=True)
+                if resp.status_code != 200:
+                    return {"error": f"HTTP {resp.status_code}"}
+                text = resp.text
+                if not text.strip():
+                    return {"error": "File appears to be empty or binary."}
+                return {self.return_key: text.strip()}
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Failed to check content type: {e}"}
+
         # Ensure browsers are installed (auto-install if needed)
         ensure_error = self._ensure_playwright_browsers(
             browsers=("chromium",), with_deps=False
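The new guard above issues a HEAD request first and only hands HTML pages to the Playwright/PDF path; anything else is fetched directly as text. A rough standalone sketch of that pre-check (the helper name and example URL are illustrative, not part of the tool):

import requests

def fetch_if_plain_text(url: str, timeout: int = 30):
    """Return the body as text when the URL does not serve HTML, else None."""
    head = requests.head(url, timeout=timeout, allow_redirects=True)
    content_type = head.headers.get("Content-Type", "").lower()
    if "text/html" in content_type or "application/xhtml" in content_type:
        return None  # leave HTML pages to the browser-based extractor
    resp = requests.get(url, timeout=timeout, allow_redirects=True)
    resp.raise_for_status()
    return resp.text.strip() or None

# Example: a plain-text resource that never needs a headless browser.
print(fetch_if_plain_text("https://www.ietf.org/rfc/rfc2119.txt"))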
tooluniverse/utils.py
CHANGED
@@ -117,7 +117,7 @@ def yaml_to_dict(yaml_file_path):
         dict: Dictionary representation of the YAML file content.
     """
     try:
-        with open(yaml_file_path, "r") as file:
+        with open(yaml_file_path, "r", encoding="utf-8") as file:
             yaml_dict = yaml.safe_load(file)
             return yaml_dict
     except FileNotFoundError:
@@ -136,7 +136,7 @@ def read_json_list(file_path):
     Returns
         list: A list of dictionaries containing the JSON objects.
     """
-    with open(file_path, "r") as file:
+    with open(file_path, "r", encoding="utf-8") as file:
         data = json.load(file)
         return data

tooluniverse/web_search_tool.py
ADDED
@@ -0,0 +1,229 @@
+"""
+Web search tools for ToolUniverse using DDGS (Dux Distributed Global Search).
+
+This module provides web search capabilities using the ddgs library,
+which supports multiple search engines including DuckDuckGo, Google, Bing, etc.
+"""
+
+import time
+from typing import Dict, Any, List
+from ddgs import DDGS
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("WebSearchTool")
+class WebSearchTool(BaseTool):
+    """
+    Web search tool using DDGS library.
+
+    This tool performs web searches using the DDGS library which supports
+    multiple search engines including Google, Bing, Brave, Yahoo, DuckDuckGo, etc.
+    """
+
+    def __init__(self, tool_config: Dict[str, Any]):
+        super().__init__(tool_config)
+        # DDGS instance will be created per request to avoid session issues
+
+    def _search_with_ddgs(
+        self,
+        query: str,
+        max_results: int = 10,
+        backend: str = "auto",
+        region: str = "us-en",
+        safesearch: str = "moderate",
+    ) -> List[Dict[str, Any]]:
+        """
+        Perform a web search using DDGS library and return formatted results.
+
+        Args:
+            query: Search query string
+            max_results: Maximum number of results to return
+            backend: Search engine backend (auto, google, bing, brave, etc.)
+            region: Search region (e.g., 'us-en', 'cn-zh')
+            safesearch: Safe search level ('on', 'moderate', 'off')
+
+        Returns:
+            List of search results with title, url, and snippet
+        """
+        try:
+            # Create DDGS instance
+            ddgs = DDGS()
+
+            # Perform search using DDGS
+            search_results = list(
+                ddgs.text(
+                    query=query,
+                    max_results=max_results,
+                    backend=backend,
+                    region=region,
+                    safesearch=safesearch,
+                )
+            )
+
+            # Convert DDGS results to our expected format
+            results = []
+            for i, result in enumerate(search_results):
+                results.append(
+                    {
+                        "title": result.get("title", ""),
+                        "url": result.get("href", ""),
+                        "snippet": result.get("body", ""),
+                        "rank": i + 1,
+                    }
+                )
+
+            return results
+
+        except Exception as e:
+            return [
+                {
+                    "title": "Search Error",
+                    "url": "",
+                    "snippet": f"Failed to perform search: {str(e)}",
+                    "rank": 0,
+                }
+            ]
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Execute web search using DDGS.
+
+        Args:
+            arguments: Dictionary containing:
+                - query: Search query string
+                - max_results: Maximum number of results (default: 10)
+                - search_type: Type of search (default: 'general')
+                - backend: Search engine backend (default: 'auto')
+                - region: Search region (default: 'us-en')
+                - safesearch: Safe search level (default: 'moderate')
+
+        Returns:
+            Dictionary containing search results
+        """
+        try:
+            query = arguments.get("query", "").strip()
+            max_results = int(arguments.get("max_results", 10))
+            search_type = arguments.get("search_type", "general")
+            backend = arguments.get("backend", "auto")
+            region = arguments.get("region", "us-en")
+            safesearch = arguments.get("safesearch", "moderate")
+
+            if not query:
+                return {
+                    "status": "error",
+                    "error": "Query parameter is required",
+                    "results": [],
+                }
+
+            # Validate max_results
+            max_results = max(1, min(max_results, 50))  # Limit between 1-50
+
+            # Modify query based on search type
+            if search_type == "api_documentation":
+                query = f"{query} API documentation python library"
+            elif search_type == "python_packages":
+                query = f"{query} python package pypi"
+            elif search_type == "github":
+                query = f"{query} site:github.com"
+
+            # Perform search using DDGS
+            results = self._search_with_ddgs(
+                query=query,
+                max_results=max_results,
+                backend=backend,
+                region=region,
+                safesearch=safesearch,
+            )
+
+            # Add rate limiting to be respectful
+            time.sleep(0.5)
+
+            return {
+                "status": "success",
+                "query": query,
+                "search_type": search_type,
+                "total_results": len(results),
+                "results": results,
+            }
+
+        except Exception as e:
+            return {"status": "error", "error": str(e), "results": []}
+
+
+@register_tool("WebAPIDocumentationSearchTool")
+class WebAPIDocumentationSearchTool(WebSearchTool):
+    """
+    Specialized web search tool for API documentation and Python libraries.
+
+    This tool is optimized for finding API documentation, Python packages,
+    and technical resources using DDGS with multiple search engines.
+    """
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Execute API documentation focused search.
+
+        Args:
+            arguments: Dictionary containing:
+                - query: Search query string
+                - max_results: Maximum number of results (default: 10)
+                - focus: Focus area ('api_docs', 'python_packages', etc.)
+                - backend: Search engine backend (default: 'auto')
+
+        Returns:
+            Dictionary containing search results
+        """
+        try:
+            query = arguments.get("query", "").strip()
+            focus = arguments.get("focus", "api_docs")
+            backend = arguments.get("backend", "auto")
+
+            if not query:
+                return {
+                    "status": "error",
+                    "error": "Query parameter is required",
+                    "results": [],
+                }
+
+            # Modify query based on focus
+            if focus == "api_docs":
+                enhanced_query = f'"{query}" API documentation official docs'
+            elif focus == "python_packages":
+                enhanced_query = f'"{query}" python package pypi install pip'
+            elif focus == "github_repos":
+                enhanced_query = f'"{query}" github repository source code'
+            else:
+                enhanced_query = f'"{query}" documentation API reference'
+
+            # Use parent class search with enhanced query
+            arguments["query"] = enhanced_query
+            arguments["search_type"] = "api_documentation"
+            arguments["backend"] = backend
+
+            result = super().run(arguments)
+
+            # Add focus-specific metadata
+            if result["status"] == "success":
+                result["focus"] = focus
+                result["enhanced_query"] = enhanced_query
+
+                # Filter results for better relevance
+                if focus == "python_packages":
+                    result["results"] = [
+                        r
+                        for r in result["results"]
+                        if (
+                            "pypi.org" in r.get("url", "")
+                            or "python" in r.get("title", "").lower()
+                        )
+                    ]
+                elif focus == "github_repos":
+                    result["results"] = [
+                        r for r in result["results"] if "github.com" in r.get("url", "")
+                    ]
+
+            return result
+
+        except Exception as e:
+            return {"status": "error", "error": str(e), "results": []}
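Both classes above reduce to the same ddgs call and then normalize each hit to title/url/snippet/rank. A short sketch of that underlying call, using the defaults from WebSearchTool.run (the query string is illustrative):

from ddgs import DDGS

# Mirrors WebSearchTool._search_with_ddgs with its default parameters.
ddgs = DDGS()
hits = ddgs.text(
    query="UniProt REST API python client",
    max_results=5,
    backend="auto",
    region="us-en",
    safesearch="moderate",
)
for rank, hit in enumerate(hits, start=1):
    print(rank, hit.get("title", ""), hit.get("href", ""))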
tooluniverse/worms_tool.py
ADDED
@@ -0,0 +1,64 @@
+import requests
+import urllib.parse
+from typing import Any, Dict
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("WoRMSRESTTool")
+class WoRMSRESTTool(BaseTool):
+    def __init__(self, tool_config: Dict):
+        super().__init__(tool_config)
+        self.base_url = "https://www.marinespecies.org/rest"
+        self.session = requests.Session()
+        self.session.headers.update({"Accept": "application/json"})
+        self.timeout = 30
+
+    def _build_url(self, args: Dict[str, Any]) -> str:
+        url = self.tool_config["fields"]["endpoint"]
+        for k, v in args.items():
+            url = url.replace(f"{{{k}}}", str(v))
+        return url
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        try:
+            # Build URL with proper encoding
+            query = arguments.get("query", "")
+            if not query:
+                return {"status": "error", "error": "Query parameter is required"}
+
+            # URL encode the query
+            encoded_query = urllib.parse.quote(query)
+            url = (
+                f"https://www.marinespecies.org/rest/AphiaRecordsByName/{encoded_query}"
+            )
+
+            response = self.session.get(url, timeout=self.timeout)
+            response.raise_for_status()
+
+            # Check if response is empty
+            if not response.text.strip():
+                return {
+                    "status": "success",
+                    "data": [],
+                    "url": url,
+                    "message": "No results found for this query",
+                }
+
+            data = response.json()
+
+            # WoRMS returns array of species, extract first few results
+            if isinstance(data, list) and len(data) > 0:
+                # Limit results to first 5 for better performance
+                limited_data = data[:5]
+                return {
+                    "status": "success",
+                    "data": limited_data,
+                    "url": url,
+                    "count": len(limited_data),
+                    "total_found": len(data),
+                }
+            else:
+                return {"status": "success", "data": data, "url": url}
+        except Exception as e:
+            return {"status": "error", "error": f"WoRMS API error: {str(e)}"}
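WoRMSRESTTool.run is essentially one GET against the public AphiaRecordsByName endpoint with a URL-encoded name, trimmed to five records. A hedged sketch of the same request without the tool class (the species name is illustrative; the printed fields follow the WoRMS Aphia record schema):

import urllib.parse
import requests

# Same endpoint, encoding, and 5-record cap as WoRMSRESTTool.run.
name = urllib.parse.quote("Orcinus orca")
url = f"https://www.marinespecies.org/rest/AphiaRecordsByName/{name}"
response = requests.get(url, headers={"Accept": "application/json"}, timeout=30)
response.raise_for_status()
records = response.json() if response.text.strip() else []
for record in records[:5]:
    print(record.get("AphiaID"), record.get("scientificname"), record.get("status"))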
{tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tooluniverse
-Version: 1.0.10
+Version: 1.0.11
 Summary: A comprehensive collection of scientific tools for Agentic AI, offering integration with the ToolUniverse SDK and MCP Server to support advanced scientific workflows.
 Author-email: Shanghua Gao <shanghuagao@gmail.com>
 Project-URL: Homepage, https://github.com/mims-harvard/ToolUniverse
@@ -29,7 +29,7 @@ Requires-Dist: sentence-transformers>=5.1.0
 Requires-Dist: fitz>=0.0.1.dev2
 Requires-Dist: pandas>=2.2.3
 Requires-Dist: admet-ai>=1.2.0
-Requires-Dist: setuptools
+Requires-Dist: setuptools<81.0.0,>=70.0.0
 Requires-Dist: pdfplumber>=0.11.0
 Requires-Dist: playwright>=1.55.0
 Requires-Dist: faiss-cpu>=1.12.0
@@ -39,6 +39,7 @@ Requires-Dist: aiohttp
 Requires-Dist: beautifulsoup4>=4.12.0
 Requires-Dist: markitdown[all]>=0.1.0
 Requires-Dist: psutil>=5.9.0
+Requires-Dist: ddgs>=9.0.0
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0; extra == "dev"
 Requires-Dist: pytest-cov>=4.0; extra == "dev"