tooluniverse 1.0.9.1__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tooluniverse might be problematic.
- tooluniverse/__init__.py +57 -1
- tooluniverse/admetai_tool.py +1 -1
- tooluniverse/agentic_tool.py +65 -17
- tooluniverse/base_tool.py +19 -8
- tooluniverse/blast_tool.py +132 -0
- tooluniverse/boltz_tool.py +3 -3
- tooluniverse/cache/result_cache_manager.py +167 -12
- tooluniverse/cbioportal_tool.py +42 -0
- tooluniverse/clinvar_tool.py +268 -74
- tooluniverse/compose_scripts/drug_safety_analyzer.py +1 -1
- tooluniverse/compose_scripts/multi_agent_literature_search.py +1 -1
- tooluniverse/compose_scripts/output_summarizer.py +4 -4
- tooluniverse/compose_scripts/tool_discover.py +1941 -443
- tooluniverse/compose_scripts/tool_graph_composer.py +1 -1
- tooluniverse/compose_scripts/tool_metadata_generator.py +1 -1
- tooluniverse/compose_tool.py +9 -9
- tooluniverse/core_tool.py +2 -2
- tooluniverse/ctg_tool.py +4 -4
- tooluniverse/custom_tool.py +1 -1
- tooluniverse/data/agentic_tools.json +0 -370
- tooluniverse/data/alphafold_tools.json +6 -6
- tooluniverse/data/blast_tools.json +112 -0
- tooluniverse/data/cbioportal_tools.json +87 -0
- tooluniverse/data/clinvar_tools.json +235 -0
- tooluniverse/data/compose_tools.json +0 -89
- tooluniverse/data/dbsnp_tools.json +275 -0
- tooluniverse/data/emdb_tools.json +61 -0
- tooluniverse/data/ensembl_tools.json +259 -0
- tooluniverse/data/file_download_tools.json +275 -0
- tooluniverse/data/geo_tools.json +200 -48
- tooluniverse/data/gnomad_tools.json +109 -0
- tooluniverse/data/gtopdb_tools.json +68 -0
- tooluniverse/data/gwas_tools.json +32 -0
- tooluniverse/data/interpro_tools.json +199 -0
- tooluniverse/data/jaspar_tools.json +70 -0
- tooluniverse/data/kegg_tools.json +356 -0
- tooluniverse/data/mpd_tools.json +87 -0
- tooluniverse/data/ols_tools.json +314 -0
- tooluniverse/data/package_discovery_tools.json +64 -0
- tooluniverse/data/packages/categorized_tools.txt +0 -1
- tooluniverse/data/packages/machine_learning_tools.json +0 -47
- tooluniverse/data/paleobiology_tools.json +91 -0
- tooluniverse/data/pride_tools.json +62 -0
- tooluniverse/data/pypi_package_inspector_tools.json +158 -0
- tooluniverse/data/python_executor_tools.json +341 -0
- tooluniverse/data/regulomedb_tools.json +50 -0
- tooluniverse/data/remap_tools.json +89 -0
- tooluniverse/data/screen_tools.json +89 -0
- tooluniverse/data/tool_discovery_agents.json +428 -0
- tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
- tooluniverse/data/uniprot_tools.json +77 -0
- tooluniverse/data/web_search_tools.json +250 -0
- tooluniverse/data/worms_tools.json +55 -0
- tooluniverse/dataset_tool.py +2 -2
- tooluniverse/dbsnp_tool.py +196 -58
- tooluniverse/default_config.py +36 -3
- tooluniverse/emdb_tool.py +30 -0
- tooluniverse/enrichr_tool.py +14 -14
- tooluniverse/ensembl_tool.py +140 -47
- tooluniverse/execute_function.py +594 -29
- tooluniverse/extended_hooks.py +4 -4
- tooluniverse/file_download_tool.py +269 -0
- tooluniverse/gene_ontology_tool.py +1 -1
- tooluniverse/generate_tools.py +3 -3
- tooluniverse/geo_tool.py +81 -28
- tooluniverse/gnomad_tool.py +100 -52
- tooluniverse/gtopdb_tool.py +41 -0
- tooluniverse/humanbase_tool.py +10 -10
- tooluniverse/interpro_tool.py +72 -0
- tooluniverse/jaspar_tool.py +30 -0
- tooluniverse/kegg_tool.py +230 -0
- tooluniverse/logging_config.py +2 -2
- tooluniverse/mcp_client_tool.py +57 -129
- tooluniverse/mcp_integration.py +52 -49
- tooluniverse/mcp_tool_registry.py +147 -528
- tooluniverse/mpd_tool.py +42 -0
- tooluniverse/ncbi_eutils_tool.py +96 -0
- tooluniverse/ols_tool.py +435 -0
- tooluniverse/openalex_tool.py +8 -8
- tooluniverse/openfda_tool.py +2 -2
- tooluniverse/output_hook.py +15 -15
- tooluniverse/package_discovery_tool.py +217 -0
- tooluniverse/package_tool.py +1 -1
- tooluniverse/paleobiology_tool.py +30 -0
- tooluniverse/pmc_tool.py +2 -2
- tooluniverse/pride_tool.py +30 -0
- tooluniverse/pypi_package_inspector_tool.py +593 -0
- tooluniverse/python_executor_tool.py +711 -0
- tooluniverse/regulomedb_tool.py +30 -0
- tooluniverse/remap_tool.py +44 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +1 -1
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +3 -3
- tooluniverse/remote/immune_compass/compass_tool.py +3 -3
- tooluniverse/remote/pinnacle/pinnacle_tool.py +2 -2
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +3 -3
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +3 -3
- tooluniverse/remote_tool.py +4 -4
- tooluniverse/screen_tool.py +44 -0
- tooluniverse/scripts/filter_tool_files.py +2 -2
- tooluniverse/smcp.py +93 -12
- tooluniverse/smcp_server.py +100 -21
- tooluniverse/space/__init__.py +46 -0
- tooluniverse/space/loader.py +133 -0
- tooluniverse/space/validator.py +353 -0
- tooluniverse/tool_finder_embedding.py +5 -3
- tooluniverse/tool_finder_keyword.py +12 -10
- tooluniverse/tool_finder_llm.py +12 -8
- tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
- tooluniverse/tools/BLAST_protein_search.py +63 -0
- tooluniverse/tools/ClinVar_search_variants.py +26 -15
- tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
- tooluniverse/tools/EMDB_get_structure.py +46 -0
- tooluniverse/tools/GtoPdb_get_targets.py +52 -0
- tooluniverse/tools/InterPro_get_domain_details.py +46 -0
- tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
- tooluniverse/tools/InterPro_search_domains.py +52 -0
- tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
- tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
- tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
- tooluniverse/tools/PackageAnalyzer.py +55 -0
- tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
- tooluniverse/tools/PyPIPackageInspector.py +59 -0
- tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
- tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
- tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
- tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
- tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
- tooluniverse/tools/ToolDiscover.py +11 -11
- tooluniverse/tools/UniProt_id_mapping.py +63 -0
- tooluniverse/tools/UniProt_search.py +63 -0
- tooluniverse/tools/UnifiedToolGenerator.py +59 -0
- tooluniverse/tools/WoRMS_search_species.py +49 -0
- tooluniverse/tools/XMLToolOptimizer.py +55 -0
- tooluniverse/tools/__init__.py +119 -29
- tooluniverse/tools/_shared_client.py +3 -3
- tooluniverse/tools/alphafold_get_annotations.py +3 -3
- tooluniverse/tools/alphafold_get_prediction.py +3 -3
- tooluniverse/tools/alphafold_get_summary.py +3 -3
- tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
- tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
- tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
- tooluniverse/tools/clinvar_get_variant_details.py +49 -0
- tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
- tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
- tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
- tooluniverse/tools/download_binary_file.py +66 -0
- tooluniverse/tools/download_file.py +71 -0
- tooluniverse/tools/download_text_content.py +55 -0
- tooluniverse/tools/dynamic_package_discovery.py +59 -0
- tooluniverse/tools/ensembl_get_sequence.py +52 -0
- tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
- tooluniverse/tools/ensembl_lookup_gene.py +46 -0
- tooluniverse/tools/geo_get_dataset_info.py +46 -0
- tooluniverse/tools/geo_get_sample_info.py +46 -0
- tooluniverse/tools/geo_search_datasets.py +67 -0
- tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
- tooluniverse/tools/kegg_find_genes.py +52 -0
- tooluniverse/tools/kegg_get_gene_info.py +46 -0
- tooluniverse/tools/kegg_get_pathway_info.py +46 -0
- tooluniverse/tools/kegg_list_organisms.py +44 -0
- tooluniverse/tools/kegg_search_pathway.py +46 -0
- tooluniverse/tools/ols_find_similar_terms.py +63 -0
- tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
- tooluniverse/tools/ols_get_term_ancestors.py +67 -0
- tooluniverse/tools/ols_get_term_children.py +67 -0
- tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
- tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
- tooluniverse/tools/ols_search_terms.py +71 -0
- tooluniverse/tools/python_code_executor.py +79 -0
- tooluniverse/tools/python_script_runner.py +79 -0
- tooluniverse/tools/web_api_documentation_search.py +63 -0
- tooluniverse/tools/web_search.py +71 -0
- tooluniverse/uniprot_tool.py +219 -16
- tooluniverse/url_tool.py +19 -1
- tooluniverse/uspto_tool.py +1 -1
- tooluniverse/utils.py +12 -12
- tooluniverse/web_search_tool.py +229 -0
- tooluniverse/worms_tool.py +64 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +8 -3
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +184 -92
- tooluniverse/data/genomics_tools.json +0 -174
- tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
- tooluniverse/tools/ToolImplementationGenerator.py +0 -67
- tooluniverse/tools/ToolOptimizer.py +0 -59
- tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
- tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
- tooluniverse/ucsc_tool.py +0 -60
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
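Most of the new modules in this release follow the registration pattern visible in the web_search_tool.py diff at the end of this page: a BaseTool subclass decorated with @register_tool that implements run(arguments), paired with a JSON config under tooluniverse/data/. A minimal sketch of that shape (the tool name and echo behavior are hypothetical, shown only to illustrate the pattern):

from typing import Any, Dict

from tooluniverse.base_tool import BaseTool
from tooluniverse.tool_registry import register_tool


@register_tool("MyExampleTool")  # hypothetical name; real tools use names like "WebSearchTool"
class MyExampleTool(BaseTool):
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        # Real tools call an external API here; this stub just echoes its input.
        return {"status": "success", "echo": arguments}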
tooluniverse/uniprot_tool.py
CHANGED
@@ -1,3 +1,4 @@
+import time
 import requests
 from typing import Any, Dict
 from .base_tool import BaseTool
@@ -22,7 +23,7 @@ class UniProtRESTTool(BaseTool):
         """Custom data extraction with support for filtering"""
 
         # Handle specific UniProt extraction patterns
-        if extract_path == "comments[?(@.commentType=='FUNCTION')].texts[*].value":
+        if extract_path == ("comments[?(@.commentType==" "'FUNCTION')].texts[*].value"):
             # Extract function comments
             result = []
             for comment in data.get("comments", []):
@@ -32,41 +33,40 @@ class UniProtRESTTool(BaseTool):
                         result.append(text["value"])
             return result
 
-        elif (
-            extract_path
-            == "comments[?(@.commentType=='SUBCELLULAR LOCATION')].subcellularLocations[*].location.value"
+        elif extract_path == (
+            "comments[?(@.commentType=="
+            "'SUBCELLULAR LOCATION')].subcellularLocations[*].location.value"
         ):
             # Extract subcellular locations
             result = []
             for comment in data.get("comments", []):
                 if comment.get("commentType") == "SUBCELLULAR LOCATION":
                     for location in comment.get("subcellularLocations", []):
-                        if "location" in location and "value" in location["location"]:
+                        if "location" in location and ("value" in location["location"]):
                             result.append(location["location"]["value"])
             return result
 
         elif extract_path == "features[?(@.type=='VARIANT')]":
-            # Extract variant features
+            # Extract variant features
             result = []
             for feature in data.get("features", []):
                 if feature.get("type") == "Natural variant":
                     result.append(feature)
             return result
 
-        elif (
-            extract_path
-            == "features[?(@.type=='MODIFIED RESIDUE' || @.type=='SIGNAL')]"
+        elif extract_path == (
+            "features[?(@.type=='MODIFIED RESIDUE' || " "@.type=='SIGNAL')]"
         ):
-            # Extract PTM and signal features
+            # Extract PTM and signal features
             result = []
             for feature in data.get("features", []):
                 if feature.get("type") in ["Modified residue", "Signal"]:
                     result.append(feature)
             return result
 
-        elif (
-            extract_path
-            == "comments[?(@.commentType=='ALTERNATIVE PRODUCTS')].isoforms[*].isoformIds[*]"
+        elif extract_path == (
+            "comments[?(@.commentType=="
+            "'ALTERNATIVE PRODUCTS')].isoforms[*].isoformIds[*]"
        ):
             # Extract isoform IDs
             result = []
@@ -97,17 +97,220 @@ class UniProtRESTTool(BaseTool):
                 return {"error": "jsonpath_ng library is required for data extraction"}
         except Exception as e:
             return {
-                "error": f"Failed to extract UniProt fields using JSONPath '{extract_path}': {e}"
+                "error": (
+                    f"Failed to extract UniProt fields using "
+                    f"JSONPath '{extract_path}': {e}"
+                )
             }
 
+    def _handle_search(self, arguments: Dict[str, Any]) -> Any:
+        """Handle search queries with flexible parameters"""
+        query = arguments.get("query", "")
+        organism = arguments.get("organism", "")
+        limit = min(arguments.get("limit", 25), 500)
+        fields = arguments.get("fields")
+
+        # Build query string
+        query_parts = [query]
+        if organism:
+            # Support common organism names
+            organism_map = {
+                "human": "9606",
+                "mouse": "10090",
+                "rat": "10116",
+                "yeast": "559292",
+            }
+            taxon_id = organism_map.get(organism.lower(), organism)
+            query_parts.append(f"organism_id:{taxon_id}")
+
+        full_query = " AND ".join(query_parts)
+
+        # Build parameters
+        params = {"query": full_query, "size": str(limit), "format": "json"}
+
+        # Add fields parameter if specified
+        if fields and isinstance(fields, list):
+            params["fields"] = ",".join(fields)
+
+        url = "https://rest.uniprot.org/uniprotkb/search"
+
+        try:
+            resp = requests.get(url, params=params, timeout=self.timeout)
+            resp.raise_for_status()
+            data = resp.json()
+
+            # Extract results
+            results = data.get("results", [])
+            formatted_results = []
+
+            for entry in results:
+                formatted_entry = {
+                    "accession": entry.get("primaryAccession", ""),
+                    "id": entry.get("uniProtkbId", ""),
+                    "protein_name": "",
+                    "gene_names": [],
+                    "organism": "",
+                    "length": 0,
+                }
+
+                # Extract protein name
+                protein_desc = entry.get("proteinDescription", {})
+                rec_name = protein_desc.get("recommendedName", {})
+                if rec_name:
+                    full_name = rec_name.get("fullName", {})
+                    if full_name:
+                        formatted_entry["protein_name"] = full_name.get("value", "")
+
+                # Extract gene names
+                genes = entry.get("genes", [])
+                for gene in genes:
+                    gene_name = gene.get("geneName", {})
+                    if gene_name:
+                        formatted_entry["gene_names"].append(gene_name.get("value", ""))
+
+                # Extract organism
+                organism_info = entry.get("organism", {})
+                formatted_entry["organism"] = organism_info.get("scientificName", "")
+
+                # Extract sequence length
+                sequence = entry.get("sequence", {})
+                formatted_entry["length"] = sequence.get("length", 0)
+
+                formatted_results.append(formatted_entry)
+
+            return {
+                "total_results": data.get("resultsFound", len(results)),
+                "returned": len(results),
+                "results": formatted_results,
+            }
+
+        except requests.exceptions.Timeout:
+            return {"error": "Request to UniProt API timed out"}
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Request to UniProt API failed: {e}"}
+        except ValueError as e:
+            return {"error": f"Failed to parse JSON response: {e}"}
+
+    def _handle_id_mapping(self, arguments: Dict[str, Any]) -> Any:
+        """Handle ID mapping requests"""
+
+        ids = arguments.get("ids", [])
+        from_db = arguments.get("from_db", "")
+        to_db = arguments.get("to_db", "UniProtKB")
+        max_wait_time = arguments.get("max_wait_time", 30)
+
+        # Normalize IDs to list
+        if isinstance(ids, str):
+            ids = [ids]
+
+        # Normalize database names
+        db_mapping = {
+            "Ensembl": "Ensembl",
+            "Gene_Name": "Gene_Name",
+            "RefSeq_Protein": "RefSeq_Protein_ID",
+            "PDB": "PDB_ID",
+            "EMBL": "EMBL_ID",
+            "UniProtKB": "UniProtKB_AC-ID",
+        }
+        from_db_normalized = db_mapping.get(from_db, from_db)
+        to_db_normalized = db_mapping.get(to_db, to_db)
+
+        # Step 1: Submit mapping job
+        submit_url = "https://rest.uniprot.org/idmapping/run"
+        payload = {"ids": ids, "from": from_db_normalized, "to": to_db_normalized}
+
+        try:
+            resp = requests.post(submit_url, json=payload, timeout=self.timeout)
+            resp.raise_for_status()
+            job_data = resp.json()
+            job_id = job_data.get("jobId")
+
+            if not job_id:
+                return {"error": "Failed to get job ID from UniProt ID mapping"}
+
+            # Step 2: Poll for job completion
+            status_url = f"https://rest.uniprot.org/idmapping/status/{job_id}"
+            results_url = f"https://rest.uniprot.org/idmapping/results/{job_id}"
+
+            start_time = time.time()
+            while time.time() - start_time < max_wait_time:
+                status_resp = requests.get(status_url, timeout=self.timeout)
+                status_data = status_resp.json()
+
+                if status_data.get("status") == "FINISHED":
+                    # Step 3: Retrieve results
+                    results_resp = requests.get(results_url, timeout=self.timeout)
+                    results_data = results_resp.json()
+
+                    # Format results
+                    formatted_results = []
+                    failed = []
+
+                    # Extract mappings
+                    results = results_data.get("results", [])
+                    for result in results:
+                        from_value = result.get("from", "")
+                        to_values = result.get("to", {}).get("results", [])
+
+                        if to_values:
+                            for to_item in to_values:
+                                to_info = to_item.get("to", {})
+                                gene_names = to_info.get("geneNames", [])
+                                gene_name = ""
+                                if gene_names:
+                                    gene_name = gene_names[0].get("value", "")
+
+                                formatted_results.append(
+                                    {
+                                        "from": from_value,
+                                        "to": {
+                                            "accession": to_info.get(
+                                                "primaryAccession", ""
+                                            ),
+                                            "id": to_info.get("uniProtkbId", ""),
+                                            "gene_name": gene_name,
+                                        },
+                                    }
+                                )
+                        else:
+                            failed.append(from_value)
+
+                    return {
+                        "mapped_count": len(formatted_results),
+                        "results": formatted_results,
+                        "failed": list(set(failed)) if failed else [],
+                    }
+                elif status_data.get("status") == "FAILED":
+                    return {"error": "ID mapping job failed"}
+
+                time.sleep(1)  # Wait 1 second before next poll
+
+            return {"error": (f"ID mapping timed out after {max_wait_time} seconds")}
+
+        except requests.exceptions.Timeout:
+            return {"error": "Request to UniProt API timed out"}
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Request to UniProt API failed: {e}"}
+        except ValueError as e:
+            return {"error": f"Failed to parse JSON response: {e}"}
+
     def run(self, arguments: Dict[str, Any]) -> Any:
-        #
+        # Check if this is a search request
+        search_type = self.tool_config.get("fields", {}).get("search_type")
+        mapping_type = self.tool_config.get("fields", {}).get("mapping_type")
+
+        if search_type == "search":
+            return self._handle_search(arguments)
+        elif mapping_type == "async":
+            return self._handle_id_mapping(arguments)
+
+        # Build URL for standard accession-based queries
         url = self._build_url(arguments)
         try:
             resp = requests.get(url, timeout=self.timeout)
             if resp.status_code != 200:
                 return {
-                    "error": f"UniProt API returned status code: {resp.status_code}",
+                    "error": (f"UniProt API returned status code: {resp.status_code}"),
                     "detail": resp.text,
                 }
             data = resp.json()
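The new _handle_id_mapping follows UniProt's asynchronous job protocol: POST the IDs to /idmapping/run, poll /idmapping/status/{jobId} until it reports FINISHED, then GET /idmapping/results/{jobId}. A minimal standalone sketch of the same three-step flow (the endpoints and the jobId/status keys come from the diff above; the example accession and the timeout budget are illustrative):

import time
import requests

def map_ids(ids, from_db="UniProtKB_AC-ID", to_db="Gene_Name", max_wait=30):
    # Step 1: submit the mapping job
    job = requests.post(
        "https://rest.uniprot.org/idmapping/run",
        json={"ids": ids, "from": from_db, "to": to_db},
        timeout=10,
    ).json()
    job_id = job["jobId"]

    # Step 2: poll until the job reports FINISHED or the budget runs out
    deadline = time.time() + max_wait
    while time.time() < deadline:
        status = requests.get(
            f"https://rest.uniprot.org/idmapping/status/{job_id}", timeout=10
        ).json()
        if status.get("status") == "FINISHED":
            # Step 3: retrieve the mapped results
            return requests.get(
                f"https://rest.uniprot.org/idmapping/results/{job_id}", timeout=10
            ).json()
        time.sleep(1)
    raise TimeoutError(f"ID mapping did not finish within {max_wait}s")

# map_ids(["P05067"]) is an illustrative call; the accession is not taken from this diff.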
tooluniverse/url_tool.py
CHANGED
@@ -94,7 +94,7 @@ class URLToPDFTextTool(BaseTool):
         """
         Ensure Playwright browser binaries are installed.
 
-        Returns
+        Returns
             None on success, or an error string on failure.
         """
         # Allow user to skip auto-install via env var
@@ -188,6 +188,24 @@ class URLToPDFTextTool(BaseTool):
 
         timeout = arguments.get("timeout", 30)
 
+        # First, check if the URL returns HTML or a downloadable file
+        try:
+            resp = requests.head(url, timeout=timeout, allow_redirects=True)
+            content_type = resp.headers.get("Content-Type", "").lower()
+            # If it's not HTML, handle it as a simple text download
+            is_html = "text/html" in content_type or "application/xhtml" in content_type
+            if not is_html:
+                # Download the file directly and return its text content
+                resp = requests.get(url, timeout=timeout, allow_redirects=True)
+                if resp.status_code != 200:
+                    return {"error": f"HTTP {resp.status_code}"}
+                text = resp.text
+                if not text.strip():
+                    return {"error": "File appears to be empty or binary."}
+                return {self.return_key: text.strip()}
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Failed to check content type: {e}"}
+
         # Ensure browsers are installed (auto-install if needed)
         ensure_error = self._ensure_playwright_browsers(
             browsers=("chromium",), with_deps=False
tooluniverse/uspto_tool.py
CHANGED
@@ -132,7 +132,7 @@ class USPTOOpenDataPortalTool(BaseTool):
         Args:
             arguments: A dictionary of arguments for the tool, matching the parameters in the tool definition.
 
-        Returns
+        Returns
             The result of the API call, either as a dictionary (for JSON) or a string (for CSV).
         """
         endpoint = self.tool_config.get("api_endpoint")
tooluniverse/utils.py
CHANGED
@@ -113,11 +113,11 @@ def yaml_to_dict(yaml_file_path):
     Args:
         yaml_file_path (str): Path to the YAML file.
 
-    Returns
+    Returns
         dict: Dictionary representation of the YAML file content.
     """
     try:
-        with open(yaml_file_path, "r") as file:
+        with open(yaml_file_path, "r", encoding="utf-8") as file:
             yaml_dict = yaml.safe_load(file)
             return yaml_dict
     except FileNotFoundError:
@@ -130,13 +130,13 @@ def read_json_list(file_path):
     """
     Reads a list of JSON objects from a file.
 
-    Parameters
+    Parameters
         file_path (str): The path to the JSON file.
 
-    Returns
+    Returns
         list: A list of dictionaries containing the JSON objects.
     """
-    with open(file_path, "r") as file:
+    with open(file_path, "r", encoding="utf-8") as file:
         data = json.load(file)
         return data
 
@@ -355,7 +355,7 @@ def format_error_response(
         tool_name (str, optional): Name of the tool that failed
         context (Dict[str, Any], optional): Additional context about the error
 
-    Returns
+    Returns
         Dict[str, Any]: Standardized error response
     """
     from .exceptions import ToolError
@@ -391,7 +391,7 @@ def get_parameter_schema(tool_config: Dict[str, Any]) -> Dict[str, Any]:
     Args:
         tool_config (Dict[str, Any]): Tool configuration dictionary
 
-    Returns
+    Returns
         Dict[str, Any]: Parameter schema dictionary
     """
     return tool_config.get("parameter", {})
@@ -404,7 +404,7 @@ def validate_query(query: Dict[str, Any]) -> bool:
     Args:
         query (Dict[str, Any]): The query dictionary to validate
 
-    Returns
+    Returns
         bool: True if query is valid, False otherwise
     """
     if not isinstance(query, dict):
@@ -427,7 +427,7 @@ def normalize_gene_symbol(gene_symbol: str) -> str:
     Args:
         gene_symbol (str): The gene symbol to normalize
 
-    Returns
+    Returns
         str: Normalized gene symbol
     """
     if not isinstance(gene_symbol, str):
@@ -454,7 +454,7 @@ def format_api_response(
         response_data (Any): The response data to format
         format_type (str): The desired output format ('json', 'pretty', 'minimal')
 
-    Returns
+    Returns
         Union[str, Dict[str, Any]]: Formatted response
     """
     if format_type == "json":
@@ -493,7 +493,7 @@ def validate_hook_config(config: Dict[str, Any]) -> bool:
     Args:
         config (Dict[str, Any]): Hook configuration to validate
 
-    Returns
+    Returns
         bool: True if configuration is valid, False otherwise
     """
     try:
@@ -561,7 +561,7 @@ def validate_hook_conditions(conditions: Dict[str, Any]) -> bool:
     Args:
         conditions (Dict[str, Any]): Hook conditions to validate
 
-    Returns
+    Returns
         bool: True if conditions are valid, False otherwise
     """
     try:
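The encoding="utf-8" additions are not cosmetic: without an explicit encoding, open() uses the platform default (locale.getpreferredencoding()), often cp1252 on Windows, so tool JSON/YAML files containing non-ASCII characters could be misread or raise UnicodeDecodeError. A quick illustration of the failure mode (the file name is hypothetical):

import json

# Write a tool description containing non-ASCII text, e.g. "µg/mL", as real UTF-8 bytes.
with open("tool.json", "w", encoding="utf-8") as f:
    json.dump({"unit": "µg/mL"}, f, ensure_ascii=False)

# Reading it back without an explicit encoding depends on the OS locale; on a
# cp1252 machine the UTF-8 bytes for "µ" decode to the wrong characters.
with open("tool.json", "r", encoding="utf-8") as f:  # explicit, as in the new code
    assert json.load(f)["unit"] == "µg/mL"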
tooluniverse/web_search_tool.py
ADDED
@@ -0,0 +1,229 @@
+"""
+Web search tools for ToolUniverse using DDGS (Dux Distributed Global Search).
+
+This module provides web search capabilities using the ddgs library,
+which supports multiple search engines including DuckDuckGo, Google, Bing, etc.
+"""
+
+import time
+from typing import Dict, Any, List
+from ddgs import DDGS
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("WebSearchTool")
+class WebSearchTool(BaseTool):
+    """
+    Web search tool using DDGS library.
+
+    This tool performs web searches using the DDGS library which supports
+    multiple search engines including Google, Bing, Brave, Yahoo, DuckDuckGo, etc.
+    """
+
+    def __init__(self, tool_config: Dict[str, Any]):
+        super().__init__(tool_config)
+        # DDGS instance will be created per request to avoid session issues
+
+    def _search_with_ddgs(
+        self,
+        query: str,
+        max_results: int = 10,
+        backend: str = "auto",
+        region: str = "us-en",
+        safesearch: str = "moderate",
+    ) -> List[Dict[str, Any]]:
+        """
+        Perform a web search using DDGS library and return formatted results.
+
+        Args:
+            query: Search query string
+            max_results: Maximum number of results to return
+            backend: Search engine backend (auto, google, bing, brave, etc.)
+            region: Search region (e.g., 'us-en', 'cn-zh')
+            safesearch: Safe search level ('on', 'moderate', 'off')
+
+        Returns:
+            List of search results with title, url, and snippet
+        """
+        try:
+            # Create DDGS instance
+            ddgs = DDGS()
+
+            # Perform search using DDGS
+            search_results = list(
+                ddgs.text(
+                    query=query,
+                    max_results=max_results,
+                    backend=backend,
+                    region=region,
+                    safesearch=safesearch,
+                )
+            )
+
+            # Convert DDGS results to our expected format
+            results = []
+            for i, result in enumerate(search_results):
+                results.append(
+                    {
+                        "title": result.get("title", ""),
+                        "url": result.get("href", ""),
+                        "snippet": result.get("body", ""),
+                        "rank": i + 1,
+                    }
+                )
+
+            return results
+
+        except Exception as e:
+            return [
+                {
+                    "title": "Search Error",
+                    "url": "",
+                    "snippet": f"Failed to perform search: {str(e)}",
+                    "rank": 0,
+                }
+            ]
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Execute web search using DDGS.
+
+        Args:
+            arguments: Dictionary containing:
+                - query: Search query string
+                - max_results: Maximum number of results (default: 10)
+                - search_type: Type of search (default: 'general')
+                - backend: Search engine backend (default: 'auto')
+                - region: Search region (default: 'us-en')
+                - safesearch: Safe search level (default: 'moderate')
+
+        Returns:
+            Dictionary containing search results
+        """
+        try:
+            query = arguments.get("query", "").strip()
+            max_results = int(arguments.get("max_results", 10))
+            search_type = arguments.get("search_type", "general")
+            backend = arguments.get("backend", "auto")
+            region = arguments.get("region", "us-en")
+            safesearch = arguments.get("safesearch", "moderate")
+
+            if not query:
+                return {
+                    "status": "error",
+                    "error": "Query parameter is required",
+                    "results": [],
+                }
+
+            # Validate max_results
+            max_results = max(1, min(max_results, 50))  # Limit between 1-50
+
+            # Modify query based on search type
+            if search_type == "api_documentation":
+                query = f"{query} API documentation python library"
+            elif search_type == "python_packages":
+                query = f"{query} python package pypi"
+            elif search_type == "github":
+                query = f"{query} site:github.com"
+
+            # Perform search using DDGS
+            results = self._search_with_ddgs(
+                query=query,
+                max_results=max_results,
+                backend=backend,
+                region=region,
+                safesearch=safesearch,
+            )
+
+            # Add rate limiting to be respectful
+            time.sleep(0.5)
+
+            return {
+                "status": "success",
+                "query": query,
+                "search_type": search_type,
+                "total_results": len(results),
+                "results": results,
+            }
+
+        except Exception as e:
+            return {"status": "error", "error": str(e), "results": []}
+
+
+@register_tool("WebAPIDocumentationSearchTool")
+class WebAPIDocumentationSearchTool(WebSearchTool):
+    """
+    Specialized web search tool for API documentation and Python libraries.
+
+    This tool is optimized for finding API documentation, Python packages,
+    and technical resources using DDGS with multiple search engines.
+    """
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Execute API documentation focused search.
+
+        Args:
+            arguments: Dictionary containing:
+                - query: Search query string
+                - max_results: Maximum number of results (default: 10)
+                - focus: Focus area ('api_docs', 'python_packages', etc.)
+                - backend: Search engine backend (default: 'auto')
+
+        Returns:
+            Dictionary containing search results
+        """
+        try:
+            query = arguments.get("query", "").strip()
+            focus = arguments.get("focus", "api_docs")
+            backend = arguments.get("backend", "auto")
+
+            if not query:
+                return {
+                    "status": "error",
+                    "error": "Query parameter is required",
+                    "results": [],
+                }
+
+            # Modify query based on focus
+            if focus == "api_docs":
+                enhanced_query = f'"{query}" API documentation official docs'
+            elif focus == "python_packages":
+                enhanced_query = f'"{query}" python package pypi install pip'
+            elif focus == "github_repos":
+                enhanced_query = f'"{query}" github repository source code'
+            else:
+                enhanced_query = f'"{query}" documentation API reference'
+
+            # Use parent class search with enhanced query
+            arguments["query"] = enhanced_query
+            arguments["search_type"] = "api_documentation"
+            arguments["backend"] = backend
+
+            result = super().run(arguments)
+
+            # Add focus-specific metadata
+            if result["status"] == "success":
+                result["focus"] = focus
+                result["enhanced_query"] = enhanced_query
+
+                # Filter results for better relevance
+                if focus == "python_packages":
+                    result["results"] = [
+                        r
+                        for r in result["results"]
+                        if (
+                            "pypi.org" in r.get("url", "")
+                            or "python" in r.get("title", "").lower()
+                        )
+                    ]
+                elif focus == "github_repos":
+                    result["results"] = [
+                        r for r in result["results"] if "github.com" in r.get("url", "")
+                    ]
+
+            return result
+
+        except Exception as e:
+            return {"status": "error", "error": str(e), "results": []}
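A sketch of how the new tool might be exercised, assuming BaseTool needs nothing beyond the config dict passed to the constructor; the config keys shown here are illustrative rather than copied from the shipped data/web_search_tools.json:

from tooluniverse.web_search_tool import WebSearchTool

# Minimal, illustrative config; the real configs live in tooluniverse/data/web_search_tools.json.
tool = WebSearchTool({"name": "web_search", "type": "WebSearchTool"})

result = tool.run(
    {
        "query": "UniProt REST API",
        "max_results": 5,
        "search_type": "general",  # or "api_documentation", "python_packages", "github"
    }
)
if result["status"] == "success":
    for hit in result["results"]:
        print(hit["rank"], hit["title"], hit["url"])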