tooluniverse 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/__init__.py +57 -1
- tooluniverse/blast_tool.py +132 -0
- tooluniverse/boltz_tool.py +2 -2
- tooluniverse/cbioportal_tool.py +42 -0
- tooluniverse/clinvar_tool.py +268 -74
- tooluniverse/compose_scripts/tool_discover.py +1941 -443
- tooluniverse/data/agentic_tools.json +0 -370
- tooluniverse/data/alphafold_tools.json +6 -6
- tooluniverse/data/blast_tools.json +112 -0
- tooluniverse/data/cbioportal_tools.json +87 -0
- tooluniverse/data/clinvar_tools.json +235 -0
- tooluniverse/data/compose_tools.json +0 -89
- tooluniverse/data/dbsnp_tools.json +275 -0
- tooluniverse/data/emdb_tools.json +61 -0
- tooluniverse/data/ensembl_tools.json +259 -0
- tooluniverse/data/file_download_tools.json +275 -0
- tooluniverse/data/geo_tools.json +200 -48
- tooluniverse/data/gnomad_tools.json +109 -0
- tooluniverse/data/gtopdb_tools.json +68 -0
- tooluniverse/data/gwas_tools.json +32 -0
- tooluniverse/data/interpro_tools.json +199 -0
- tooluniverse/data/jaspar_tools.json +70 -0
- tooluniverse/data/kegg_tools.json +356 -0
- tooluniverse/data/mpd_tools.json +87 -0
- tooluniverse/data/ols_tools.json +314 -0
- tooluniverse/data/package_discovery_tools.json +64 -0
- tooluniverse/data/packages/categorized_tools.txt +0 -1
- tooluniverse/data/packages/machine_learning_tools.json +0 -47
- tooluniverse/data/paleobiology_tools.json +91 -0
- tooluniverse/data/pride_tools.json +62 -0
- tooluniverse/data/pypi_package_inspector_tools.json +158 -0
- tooluniverse/data/python_executor_tools.json +341 -0
- tooluniverse/data/regulomedb_tools.json +50 -0
- tooluniverse/data/remap_tools.json +89 -0
- tooluniverse/data/screen_tools.json +89 -0
- tooluniverse/data/tool_discovery_agents.json +428 -0
- tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
- tooluniverse/data/uniprot_tools.json +77 -0
- tooluniverse/data/web_search_tools.json +250 -0
- tooluniverse/data/worms_tools.json +55 -0
- tooluniverse/dbsnp_tool.py +196 -58
- tooluniverse/default_config.py +35 -2
- tooluniverse/emdb_tool.py +30 -0
- tooluniverse/ensembl_tool.py +140 -47
- tooluniverse/execute_function.py +74 -14
- tooluniverse/file_download_tool.py +269 -0
- tooluniverse/geo_tool.py +81 -28
- tooluniverse/gnomad_tool.py +100 -52
- tooluniverse/gtopdb_tool.py +41 -0
- tooluniverse/interpro_tool.py +72 -0
- tooluniverse/jaspar_tool.py +30 -0
- tooluniverse/kegg_tool.py +230 -0
- tooluniverse/mpd_tool.py +42 -0
- tooluniverse/ncbi_eutils_tool.py +96 -0
- tooluniverse/ols_tool.py +435 -0
- tooluniverse/package_discovery_tool.py +217 -0
- tooluniverse/paleobiology_tool.py +30 -0
- tooluniverse/pride_tool.py +30 -0
- tooluniverse/pypi_package_inspector_tool.py +593 -0
- tooluniverse/python_executor_tool.py +711 -0
- tooluniverse/regulomedb_tool.py +30 -0
- tooluniverse/remap_tool.py +44 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +1 -1
- tooluniverse/screen_tool.py +44 -0
- tooluniverse/smcp_server.py +3 -3
- tooluniverse/tool_finder_embedding.py +3 -1
- tooluniverse/tool_finder_keyword.py +3 -1
- tooluniverse/tool_finder_llm.py +6 -2
- tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
- tooluniverse/tools/BLAST_protein_search.py +63 -0
- tooluniverse/tools/ClinVar_search_variants.py +26 -15
- tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
- tooluniverse/tools/EMDB_get_structure.py +46 -0
- tooluniverse/tools/GtoPdb_get_targets.py +52 -0
- tooluniverse/tools/InterPro_get_domain_details.py +46 -0
- tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
- tooluniverse/tools/InterPro_search_domains.py +52 -0
- tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
- tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
- tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
- tooluniverse/tools/PackageAnalyzer.py +55 -0
- tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
- tooluniverse/tools/PyPIPackageInspector.py +59 -0
- tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
- tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
- tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
- tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
- tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
- tooluniverse/tools/ToolDiscover.py +11 -11
- tooluniverse/tools/UniProt_id_mapping.py +63 -0
- tooluniverse/tools/UniProt_search.py +63 -0
- tooluniverse/tools/UnifiedToolGenerator.py +59 -0
- tooluniverse/tools/WoRMS_search_species.py +49 -0
- tooluniverse/tools/XMLToolOptimizer.py +55 -0
- tooluniverse/tools/__init__.py +119 -29
- tooluniverse/tools/alphafold_get_annotations.py +3 -3
- tooluniverse/tools/alphafold_get_prediction.py +3 -3
- tooluniverse/tools/alphafold_get_summary.py +3 -3
- tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
- tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
- tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
- tooluniverse/tools/clinvar_get_variant_details.py +49 -0
- tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
- tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
- tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
- tooluniverse/tools/download_binary_file.py +66 -0
- tooluniverse/tools/download_file.py +71 -0
- tooluniverse/tools/download_text_content.py +55 -0
- tooluniverse/tools/dynamic_package_discovery.py +59 -0
- tooluniverse/tools/ensembl_get_sequence.py +52 -0
- tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
- tooluniverse/tools/ensembl_lookup_gene.py +46 -0
- tooluniverse/tools/geo_get_dataset_info.py +46 -0
- tooluniverse/tools/geo_get_sample_info.py +46 -0
- tooluniverse/tools/geo_search_datasets.py +67 -0
- tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
- tooluniverse/tools/kegg_find_genes.py +52 -0
- tooluniverse/tools/kegg_get_gene_info.py +46 -0
- tooluniverse/tools/kegg_get_pathway_info.py +46 -0
- tooluniverse/tools/kegg_list_organisms.py +44 -0
- tooluniverse/tools/kegg_search_pathway.py +46 -0
- tooluniverse/tools/ols_find_similar_terms.py +63 -0
- tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
- tooluniverse/tools/ols_get_term_ancestors.py +67 -0
- tooluniverse/tools/ols_get_term_children.py +67 -0
- tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
- tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
- tooluniverse/tools/ols_search_terms.py +71 -0
- tooluniverse/tools/python_code_executor.py +79 -0
- tooluniverse/tools/python_script_runner.py +79 -0
- tooluniverse/tools/web_api_documentation_search.py +63 -0
- tooluniverse/tools/web_search.py +71 -0
- tooluniverse/uniprot_tool.py +219 -16
- tooluniverse/url_tool.py +18 -0
- tooluniverse/utils.py +2 -2
- tooluniverse/web_search_tool.py +229 -0
- tooluniverse/worms_tool.py +64 -0
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +3 -2
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +143 -54
- tooluniverse/data/genomics_tools.json +0 -174
- tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
- tooluniverse/tools/ToolImplementationGenerator.py +0 -67
- tooluniverse/tools/ToolOptimizer.py +0 -59
- tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
- tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
- tooluniverse/ucsc_tool.py +0 -60
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from typing import Any, Dict
|
|
3
|
+
from .base_tool import BaseTool
|
|
4
|
+
from .tool_registry import register_tool
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@register_tool("JASPARRESTTool")
class JASPARRESTTool(BaseTool):
    """Config-driven GET wrapper for JASPAR REST endpoints.

    The request URL is taken from ``tool_config["fields"]["endpoint"]``; each
    ``{name}`` placeholder in that template is replaced with the matching
    call argument.
    """

    def __init__(self, tool_config: Dict):
        super().__init__(tool_config)
        # Not used by the methods of this class; kept because it is a public
        # attribute that other code may read.
        self.base_url = "https://jaspar.elixir.no/api/v1"
        self.session = requests.Session()
        self.session.headers.update({"Accept": "application/json"})
        self.timeout = 30  # seconds, passed to every request

    def _build_url(self, args: Dict[str, Any]) -> str:
        """Fill the endpoint template with percent-encoded argument values.

        Args:
            args: Mapping of placeholder name to value. Values are converted
                with ``str()`` before substitution.

        Returns:
            The endpoint template with every ``{key}`` found in ``args``
            replaced.

        Raises:
            KeyError: If the tool config has no ``fields``/``endpoint`` entry.
        """
        # Local import so this block does not depend on the file's import list.
        from urllib.parse import quote

        url = self.tool_config["fields"]["endpoint"]
        for key, value in args.items():
            # Encode the value so reserved characters ("&", "?", "#", "/",
            # spaces) cannot change the structure of the request URL.
            url = url.replace(f"{{{key}}}", quote(str(value), safe=""))
        # NOTE(review): placeholders with no matching argument are left in the
        # URL verbatim - confirm whether the tool configs rely on that.
        return url

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Perform the GET request and return the decoded JSON payload.

        Args:
            arguments: Values for the endpoint template placeholders.

        Returns:
            ``{"status": "success", "data": ..., "url": ...}`` on success, or
            ``{"status": "error", "error": ...}`` if anything fails (bad
            config, network error, HTTP error status, non-JSON body).
        """
        try:
            url = self._build_url(arguments)
            response = self.session.get(url, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()
            return {"status": "success", "data": data, "url": url}
        except Exception as e:
            # Deliberate catch-all: callers get an error dict, never a raise.
            return {"status": "error", "error": f"JASPAR API error: {str(e)}"}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""
|
|
2
|
+
KEGG Database REST API Tool
|
|
3
|
+
|
|
4
|
+
This tool provides access to the KEGG (Kyoto Encyclopedia of Genes and Genomes)
|
|
5
|
+
database for pathway analysis, gene information, and organism data.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
from typing import Dict, Any, Optional
|
|
10
|
+
from .base_tool import BaseTool
|
|
11
|
+
from .tool_registry import register_tool
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class KEGGRESTTool(BaseTool):
    """Base class for KEGG REST API tools."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://rest.kegg.jp"
        self.session = requests.Session()
        self.session.headers.update(
            {"Accept": "text/plain, application/json", "User-Agent": "ToolUniverse/1.0"}
        )
        self.timeout = 30  # seconds, passed to every request

    def _make_request(
        self, endpoint: str, params: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Make a GET request to the KEGG API.

        Args:
            endpoint: Path appended to ``self.base_url`` (including the
                leading slash).
            params: Optional query-string parameters.

        Returns:
            On success: ``{"status": "success", "data": ..., "url": ...,
            "content_type": ...}``. ``data`` is the decoded JSON object when
            the body looks like JSON and parses, otherwise the stripped
            response text. On a request failure: ``{"status": "error",
            "error": ..., "url": ...}``.
        """
        url = f"{self.base_url}{endpoint}"
        try:
            response = self.session.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return {
                "status": "error",
                "error": f"KEGG API request failed: {str(e)}",
                "url": url,
            }

        # KEGG API returns text/plain by default, so start from the text body.
        content = response.text.strip()
        data: Any = content

        # Only attempt JSON decoding when the body looks like JSON; a body
        # that merely starts with "{" or "[" falls back to plain text.
        if content.startswith(("{", "[")):
            try:
                data = response.json()
            except ValueError:
                data = content

        # Both the JSON and the text result carry the same metadata keys.
        return {
            "status": "success",
            "data": data,
            "url": url,
            "content_type": response.headers.get("content-type", "text/plain"),
        }

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the tool, sending ``arguments`` as query parameters.

        Subclasses are expected to set ``self.endpoint``. This base class
        does not define one, so an error result is returned when it is
        missing.
        """
        endpoint = getattr(self, "endpoint", None)
        if endpoint is None:
            return {
                "status": "error",
                "error": f"{type(self).__name__} does not define an endpoint",
            }
        return self._make_request(endpoint, arguments)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@register_tool("KEGGSearchPathway")
class KEGGSearchPathway(KEGGRESTTool):
    """Search KEGG pathways by keyword."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.endpoint = "/find/pathway"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search pathways with keyword.

        Args:
            arguments: Must contain a non-empty ``keyword``.

        Returns:
            The ``_make_request`` result. When the body is text, ``data`` is
            replaced by a list of ``{"pathway_id", "description"}`` dicts
            (one per tab-separated line) and ``count`` is added.
        """
        # Local import so this block does not depend on the file's import list.
        from urllib.parse import quote

        keyword = arguments.get("keyword", "")
        if not keyword:
            return {"status": "error", "error": "keyword is required"}

        # KEGG API requires the search term in the URL path. Percent-encode it
        # so "?" or "#" in the keyword cannot be read as a query string or
        # fragment; "+", ":" and "/" are left as-is.
        # NOTE(review): presumably KEGG uses those three as query syntax -
        # confirm against the KEGG API manual.
        encoded_keyword = quote(str(keyword), safe="/:+")
        result = self._make_request(f"{self.endpoint}/{encoded_keyword}")

        # Parse pathway results: "<id>\t<description>" per line.
        if result.get("status") == "success" and isinstance(result.get("data"), str):
            pathways = []
            for line in result["data"].split("\n"):
                pathway_id, tab, description = line.partition("\t")
                if tab:  # lines without a tab carry no record
                    pathways.append(
                        {"pathway_id": pathway_id, "description": description}
                    )
            result["data"] = pathways
            result["count"] = len(pathways)

        return result
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@register_tool("KEGGGetPathwayInfo")
class KEGGGetPathwayInfo(KEGGRESTTool):
    """Get detailed pathway information by pathway ID."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.endpoint = "/get"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get pathway information.

        Args:
            arguments: Must contain a non-empty ``pathway_id``; the ``path:``
                prefix is added when missing.

        Returns:
            The ``_make_request`` result. When the body is text, ``data`` is
            replaced by ``{"pathway_id", "raw_data", "lines"}``, where
            ``lines`` is the number of lines in the raw record.
        """
        # Local import so this block does not depend on the file's import list.
        from urllib.parse import quote

        pathway_id = arguments.get("pathway_id", "")
        if not pathway_id:
            return {"status": "error", "error": "pathway_id is required"}

        # Coerce to str so a numeric ID does not crash on .startswith().
        pathway_id = str(pathway_id)

        # Add pathway prefix if not present
        if not pathway_id.startswith("path:"):
            pathway_id = f"path:{pathway_id}"

        # KEGG API requires the ID in the URL path. Percent-encode it so "?"
        # or "#" cannot be read as a query string or fragment; ":", "+" and
        # "/" are left as-is.
        encoded_id = quote(pathway_id, safe="/:+")
        result = self._make_request(f"{self.endpoint}/{encoded_id}")

        # Wrap the raw flat-file record; no field-level parsing is attempted.
        if result.get("status") == "success" and isinstance(result.get("data"), str):
            raw = result["data"]
            result["data"] = {
                "pathway_id": pathway_id,
                "raw_data": raw,
                "lines": len(raw.split("\n")),
            }

        return result
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@register_tool("KEGGFindGenes")
class KEGGFindGenes(KEGGRESTTool):
    """Find genes by keyword in KEGG database."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.endpoint = "/find/genes"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Find genes with keyword.

        Args:
            arguments: Must contain a non-empty ``keyword``.

        Returns:
            The ``_make_request`` result. When the body is text, ``data`` is
            replaced by a list of ``{"gene_id", "description"}`` dicts (one
            per tab-separated line) and ``count`` is added.
        """
        # Local import so this block does not depend on the file's import list.
        from urllib.parse import quote

        keyword = arguments.get("keyword", "")
        if not keyword:
            return {"status": "error", "error": "keyword is required"}

        # KEGG API requires the search term in the URL path; no organism
        # prefix is added for gene search. Percent-encode the keyword so "?"
        # or "#" cannot be read as a query string or fragment; "+", ":" and
        # "/" are left as-is.
        # NOTE(review): presumably KEGG uses those three as query syntax -
        # confirm against the KEGG API manual.
        encoded_keyword = quote(str(keyword), safe="/:+")
        result = self._make_request(f"{self.endpoint}/{encoded_keyword}")

        # Parse gene results: "<id>\t<description>" per line.
        if result.get("status") == "success" and isinstance(result.get("data"), str):
            genes = []
            for line in result["data"].split("\n"):
                gene_id, tab, description = line.partition("\t")
                if tab:  # lines without a tab carry no record
                    genes.append({"gene_id": gene_id, "description": description})
            result["data"] = genes
            result["count"] = len(genes)

        return result
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@register_tool("KEGGGetGeneInfo")
class KEGGGetGeneInfo(KEGGRESTTool):
    """Get detailed gene information by gene ID."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.endpoint = "/get"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get gene information.

        Args:
            arguments: Must contain a non-empty ``gene_id``.

        Returns:
            The ``_make_request`` result. When the body is text, ``data`` is
            replaced by ``{"gene_id", "raw_data", "lines"}``, where ``lines``
            is the number of lines in the raw record.
        """
        # Local import so this block does not depend on the file's import list.
        from urllib.parse import quote

        gene_id = arguments.get("gene_id", "")
        if not gene_id:
            return {"status": "error", "error": "gene_id is required"}

        # KEGG API requires the ID in the URL path. Percent-encode it so "?"
        # or "#" cannot be read as a query string or fragment; ":", "+" and
        # "/" are left as-is.
        encoded_id = quote(str(gene_id), safe="/:+")
        result = self._make_request(f"{self.endpoint}/{encoded_id}")

        # Wrap the raw flat-file record; no field-level parsing is attempted.
        if result.get("status") == "success" and isinstance(result.get("data"), str):
            raw = result["data"]
            result["data"] = {
                "gene_id": gene_id,
                "raw_data": raw,
                "lines": len(raw.split("\n")),
            }

        return result
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
@register_tool("KEGGListOrganisms")
class KEGGListOrganisms(KEGGRESTTool):
    """List available organisms in KEGG database."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.endpoint = "/list/organism"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """List organisms.

        Args:
            arguments: Unused; the listing takes no parameters.

        Returns:
            The ``_make_request`` result. When the body is text, ``data`` is
            replaced by a list of organism dicts and ``count`` is added.
        """
        result = self._make_request(self.endpoint)

        # Parse organism list
        if result.get("status") == "success" and isinstance(result.get("data"), str):
            organisms = []
            for line in result["data"].split("\n"):
                parts = line.split("\t")
                if len(parts) >= 4:
                    # Per the KEGG REST documentation a row is
                    # "<T number>\t<organism code>\t<name>\t<lineage>".
                    # The previous mapping labelled the T number as the code
                    # and the code as the name.
                    t_number, code, name, lineage = parts[:4]
                    organisms.append(
                        {
                            "organism_code": code,
                            "organism_name": name,
                            "description": lineage,
                            "t_number": t_number,
                        }
                    )
                elif len(parts) == 3:
                    # Unexpected three-column row: keep the historical
                    # positional mapping; do not guess which column is missing.
                    organisms.append(
                        {
                            "organism_code": parts[0],
                            "organism_name": parts[1],
                            "description": parts[2],
                        }
                    )
            result["data"] = organisms
            result["count"] = len(organisms)

        return result
|
tooluniverse/mpd_tool.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from typing import Any, Dict
|
|
3
|
+
from .base_tool import BaseTool
|
|
4
|
+
from .tool_registry import register_tool
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@register_tool("MPDRESTTool")
class MPDRESTTool(BaseTool):
    """Tool registered as MPD that fetches experiment listings from ENCODE.

    Despite the name, the request goes to the ENCODE search endpoint, not to
    the Mouse Phenome Database.
    """

    def __init__(self, tool_config: Dict):
        super().__init__(tool_config)
        self.session = requests.Session()
        self.session.headers.update({"Accept": "application/json"})
        self.timeout = 30  # seconds, passed to every request

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Query ENCODE experiments and return the JSON payload.

        Args:
            arguments: Optional ``strain`` (default ``"C57BL/6J"``) and
                ``limit`` (default ``5``).

        Returns:
            ``{"status": "success", "data", "url", "query_info"}`` on success,
            or ``{"status": "error", "error": ...}`` on any failure.
        """
        # Local import so this block does not depend on the file's import list.
        from urllib.parse import urlencode

        try:
            # Use ENCODE as alternative data source for biological samples
            strain = arguments.get("strain", "C57BL/6J")
            limit = arguments.get("limit", 5)
            # NOTE(review): `strain` is only echoed back in query_info; it is
            # never sent to ENCODE, so results do not depend on it.

            # Build the ENCODE search URL. urlencode keeps the original
            # parameter order and escapes `limit`, so a value such as
            # "5&type=..." cannot inject extra query parameters.
            query = urlencode(
                {"type": "Experiment", "format": "json", "limit": limit}
            )
            url = f"https://www.encodeproject.org/search/?{query}"

            response = self.session.get(url, timeout=self.timeout)
            response.raise_for_status()

            # Parse JSON response
            data = response.json()

            return {
                "status": "success",
                "data": data,
                "url": url,
                "query_info": {
                    "strain": strain,
                    "limit": limit,
                    "data_source": "ENCODE (MPD alternative)",
                },
            }
        except Exception as e:
            # Deliberate catch-all: callers get an error dict, never a raise.
            return {"status": "error", "error": f"MPD API error: {str(e)}"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""
|
|
2
|
+
NCBI E-utilities Tool with Rate Limiting
|
|
3
|
+
|
|
4
|
+
This module provides a base class for NCBI E-utilities API tools with
|
|
5
|
+
built-in rate limiting and retry logic to handle 429 errors.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
import requests
|
|
10
|
+
from typing import Dict, Any, Optional
|
|
11
|
+
from .base_tool import BaseTool
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NCBIEUtilsTool(BaseTool):
    """Base class for NCBI E-utilities tools with rate limiting."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
        self.last_request_time = 0  # time.time() of the last completed request
        self.min_interval = 0.34  # ~3 requests/second (NCBI limit without API key)
        self.max_retries = 3
        self.initial_retry_delay = 1  # seconds; doubled on each 429 retry
        self.session = requests.Session()
        self.session.headers.update(
            {"Accept": "application/json", "User-Agent": "ToolUniverse/1.0"}
        )
        self.timeout = 30  # seconds, passed to every request

    def _make_request(
        self, endpoint: str, params: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Make request with rate limiting and retry logic.

        Waits so that requests from this instance are at least
        ``min_interval`` seconds apart, and retries with exponential backoff
        only when the server answers HTTP 429.

        Args:
            endpoint: Path appended to ``self.base_url``.
            params: Optional query-string parameters.

        Returns:
            On success: ``{"status": "success", "data", "url",
            "content_type"}``, where ``data`` is the decoded JSON or, if the
            body is not JSON, the response text. On failure:
            ``{"status": "error", "error", "url"}``, plus ``status_code`` for
            HTTP errors.
        """
        url = f"{self.base_url}{endpoint}"

        for attempt in range(self.max_retries):
            # Rate limiting: sleep off the remainder of the minimum interval.
            elapsed = time.time() - self.last_request_time
            if elapsed < self.min_interval:
                time.sleep(self.min_interval - elapsed)

            try:
                response = self.session.get(url, params=params, timeout=self.timeout)
                self.last_request_time = time.time()
                response.raise_for_status()
            except requests.exceptions.HTTPError as e:
                # `response` can be None on a hand-built HTTPError, so guard
                # before reading the status code.
                http_response = getattr(e, "response", None)
                status_code = (
                    http_response.status_code if http_response is not None else None
                )
                if status_code == 429 and attempt < self.max_retries - 1:
                    # Exponential backoff for rate limiting
                    delay = self.initial_retry_delay * (2**attempt)
                    print(
                        f"Rate limited, retrying in {delay} seconds... (attempt {attempt + 1}/{self.max_retries})"
                    )
                    time.sleep(delay)
                    continue
                return {
                    "status": "error",
                    "error": f"NCBI E-utilities API request failed: {str(e)}",
                    "url": url,
                    "status_code": status_code,
                }
            except requests.exceptions.RequestException as e:
                # Connection errors, timeouts, etc. are not retried.
                return {
                    "status": "error",
                    "error": f"NCBI E-utilities API request failed: {str(e)}",
                    "url": url,
                }

            # Try to parse JSON response; fall back to the raw text.
            try:
                data = response.json()
            except ValueError:
                data = response.text

            return {
                "status": "success",
                "data": data,
                "url": url,
                "content_type": response.headers.get(
                    "content-type", "application/json"
                ),
            }

        # Only reached when the loop body never runs (max_retries <= 0).
        return {
            "status": "error",
            "error": f"NCBI E-utilities API request failed after {self.max_retries} attempts",
            "url": url,
        }

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the tool, sending ``arguments`` as query parameters.

        Subclasses are expected to set ``self.endpoint``. This base class
        does not define one, so an error result is returned when it is
        missing.
        """
        endpoint = getattr(self, "endpoint", None)
        if endpoint is None:
            return {
                "status": "error",
                "error": f"{type(self).__name__} does not define an endpoint",
            }
        return self._make_request(endpoint, arguments)
|