tooluniverse 1.0.9.1__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/__init__.py +57 -1
- tooluniverse/admetai_tool.py +1 -1
- tooluniverse/agentic_tool.py +65 -17
- tooluniverse/base_tool.py +19 -8
- tooluniverse/blast_tool.py +132 -0
- tooluniverse/boltz_tool.py +3 -3
- tooluniverse/cache/result_cache_manager.py +167 -12
- tooluniverse/cbioportal_tool.py +42 -0
- tooluniverse/clinvar_tool.py +268 -74
- tooluniverse/compose_scripts/drug_safety_analyzer.py +1 -1
- tooluniverse/compose_scripts/multi_agent_literature_search.py +1 -1
- tooluniverse/compose_scripts/output_summarizer.py +4 -4
- tooluniverse/compose_scripts/tool_discover.py +1941 -443
- tooluniverse/compose_scripts/tool_graph_composer.py +1 -1
- tooluniverse/compose_scripts/tool_metadata_generator.py +1 -1
- tooluniverse/compose_tool.py +9 -9
- tooluniverse/core_tool.py +2 -2
- tooluniverse/ctg_tool.py +4 -4
- tooluniverse/custom_tool.py +1 -1
- tooluniverse/data/agentic_tools.json +0 -370
- tooluniverse/data/alphafold_tools.json +6 -6
- tooluniverse/data/blast_tools.json +112 -0
- tooluniverse/data/cbioportal_tools.json +87 -0
- tooluniverse/data/clinvar_tools.json +235 -0
- tooluniverse/data/compose_tools.json +0 -89
- tooluniverse/data/dbsnp_tools.json +275 -0
- tooluniverse/data/emdb_tools.json +61 -0
- tooluniverse/data/ensembl_tools.json +259 -0
- tooluniverse/data/file_download_tools.json +275 -0
- tooluniverse/data/geo_tools.json +200 -48
- tooluniverse/data/gnomad_tools.json +109 -0
- tooluniverse/data/gtopdb_tools.json +68 -0
- tooluniverse/data/gwas_tools.json +32 -0
- tooluniverse/data/interpro_tools.json +199 -0
- tooluniverse/data/jaspar_tools.json +70 -0
- tooluniverse/data/kegg_tools.json +356 -0
- tooluniverse/data/mpd_tools.json +87 -0
- tooluniverse/data/ols_tools.json +314 -0
- tooluniverse/data/package_discovery_tools.json +64 -0
- tooluniverse/data/packages/categorized_tools.txt +0 -1
- tooluniverse/data/packages/machine_learning_tools.json +0 -47
- tooluniverse/data/paleobiology_tools.json +91 -0
- tooluniverse/data/pride_tools.json +62 -0
- tooluniverse/data/pypi_package_inspector_tools.json +158 -0
- tooluniverse/data/python_executor_tools.json +341 -0
- tooluniverse/data/regulomedb_tools.json +50 -0
- tooluniverse/data/remap_tools.json +89 -0
- tooluniverse/data/screen_tools.json +89 -0
- tooluniverse/data/tool_discovery_agents.json +428 -0
- tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
- tooluniverse/data/uniprot_tools.json +77 -0
- tooluniverse/data/web_search_tools.json +250 -0
- tooluniverse/data/worms_tools.json +55 -0
- tooluniverse/dataset_tool.py +2 -2
- tooluniverse/dbsnp_tool.py +196 -58
- tooluniverse/default_config.py +36 -3
- tooluniverse/emdb_tool.py +30 -0
- tooluniverse/enrichr_tool.py +14 -14
- tooluniverse/ensembl_tool.py +140 -47
- tooluniverse/execute_function.py +594 -29
- tooluniverse/extended_hooks.py +4 -4
- tooluniverse/file_download_tool.py +269 -0
- tooluniverse/gene_ontology_tool.py +1 -1
- tooluniverse/generate_tools.py +3 -3
- tooluniverse/geo_tool.py +81 -28
- tooluniverse/gnomad_tool.py +100 -52
- tooluniverse/gtopdb_tool.py +41 -0
- tooluniverse/humanbase_tool.py +10 -10
- tooluniverse/interpro_tool.py +72 -0
- tooluniverse/jaspar_tool.py +30 -0
- tooluniverse/kegg_tool.py +230 -0
- tooluniverse/logging_config.py +2 -2
- tooluniverse/mcp_client_tool.py +57 -129
- tooluniverse/mcp_integration.py +52 -49
- tooluniverse/mcp_tool_registry.py +147 -528
- tooluniverse/mpd_tool.py +42 -0
- tooluniverse/ncbi_eutils_tool.py +96 -0
- tooluniverse/ols_tool.py +435 -0
- tooluniverse/openalex_tool.py +8 -8
- tooluniverse/openfda_tool.py +2 -2
- tooluniverse/output_hook.py +15 -15
- tooluniverse/package_discovery_tool.py +217 -0
- tooluniverse/package_tool.py +1 -1
- tooluniverse/paleobiology_tool.py +30 -0
- tooluniverse/pmc_tool.py +2 -2
- tooluniverse/pride_tool.py +30 -0
- tooluniverse/pypi_package_inspector_tool.py +593 -0
- tooluniverse/python_executor_tool.py +711 -0
- tooluniverse/regulomedb_tool.py +30 -0
- tooluniverse/remap_tool.py +44 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +1 -1
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +3 -3
- tooluniverse/remote/immune_compass/compass_tool.py +3 -3
- tooluniverse/remote/pinnacle/pinnacle_tool.py +2 -2
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +3 -3
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +3 -3
- tooluniverse/remote_tool.py +4 -4
- tooluniverse/screen_tool.py +44 -0
- tooluniverse/scripts/filter_tool_files.py +2 -2
- tooluniverse/smcp.py +93 -12
- tooluniverse/smcp_server.py +100 -21
- tooluniverse/space/__init__.py +46 -0
- tooluniverse/space/loader.py +133 -0
- tooluniverse/space/validator.py +353 -0
- tooluniverse/tool_finder_embedding.py +5 -3
- tooluniverse/tool_finder_keyword.py +12 -10
- tooluniverse/tool_finder_llm.py +12 -8
- tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
- tooluniverse/tools/BLAST_protein_search.py +63 -0
- tooluniverse/tools/ClinVar_search_variants.py +26 -15
- tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
- tooluniverse/tools/EMDB_get_structure.py +46 -0
- tooluniverse/tools/GtoPdb_get_targets.py +52 -0
- tooluniverse/tools/InterPro_get_domain_details.py +46 -0
- tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
- tooluniverse/tools/InterPro_search_domains.py +52 -0
- tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
- tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
- tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
- tooluniverse/tools/PackageAnalyzer.py +55 -0
- tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
- tooluniverse/tools/PyPIPackageInspector.py +59 -0
- tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
- tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
- tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
- tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
- tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
- tooluniverse/tools/ToolDiscover.py +11 -11
- tooluniverse/tools/UniProt_id_mapping.py +63 -0
- tooluniverse/tools/UniProt_search.py +63 -0
- tooluniverse/tools/UnifiedToolGenerator.py +59 -0
- tooluniverse/tools/WoRMS_search_species.py +49 -0
- tooluniverse/tools/XMLToolOptimizer.py +55 -0
- tooluniverse/tools/__init__.py +119 -29
- tooluniverse/tools/_shared_client.py +3 -3
- tooluniverse/tools/alphafold_get_annotations.py +3 -3
- tooluniverse/tools/alphafold_get_prediction.py +3 -3
- tooluniverse/tools/alphafold_get_summary.py +3 -3
- tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
- tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
- tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
- tooluniverse/tools/clinvar_get_variant_details.py +49 -0
- tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
- tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
- tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
- tooluniverse/tools/download_binary_file.py +66 -0
- tooluniverse/tools/download_file.py +71 -0
- tooluniverse/tools/download_text_content.py +55 -0
- tooluniverse/tools/dynamic_package_discovery.py +59 -0
- tooluniverse/tools/ensembl_get_sequence.py +52 -0
- tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
- tooluniverse/tools/ensembl_lookup_gene.py +46 -0
- tooluniverse/tools/geo_get_dataset_info.py +46 -0
- tooluniverse/tools/geo_get_sample_info.py +46 -0
- tooluniverse/tools/geo_search_datasets.py +67 -0
- tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
- tooluniverse/tools/kegg_find_genes.py +52 -0
- tooluniverse/tools/kegg_get_gene_info.py +46 -0
- tooluniverse/tools/kegg_get_pathway_info.py +46 -0
- tooluniverse/tools/kegg_list_organisms.py +44 -0
- tooluniverse/tools/kegg_search_pathway.py +46 -0
- tooluniverse/tools/ols_find_similar_terms.py +63 -0
- tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
- tooluniverse/tools/ols_get_term_ancestors.py +67 -0
- tooluniverse/tools/ols_get_term_children.py +67 -0
- tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
- tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
- tooluniverse/tools/ols_search_terms.py +71 -0
- tooluniverse/tools/python_code_executor.py +79 -0
- tooluniverse/tools/python_script_runner.py +79 -0
- tooluniverse/tools/web_api_documentation_search.py +63 -0
- tooluniverse/tools/web_search.py +71 -0
- tooluniverse/uniprot_tool.py +219 -16
- tooluniverse/url_tool.py +19 -1
- tooluniverse/uspto_tool.py +1 -1
- tooluniverse/utils.py +12 -12
- tooluniverse/web_search_tool.py +229 -0
- tooluniverse/worms_tool.py +64 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +8 -3
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +184 -92
- tooluniverse/data/genomics_tools.json +0 -174
- tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
- tooluniverse/tools/ToolImplementationGenerator.py +0 -67
- tooluniverse/tools/ToolOptimizer.py +0 -59
- tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
- tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
- tooluniverse/ucsc_tool.py +0 -60
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
|
@@ -6,9 +6,11 @@ from __future__ import annotations
|
|
|
6
6
|
|
|
7
7
|
import logging
|
|
8
8
|
import os
|
|
9
|
+
import queue
|
|
10
|
+
import threading
|
|
9
11
|
import time
|
|
10
12
|
from dataclasses import dataclass
|
|
11
|
-
from typing import Any, Dict, Iterator, Optional
|
|
13
|
+
from typing import Any, Dict, Iterator, Optional, Sequence
|
|
12
14
|
|
|
13
15
|
from .memory_cache import LRUCache, SingleFlight
|
|
14
16
|
from .sqlite_backend import CacheEntry, PersistentCache
|
|
@@ -36,6 +38,8 @@ class ResultCacheManager:
|
|
|
36
38
|
persistence_enabled: bool = True,
|
|
37
39
|
singleflight: bool = True,
|
|
38
40
|
default_ttl: Optional[int] = None,
|
|
41
|
+
async_persist: Optional[bool] = None,
|
|
42
|
+
async_queue_size: int = 10000,
|
|
39
43
|
):
|
|
40
44
|
self.enabled = enabled
|
|
41
45
|
self.default_ttl = default_ttl
|
|
@@ -55,6 +59,7 @@ class ResultCacheManager:
|
|
|
55
59
|
self.persistent = None
|
|
56
60
|
|
|
57
61
|
self.singleflight = SingleFlight() if singleflight else None
|
|
62
|
+
self._init_async_persistence(async_persist, async_queue_size)
|
|
58
63
|
|
|
59
64
|
# ------------------------------------------------------------------
|
|
60
65
|
# Helper methods
|
|
@@ -69,6 +74,33 @@ class ResultCacheManager:
|
|
|
69
74
|
def _ttl_or_default(self, ttl: Optional[int]) -> Optional[int]:
|
|
70
75
|
return ttl if ttl is not None else self.default_ttl
|
|
71
76
|
|
|
77
|
+
def _init_async_persistence(
|
|
78
|
+
self, async_persist: Optional[bool], async_queue_size: int
|
|
79
|
+
) -> None:
|
|
80
|
+
if async_persist is None:
|
|
81
|
+
async_persist = os.getenv(
|
|
82
|
+
"TOOLUNIVERSE_CACHE_ASYNC_PERSIST", "true"
|
|
83
|
+
).lower() in ("true", "1", "yes")
|
|
84
|
+
|
|
85
|
+
self.async_persist = (
|
|
86
|
+
async_persist and self.persistent is not None and self.enabled
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
self._persist_queue: Optional["queue.Queue[tuple[str, Dict[str, Any]]]"] = None
|
|
90
|
+
self._worker_thread: Optional[threading.Thread] = None
|
|
91
|
+
|
|
92
|
+
if not self.async_persist:
|
|
93
|
+
return
|
|
94
|
+
|
|
95
|
+
queue_size = max(1, async_queue_size)
|
|
96
|
+
self._persist_queue = queue.Queue(maxsize=queue_size)
|
|
97
|
+
self._worker_thread = threading.Thread(
|
|
98
|
+
target=self._async_worker,
|
|
99
|
+
name="ResultCacheWriter",
|
|
100
|
+
daemon=True,
|
|
101
|
+
)
|
|
102
|
+
self._worker_thread.start()
|
|
103
|
+
|
|
72
104
|
# ------------------------------------------------------------------
|
|
73
105
|
# Public API
|
|
74
106
|
# ------------------------------------------------------------------
|
|
@@ -126,17 +158,15 @@ class ResultCacheManager:
|
|
|
126
158
|
)
|
|
127
159
|
|
|
128
160
|
if self.persistent:
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
logger.warning("Persistent cache write failed: %s", exc)
|
|
139
|
-
self.persistent = None
|
|
161
|
+
payload = {
|
|
162
|
+
"composed": composed,
|
|
163
|
+
"value": value,
|
|
164
|
+
"namespace": namespace,
|
|
165
|
+
"version": version,
|
|
166
|
+
"ttl": effective_ttl,
|
|
167
|
+
}
|
|
168
|
+
if not self._schedule_persist("set", payload):
|
|
169
|
+
self._perform_persist_set(**payload)
|
|
140
170
|
|
|
141
171
|
def delete(self, *, namespace: str, version: str, cache_key: str):
|
|
142
172
|
composed = self.compose_key(namespace, version, cache_key)
|
|
@@ -162,10 +192,40 @@ class ResultCacheManager:
|
|
|
162
192
|
|
|
163
193
|
if self.persistent:
|
|
164
194
|
try:
|
|
195
|
+
self.flush()
|
|
165
196
|
self.persistent.clear(namespace=namespace)
|
|
166
197
|
except Exception as exc:
|
|
167
198
|
logger.warning("Persistent cache clear failed: %s", exc)
|
|
168
199
|
|
|
200
|
+
def bulk_get(self, requests: Sequence[Dict[str, str]]) -> Dict[str, Any]:
    """Look up several cache entries in one call.

    Args:
        requests: Dicts that each carry ``namespace``, ``version`` and
            ``cache_key``.

    Returns:
        Mapping of composed cache key to cached value. Requests for which
        ``get`` returns ``None`` are left out; an empty dict is returned
        when the cache is disabled.
    """
    found: Dict[str, Any] = {}
    if not self.enabled:
        return found

    for spec in requests:
        ns, ver, key = spec["namespace"], spec["version"], spec["cache_key"]
        cached = self.get(namespace=ns, version=ver, cache_key=key)
        if cached is None:
            continue
        found[self.compose_key(ns, ver, key)] = cached

    return found
|
|
228
|
+
|
|
169
229
|
def stats(self) -> Dict[str, Any]:
|
|
170
230
|
return {
|
|
171
231
|
"enabled": self.enabled,
|
|
@@ -173,11 +233,18 @@ class ResultCacheManager:
|
|
|
173
233
|
"persistent": (
|
|
174
234
|
self.persistent.stats() if self.persistent else {"enabled": False}
|
|
175
235
|
),
|
|
236
|
+
"async_persist": self.async_persist,
|
|
237
|
+
"pending_writes": (
|
|
238
|
+
self._persist_queue.qsize()
|
|
239
|
+
if self.async_persist and self._persist_queue is not None
|
|
240
|
+
else 0
|
|
241
|
+
),
|
|
176
242
|
}
|
|
177
243
|
|
|
178
244
|
def dump(self, namespace: Optional[str] = None) -> Iterator[Dict[str, Any]]:
|
|
179
245
|
if not self.persistent:
|
|
180
246
|
return iter([])
|
|
247
|
+
self.flush()
|
|
181
248
|
return (
|
|
182
249
|
{
|
|
183
250
|
"cache_key": entry.key,
|
|
@@ -220,12 +287,100 @@ class ResultCacheManager:
|
|
|
220
287
|
return _DummyContext()
|
|
221
288
|
|
|
222
289
|
def close(self):
    """Flush queued writes, stop the writer thread, then close the backend.

    A failure while closing the persistent backend is logged as a warning
    and not re-raised.
    """
    self.flush()
    self._shutdown_async_worker()

    if not self.persistent:
        return
    try:
        self.persistent.close()
    except Exception as exc:
        logger.warning("Persistent cache close failed: %s", exc)
|
|
228
297
|
|
|
298
|
+
# ------------------------------------------------------------------
|
|
299
|
+
# Async persistence helpers
|
|
300
|
+
# ------------------------------------------------------------------
|
|
301
|
+
|
|
302
|
+
def flush(self):
    """Block until every queued asynchronous write has been processed.

    Does nothing when async persistence is off or no queue exists.
    """
    if not self.async_persist:
        return
    pending = self._persist_queue
    if pending is None:
        return
    pending.join()
|
|
305
|
+
|
|
306
|
+
def _schedule_persist(self, op: str, payload: Dict[str, Any]) -> bool:
|
|
307
|
+
if not self.async_persist or self._persist_queue is None:
|
|
308
|
+
return False
|
|
309
|
+
try:
|
|
310
|
+
self._persist_queue.put_nowait((op, payload))
|
|
311
|
+
return True
|
|
312
|
+
except queue.Full:
|
|
313
|
+
logger.warning(
|
|
314
|
+
"Async cache queue full; falling back to synchronous persistence"
|
|
315
|
+
)
|
|
316
|
+
return False
|
|
317
|
+
|
|
318
|
+
def _async_worker(self):
|
|
319
|
+
queue_ref = self._persist_queue
|
|
320
|
+
if queue_ref is None:
|
|
321
|
+
return
|
|
322
|
+
|
|
323
|
+
while True:
|
|
324
|
+
try:
|
|
325
|
+
op, payload = queue_ref.get()
|
|
326
|
+
except Exception:
|
|
327
|
+
continue
|
|
328
|
+
|
|
329
|
+
if op == "__STOP__":
|
|
330
|
+
queue_ref.task_done()
|
|
331
|
+
break
|
|
332
|
+
|
|
333
|
+
try:
|
|
334
|
+
if op == "set":
|
|
335
|
+
self._perform_persist_set(**payload)
|
|
336
|
+
else:
|
|
337
|
+
logger.warning("Unknown async cache operation: %s", op)
|
|
338
|
+
except Exception as exc:
|
|
339
|
+
logger.warning("Async cache write failed: %s", exc)
|
|
340
|
+
# Disable async persistence to avoid repeated failures
|
|
341
|
+
self.async_persist = False
|
|
342
|
+
finally:
|
|
343
|
+
queue_ref.task_done()
|
|
344
|
+
|
|
345
|
+
def _perform_persist_set(
|
|
346
|
+
self,
|
|
347
|
+
*,
|
|
348
|
+
composed: str,
|
|
349
|
+
value: Any,
|
|
350
|
+
namespace: str,
|
|
351
|
+
version: str,
|
|
352
|
+
ttl: Optional[int],
|
|
353
|
+
):
|
|
354
|
+
if not self.persistent:
|
|
355
|
+
return
|
|
356
|
+
try:
|
|
357
|
+
self.persistent.set(
|
|
358
|
+
composed,
|
|
359
|
+
value,
|
|
360
|
+
namespace=namespace,
|
|
361
|
+
version=version,
|
|
362
|
+
ttl=ttl,
|
|
363
|
+
)
|
|
364
|
+
except Exception as exc:
|
|
365
|
+
logger.warning("Persistent cache write failed: %s", exc)
|
|
366
|
+
self.persistent = None
|
|
367
|
+
raise
|
|
368
|
+
|
|
369
|
+
def _shutdown_async_worker(self) -> None:
|
|
370
|
+
if not self.async_persist or self._persist_queue is None:
|
|
371
|
+
return
|
|
372
|
+
|
|
373
|
+
try:
|
|
374
|
+
self._persist_queue.put_nowait(("__STOP__", {}))
|
|
375
|
+
except queue.Full:
|
|
376
|
+
self._persist_queue.put(("__STOP__", {}))
|
|
377
|
+
|
|
378
|
+
if self._worker_thread is not None:
|
|
379
|
+
self._worker_thread.join(timeout=5)
|
|
380
|
+
|
|
381
|
+
self._worker_thread = None
|
|
382
|
+
self._persist_queue = None
|
|
383
|
+
|
|
229
384
|
|
|
230
385
|
class _DummyContext:
|
|
231
386
|
def __enter__(self):
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from typing import Any, Dict
|
|
3
|
+
from .base_tool import BaseTool
|
|
4
|
+
from .tool_registry import register_tool
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@register_tool("CBioPortalRESTTool")
class CBioPortalRESTTool(BaseTool):
    """GET-based client for cBioPortal endpoints described by the tool config."""

    def __init__(self, tool_config: Dict):
        super().__init__(tool_config)
        self.base_url = "https://www.cbioportal.org/api"
        self.session = requests.Session()
        self.session.headers.update(
            {"Accept": "application/json", "User-Agent": "ToolUniverse/1.0"}
        )
        self.timeout = 30

    def _build_url(self, args: Dict[str, Any]) -> str:
        """Fill ``{name}`` placeholders in the configured endpoint from ``args``.

        The template is read from ``self.tool_config["fields"]["endpoint"]``
        (``tool_config`` is presumably stored by ``BaseTool.__init__`` —
        confirm). Arguments without a matching placeholder are ignored.
        """
        url = self.tool_config["fields"]["endpoint"]
        for k, v in args.items():
            url = url.replace(f"{{{k}}}", str(v))
        return url

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Call the endpoint and wrap the JSON response.

        Returns:
            On success: ``status``, ``data``, ``url`` and ``count`` (list
            length, or 1 for non-list payloads). On any exception: ``status``
            ``"error"``, an ``error`` message and ``url`` (``None`` if the
            URL could not be built).
        """
        # Bound before the try so the error branch can always report it; a
        # failure inside _build_url would otherwise surface as an
        # UnboundLocalError instead of the real problem.
        url = None
        try:
            url = self._build_url(arguments)
            response = self.session.get(url, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

            return {
                "status": "success",
                "data": data,
                "url": url,
                "count": len(data) if isinstance(data, list) else 1,
            }
        except Exception as e:
            return {
                "status": "error",
                "error": f"cBioPortal API error: {str(e)}",
                "url": url,
            }
|
tooluniverse/clinvar_tool.py
CHANGED
|
@@ -1,90 +1,284 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ClinVar REST API Tool
|
|
3
|
+
|
|
4
|
+
This tool provides access to the ClinVar database for clinical variant information,
|
|
5
|
+
disease associations, and clinical significance data.
|
|
6
|
+
"""
|
|
7
|
+
|
|
1
8
|
import requests
|
|
9
|
+
import time
|
|
10
|
+
from typing import Dict, Any, Optional
|
|
2
11
|
from .base_tool import BaseTool
|
|
3
12
|
from .tool_registry import register_tool
|
|
4
13
|
|
|
5
14
|
|
|
6
|
-
|
|
7
|
-
class
|
|
8
|
-
"""
|
|
9
|
-
Local tool wrapper for ClinVar via NCBI E-utilities.
|
|
10
|
-
Uses esearch + esummary to fetch variant records.
|
|
11
|
-
"""
|
|
15
|
+
class ClinVarRESTTool(BaseTool):
    """Base class for ClinVar REST API tools."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
        self.session = requests.Session()
        self.session.headers.update(
            {"Accept": "application/json", "User-Agent": "ToolUniverse/1.0"}
        )
        self.timeout = 30

    @staticmethod
    def _retry_wait(retry_after, attempt: int):
        """Return seconds to wait before the next retry.

        Uses ``Retry-After`` when it is an integer number of seconds. A
        missing, empty or non-numeric value (the header may also carry an
        HTTP date) falls back to exponential backoff: 1, 2, 4, ... seconds.
        """
        try:
            # Clamp: time.sleep rejects negative values.
            return max(0, int(retry_after))
        except (TypeError, ValueError):
            return 2**attempt

    def _make_request(
        self, endpoint: str, params: Optional[Dict] = None, max_retries: int = 3
    ) -> Dict[str, Any]:
        """Make a request to the ClinVar API with automatic retry for rate limiting.

        Args:
            endpoint: Path appended to ``self.base_url``.
            params: Query parameters; ``retmode == "json"`` makes the body be
                parsed as JSON, otherwise the raw text is returned.
            max_retries: Retries after the first attempt, for both HTTP 429
                responses and ``requests`` exceptions.

        Returns:
            A dict with ``status`` of ``"success"`` or ``"error"``. Errors
            are reported in the dict, not raised.
        """
        url = f"{self.base_url}{endpoint}"

        for attempt in range(max_retries + 1):
            try:
                response = self.session.get(url, params=params, timeout=self.timeout)

                # Handle rate limiting (429 error)
                if response.status_code == 429:
                    retry_after = response.headers.get("Retry-After")
                    if attempt >= max_retries:
                        return {
                            "status": "error",
                            "error": f"Rate limited after {max_retries} retries. Please wait before making more requests.",
                            "url": url,
                            "retry_after": retry_after,
                        }
                    wait_time = self._retry_wait(retry_after, attempt)
                    print(
                        f"Rate limited (429). Waiting {wait_time} seconds before retry {attempt + 1}/{max_retries}..."
                    )
                    time.sleep(wait_time)
                    continue

                response.raise_for_status()

                # The API returns XML by default; JSON only when requested.
                if params and params.get("retmode") == "json":
                    data = response.json()
                else:
                    data = response.text

                return {
                    "status": "success",
                    "data": data,
                    "url": url,
                    "content_type": response.headers.get(
                        "content-type", "application/xml"
                    ),
                    "rate_limit_info": {
                        "limit": response.headers.get("X-RateLimit-Limit"),
                        "remaining": response.headers.get("X-RateLimit-Remaining"),
                    },
                }

            except requests.exceptions.RequestException as e:
                if attempt < max_retries:
                    wait_time = 2**attempt
                    print(
                        f"Request failed: {str(e)}. Retrying in {wait_time} seconds..."
                    )
                    time.sleep(wait_time)
                    continue
                return {
                    "status": "error",
                    "error": f"ClinVar API request failed after {max_retries} retries: {str(e)}",
                    "url": url,
                }

        return {"status": "error", "error": "Maximum retries exceeded", "url": url}

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the tool with given arguments.

        Relies on ``self.endpoint``, which this base class does not set;
        subclasses are expected to define it.
        """
        return self._make_request(self.endpoint, arguments)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@register_tool("ClinVarSearchVariants")
class ClinVarSearchVariants(ClinVarRESTTool):
    """Search for variants in ClinVar by gene or condition."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.endpoint = "/esearch.fcgi"

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search variants by gene, condition and/or variant ID.

        Args:
            arguments: May contain ``gene``, ``condition``, ``variant_id``
                and ``max_results`` (default 20). At least one of the three
                search fields must have a non-empty value.

        Returns:
            The ``_make_request`` result. On success it also carries
            ``search_params`` and, when the payload has an ``esearchresult``
            section, a ``formatted_results`` summary.
        """
        params = {
            "db": "clinvar",
            "retmode": "json",
            "retmax": arguments.get("max_results", 20),
        }

        search_params = {
            "gene": arguments.get("gene"),
            "condition": arguments.get("condition"),
            "variant_id": arguments.get("variant_id"),
        }

        # Skip keys present with None/"" so an optional argument passed as
        # None does not produce a term such as "None[gene]".
        query_parts = [
            f"{value}[{field}]"
            for field, value in search_params.items()
            if value is not None and value != ""
        ]

        if not query_parts:
            return {
                "status": "error",
                "error": "At least one search parameter is required",
            }

        params["term"] = " AND ".join(query_parts)

        result = self._make_request(self.endpoint, params)

        if result.get("status") == "success":
            result["search_params"] = search_params

            # Format search results for better usability
            data = result.get("data", {})
            if isinstance(data, dict) and "esearchresult" in data:
                esearch = data["esearchresult"]
                result["formatted_results"] = {
                    "total_count": int(esearch.get("count", 0)),
                    "variant_ids": esearch.get("idlist", []),
                    "query_translation": esearch.get("querytranslation", ""),
                    "search_params": result["search_params"],
                    "summary": f"Found {esearch.get('count', 0)} variants matching the search criteria",
                }

        return result
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@register_tool("ClinVarGetVariantDetails")
class ClinVarGetVariantDetails(ClinVarRESTTool):
    """Get detailed variant information by ClinVar ID."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.endpoint = "/esummary.fcgi"

    @staticmethod
    def _first(items) -> Dict[str, Any]:
        """Return the first element of ``items`` when it is a dict, else ``{}``.

        Guards against missing, empty or ``null`` lists, where a bare
        ``[0]`` would raise.
        """
        if isinstance(items, list) and items and isinstance(items[0], dict):
            return items[0]
        return {}

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get variant details by ClinVar ID.

        Args:
            arguments: Must contain a non-empty ``variant_id``.

        Returns:
            The ``_make_request`` result. On success it also carries
            ``variant_id`` and, when the payload has a record for that ID,
            a ``formatted_data`` summary with the raw record under
            ``raw_data``.
        """
        variant_id = arguments.get("variant_id", "")
        if not variant_id:
            return {"status": "error", "error": "variant_id is required"}

        params = {"db": "clinvar", "id": variant_id, "retmode": "json"}

        result = self._make_request(self.endpoint, params)

        if result.get("status") == "success":
            result["variant_id"] = variant_id

            data = result.get("data", {})
            records = data.get("result") if isinstance(data, dict) else None
            # JSON object keys are strings, so an integer ID has to be
            # looked up by its string form.
            record_key = str(variant_id)
            if isinstance(records, dict) and record_key in records:
                variant_data = records[record_key]
                germline = variant_data.get("germline_classification") or {}
                first_variation = self._first(variant_data.get("variation_set"))
                first_location = self._first(first_variation.get("variation_loc"))

                result["formatted_data"] = {
                    "variant_id": variant_id,
                    "accession": variant_data.get("accession", ""),
                    "title": variant_data.get("title", ""),
                    "obj_type": variant_data.get("obj_type", ""),
                    "genes": [
                        gene.get("symbol", "")
                        for gene in variant_data.get("genes") or []
                    ],
                    "clinical_significance": germline.get("description", ""),
                    "review_status": germline.get("review_status", ""),
                    "chromosome": variant_data.get("chr_sort", ""),
                    "location": first_location.get("band", ""),
                    "variation_name": first_variation.get("variation_name", ""),
                    "raw_data": variant_data,  # Keep original data for advanced users
                }

        return result
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
@register_tool("ClinVarGetClinicalSignificance")
class ClinVarGetClinicalSignificance(ClinVarRESTTool):
    """Get clinical significance information for variants."""

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.endpoint = "/esummary.fcgi"

    @staticmethod
    def _classification_summary(section) -> Dict[str, Any]:
        """Extract description, review status and evaluation date.

        ``section`` may be missing or ``None``; both yield empty strings.
        """
        section = section or {}
        return {
            "description": section.get("description", ""),
            "review_status": section.get("review_status", ""),
            "last_evaluated": section.get("last_evaluated", ""),
        }

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get clinical significance by variant ID.

        Args:
            arguments: Must contain a non-empty ``variant_id``.

        Returns:
            The ``_make_request`` result. On success it also carries
            ``variant_id`` and, when the payload has a record for that ID,
            a ``formatted_data`` dict with germline, clinical-impact and
            oncogenicity classifications plus the raw record.
        """
        variant_id = arguments.get("variant_id", "")
        if not variant_id:
            return {"status": "error", "error": "variant_id is required"}

        params = {"db": "clinvar", "id": variant_id, "retmode": "json"}

        result = self._make_request(self.endpoint, params)

        if result.get("status") == "success":
            result["variant_id"] = variant_id

            data = result.get("data", {})
            records = data.get("result") if isinstance(data, dict) else None
            # JSON object keys are strings, so an integer ID has to be
            # looked up by its string form.
            record_key = str(variant_id)
            if isinstance(records, dict) and record_key in records:
                variant_data = records[record_key]

                germline_class = variant_data.get("germline_classification") or {}
                germline_summary = self._classification_summary(germline_class)
                germline_summary["fda_recognized"] = germline_class.get(
                    "fda_recognized_database", ""
                )
                germline_summary["traits"] = [
                    trait.get("trait_name", "")
                    for trait in germline_class.get("trait_set") or []
                ]

                result["formatted_data"] = {
                    "variant_id": variant_id,
                    "germline_classification": germline_summary,
                    "clinical_impact": self._classification_summary(
                        variant_data.get("clinical_impact_classification")
                    ),
                    "oncogenicity": self._classification_summary(
                        variant_data.get("oncogenicity_classification")
                    ),
                    "raw_data": variant_data,  # Keep original data for advanced users
                }

        return result
|
|
@@ -46,7 +46,7 @@ def compose(arguments, tooluniverse, call_tool, stream_callback=None):
|
|
|
46
46
|
call_tool (function): Function to call other tools
|
|
47
47
|
stream_callback (callable, optional): Callback function for streaming output
|
|
48
48
|
|
|
49
|
-
Returns
|
|
49
|
+
Returns
|
|
50
50
|
dict: The result of the multi-agent search
|
|
51
51
|
"""
|
|
52
52
|
query = arguments.get("query", "")
|