tooluniverse 1.0.9.1__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tooluniverse might be problematic. Click here for more details.

Files changed (191)
  1. tooluniverse/__init__.py +57 -1
  2. tooluniverse/admetai_tool.py +1 -1
  3. tooluniverse/agentic_tool.py +65 -17
  4. tooluniverse/base_tool.py +19 -8
  5. tooluniverse/blast_tool.py +132 -0
  6. tooluniverse/boltz_tool.py +3 -3
  7. tooluniverse/cache/result_cache_manager.py +167 -12
  8. tooluniverse/cbioportal_tool.py +42 -0
  9. tooluniverse/clinvar_tool.py +268 -74
  10. tooluniverse/compose_scripts/drug_safety_analyzer.py +1 -1
  11. tooluniverse/compose_scripts/multi_agent_literature_search.py +1 -1
  12. tooluniverse/compose_scripts/output_summarizer.py +4 -4
  13. tooluniverse/compose_scripts/tool_discover.py +1941 -443
  14. tooluniverse/compose_scripts/tool_graph_composer.py +1 -1
  15. tooluniverse/compose_scripts/tool_metadata_generator.py +1 -1
  16. tooluniverse/compose_tool.py +9 -9
  17. tooluniverse/core_tool.py +2 -2
  18. tooluniverse/ctg_tool.py +4 -4
  19. tooluniverse/custom_tool.py +1 -1
  20. tooluniverse/data/agentic_tools.json +0 -370
  21. tooluniverse/data/alphafold_tools.json +6 -6
  22. tooluniverse/data/blast_tools.json +112 -0
  23. tooluniverse/data/cbioportal_tools.json +87 -0
  24. tooluniverse/data/clinvar_tools.json +235 -0
  25. tooluniverse/data/compose_tools.json +0 -89
  26. tooluniverse/data/dbsnp_tools.json +275 -0
  27. tooluniverse/data/emdb_tools.json +61 -0
  28. tooluniverse/data/ensembl_tools.json +259 -0
  29. tooluniverse/data/file_download_tools.json +275 -0
  30. tooluniverse/data/geo_tools.json +200 -48
  31. tooluniverse/data/gnomad_tools.json +109 -0
  32. tooluniverse/data/gtopdb_tools.json +68 -0
  33. tooluniverse/data/gwas_tools.json +32 -0
  34. tooluniverse/data/interpro_tools.json +199 -0
  35. tooluniverse/data/jaspar_tools.json +70 -0
  36. tooluniverse/data/kegg_tools.json +356 -0
  37. tooluniverse/data/mpd_tools.json +87 -0
  38. tooluniverse/data/ols_tools.json +314 -0
  39. tooluniverse/data/package_discovery_tools.json +64 -0
  40. tooluniverse/data/packages/categorized_tools.txt +0 -1
  41. tooluniverse/data/packages/machine_learning_tools.json +0 -47
  42. tooluniverse/data/paleobiology_tools.json +91 -0
  43. tooluniverse/data/pride_tools.json +62 -0
  44. tooluniverse/data/pypi_package_inspector_tools.json +158 -0
  45. tooluniverse/data/python_executor_tools.json +341 -0
  46. tooluniverse/data/regulomedb_tools.json +50 -0
  47. tooluniverse/data/remap_tools.json +89 -0
  48. tooluniverse/data/screen_tools.json +89 -0
  49. tooluniverse/data/tool_discovery_agents.json +428 -0
  50. tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
  51. tooluniverse/data/uniprot_tools.json +77 -0
  52. tooluniverse/data/web_search_tools.json +250 -0
  53. tooluniverse/data/worms_tools.json +55 -0
  54. tooluniverse/dataset_tool.py +2 -2
  55. tooluniverse/dbsnp_tool.py +196 -58
  56. tooluniverse/default_config.py +36 -3
  57. tooluniverse/emdb_tool.py +30 -0
  58. tooluniverse/enrichr_tool.py +14 -14
  59. tooluniverse/ensembl_tool.py +140 -47
  60. tooluniverse/execute_function.py +594 -29
  61. tooluniverse/extended_hooks.py +4 -4
  62. tooluniverse/file_download_tool.py +269 -0
  63. tooluniverse/gene_ontology_tool.py +1 -1
  64. tooluniverse/generate_tools.py +3 -3
  65. tooluniverse/geo_tool.py +81 -28
  66. tooluniverse/gnomad_tool.py +100 -52
  67. tooluniverse/gtopdb_tool.py +41 -0
  68. tooluniverse/humanbase_tool.py +10 -10
  69. tooluniverse/interpro_tool.py +72 -0
  70. tooluniverse/jaspar_tool.py +30 -0
  71. tooluniverse/kegg_tool.py +230 -0
  72. tooluniverse/logging_config.py +2 -2
  73. tooluniverse/mcp_client_tool.py +57 -129
  74. tooluniverse/mcp_integration.py +52 -49
  75. tooluniverse/mcp_tool_registry.py +147 -528
  76. tooluniverse/mpd_tool.py +42 -0
  77. tooluniverse/ncbi_eutils_tool.py +96 -0
  78. tooluniverse/ols_tool.py +435 -0
  79. tooluniverse/openalex_tool.py +8 -8
  80. tooluniverse/openfda_tool.py +2 -2
  81. tooluniverse/output_hook.py +15 -15
  82. tooluniverse/package_discovery_tool.py +217 -0
  83. tooluniverse/package_tool.py +1 -1
  84. tooluniverse/paleobiology_tool.py +30 -0
  85. tooluniverse/pmc_tool.py +2 -2
  86. tooluniverse/pride_tool.py +30 -0
  87. tooluniverse/pypi_package_inspector_tool.py +593 -0
  88. tooluniverse/python_executor_tool.py +711 -0
  89. tooluniverse/regulomedb_tool.py +30 -0
  90. tooluniverse/remap_tool.py +44 -0
  91. tooluniverse/remote/boltz/boltz_mcp_server.py +1 -1
  92. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +3 -3
  93. tooluniverse/remote/immune_compass/compass_tool.py +3 -3
  94. tooluniverse/remote/pinnacle/pinnacle_tool.py +2 -2
  95. tooluniverse/remote/transcriptformer/transcriptformer_tool.py +3 -3
  96. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +3 -3
  97. tooluniverse/remote_tool.py +4 -4
  98. tooluniverse/screen_tool.py +44 -0
  99. tooluniverse/scripts/filter_tool_files.py +2 -2
  100. tooluniverse/smcp.py +93 -12
  101. tooluniverse/smcp_server.py +100 -21
  102. tooluniverse/space/__init__.py +46 -0
  103. tooluniverse/space/loader.py +133 -0
  104. tooluniverse/space/validator.py +353 -0
  105. tooluniverse/tool_finder_embedding.py +5 -3
  106. tooluniverse/tool_finder_keyword.py +12 -10
  107. tooluniverse/tool_finder_llm.py +12 -8
  108. tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
  109. tooluniverse/tools/BLAST_protein_search.py +63 -0
  110. tooluniverse/tools/ClinVar_search_variants.py +26 -15
  111. tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
  112. tooluniverse/tools/EMDB_get_structure.py +46 -0
  113. tooluniverse/tools/GtoPdb_get_targets.py +52 -0
  114. tooluniverse/tools/InterPro_get_domain_details.py +46 -0
  115. tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
  116. tooluniverse/tools/InterPro_search_domains.py +52 -0
  117. tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
  118. tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
  119. tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
  120. tooluniverse/tools/PackageAnalyzer.py +55 -0
  121. tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
  122. tooluniverse/tools/PyPIPackageInspector.py +59 -0
  123. tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
  124. tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
  125. tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
  126. tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
  127. tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
  128. tooluniverse/tools/ToolDiscover.py +11 -11
  129. tooluniverse/tools/UniProt_id_mapping.py +63 -0
  130. tooluniverse/tools/UniProt_search.py +63 -0
  131. tooluniverse/tools/UnifiedToolGenerator.py +59 -0
  132. tooluniverse/tools/WoRMS_search_species.py +49 -0
  133. tooluniverse/tools/XMLToolOptimizer.py +55 -0
  134. tooluniverse/tools/__init__.py +119 -29
  135. tooluniverse/tools/_shared_client.py +3 -3
  136. tooluniverse/tools/alphafold_get_annotations.py +3 -3
  137. tooluniverse/tools/alphafold_get_prediction.py +3 -3
  138. tooluniverse/tools/alphafold_get_summary.py +3 -3
  139. tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
  140. tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
  141. tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
  142. tooluniverse/tools/clinvar_get_variant_details.py +49 -0
  143. tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
  144. tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
  145. tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
  146. tooluniverse/tools/download_binary_file.py +66 -0
  147. tooluniverse/tools/download_file.py +71 -0
  148. tooluniverse/tools/download_text_content.py +55 -0
  149. tooluniverse/tools/dynamic_package_discovery.py +59 -0
  150. tooluniverse/tools/ensembl_get_sequence.py +52 -0
  151. tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
  152. tooluniverse/tools/ensembl_lookup_gene.py +46 -0
  153. tooluniverse/tools/geo_get_dataset_info.py +46 -0
  154. tooluniverse/tools/geo_get_sample_info.py +46 -0
  155. tooluniverse/tools/geo_search_datasets.py +67 -0
  156. tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
  157. tooluniverse/tools/kegg_find_genes.py +52 -0
  158. tooluniverse/tools/kegg_get_gene_info.py +46 -0
  159. tooluniverse/tools/kegg_get_pathway_info.py +46 -0
  160. tooluniverse/tools/kegg_list_organisms.py +44 -0
  161. tooluniverse/tools/kegg_search_pathway.py +46 -0
  162. tooluniverse/tools/ols_find_similar_terms.py +63 -0
  163. tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
  164. tooluniverse/tools/ols_get_term_ancestors.py +67 -0
  165. tooluniverse/tools/ols_get_term_children.py +67 -0
  166. tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
  167. tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
  168. tooluniverse/tools/ols_search_terms.py +71 -0
  169. tooluniverse/tools/python_code_executor.py +79 -0
  170. tooluniverse/tools/python_script_runner.py +79 -0
  171. tooluniverse/tools/web_api_documentation_search.py +63 -0
  172. tooluniverse/tools/web_search.py +71 -0
  173. tooluniverse/uniprot_tool.py +219 -16
  174. tooluniverse/url_tool.py +19 -1
  175. tooluniverse/uspto_tool.py +1 -1
  176. tooluniverse/utils.py +12 -12
  177. tooluniverse/web_search_tool.py +229 -0
  178. tooluniverse/worms_tool.py +64 -0
  179. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +8 -3
  180. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +184 -92
  181. tooluniverse/data/genomics_tools.json +0 -174
  182. tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
  183. tooluniverse/tools/ToolImplementationGenerator.py +0 -67
  184. tooluniverse/tools/ToolOptimizer.py +0 -59
  185. tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
  186. tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
  187. tooluniverse/ucsc_tool.py +0 -60
  188. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
  189. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
  190. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
  191. {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
@@ -6,9 +6,11 @@ from __future__ import annotations
6
6
 
7
7
  import logging
8
8
  import os
9
+ import queue
10
+ import threading
9
11
  import time
10
12
  from dataclasses import dataclass
11
- from typing import Any, Dict, Iterator, Optional
13
+ from typing import Any, Dict, Iterator, Optional, Sequence
12
14
 
13
15
  from .memory_cache import LRUCache, SingleFlight
14
16
  from .sqlite_backend import CacheEntry, PersistentCache
@@ -36,6 +38,8 @@ class ResultCacheManager:
36
38
  persistence_enabled: bool = True,
37
39
  singleflight: bool = True,
38
40
  default_ttl: Optional[int] = None,
41
+ async_persist: Optional[bool] = None,
42
+ async_queue_size: int = 10000,
39
43
  ):
40
44
  self.enabled = enabled
41
45
  self.default_ttl = default_ttl
@@ -55,6 +59,7 @@ class ResultCacheManager:
55
59
  self.persistent = None
56
60
 
57
61
  self.singleflight = SingleFlight() if singleflight else None
62
+ self._init_async_persistence(async_persist, async_queue_size)
58
63
 
59
64
  # ------------------------------------------------------------------
60
65
  # Helper methods
@@ -69,6 +74,33 @@ class ResultCacheManager:
69
74
  def _ttl_or_default(self, ttl: Optional[int]) -> Optional[int]:
70
75
  return ttl if ttl is not None else self.default_ttl
71
76
 
77
+ def _init_async_persistence(
78
+ self, async_persist: Optional[bool], async_queue_size: int
79
+ ) -> None:
80
+ if async_persist is None:
81
+ async_persist = os.getenv(
82
+ "TOOLUNIVERSE_CACHE_ASYNC_PERSIST", "true"
83
+ ).lower() in ("true", "1", "yes")
84
+
85
+ self.async_persist = (
86
+ async_persist and self.persistent is not None and self.enabled
87
+ )
88
+
89
+ self._persist_queue: Optional["queue.Queue[tuple[str, Dict[str, Any]]]"] = None
90
+ self._worker_thread: Optional[threading.Thread] = None
91
+
92
+ if not self.async_persist:
93
+ return
94
+
95
+ queue_size = max(1, async_queue_size)
96
+ self._persist_queue = queue.Queue(maxsize=queue_size)
97
+ self._worker_thread = threading.Thread(
98
+ target=self._async_worker,
99
+ name="ResultCacheWriter",
100
+ daemon=True,
101
+ )
102
+ self._worker_thread.start()
103
+
72
104
  # ------------------------------------------------------------------
73
105
  # Public API
74
106
  # ------------------------------------------------------------------
@@ -126,17 +158,15 @@ class ResultCacheManager:
126
158
  )
127
159
 
128
160
  if self.persistent:
129
- try:
130
- self.persistent.set(
131
- composed,
132
- value,
133
- namespace=namespace,
134
- version=version,
135
- ttl=effective_ttl,
136
- )
137
- except Exception as exc:
138
- logger.warning("Persistent cache write failed: %s", exc)
139
- self.persistent = None
161
+ payload = {
162
+ "composed": composed,
163
+ "value": value,
164
+ "namespace": namespace,
165
+ "version": version,
166
+ "ttl": effective_ttl,
167
+ }
168
+ if not self._schedule_persist("set", payload):
169
+ self._perform_persist_set(**payload)
140
170
 
141
171
  def delete(self, *, namespace: str, version: str, cache_key: str):
142
172
  composed = self.compose_key(namespace, version, cache_key)
@@ -162,10 +192,40 @@ class ResultCacheManager:
162
192
 
163
193
  if self.persistent:
164
194
  try:
195
+ self.flush()
165
196
  self.persistent.clear(namespace=namespace)
166
197
  except Exception as exc:
167
198
  logger.warning("Persistent cache clear failed: %s", exc)
168
199
 
200
+ def bulk_get(self, requests: Sequence[Dict[str, str]]) -> Dict[str, Any]:
201
+ """Fetch multiple cache entries at once.
202
+
203
+ Args:
204
+ requests: Iterable of dicts containing ``namespace``, ``version`` and ``cache_key``.
205
+
206
+ Returns
207
+ Mapping of composed cache keys to cached values.
208
+ """
209
+
210
+ if not self.enabled:
211
+ return {}
212
+
213
+ hits: Dict[str, Any] = {}
214
+ for request in requests:
215
+ namespace = request["namespace"]
216
+ version = request["version"]
217
+ cache_key = request["cache_key"]
218
+ value = self.get(
219
+ namespace=namespace,
220
+ version=version,
221
+ cache_key=cache_key,
222
+ )
223
+ if value is not None:
224
+ composed = self.compose_key(namespace, version, cache_key)
225
+ hits[composed] = value
226
+
227
+ return hits
228
+
169
229
  def stats(self) -> Dict[str, Any]:
170
230
  return {
171
231
  "enabled": self.enabled,
@@ -173,11 +233,18 @@ class ResultCacheManager:
173
233
  "persistent": (
174
234
  self.persistent.stats() if self.persistent else {"enabled": False}
175
235
  ),
236
+ "async_persist": self.async_persist,
237
+ "pending_writes": (
238
+ self._persist_queue.qsize()
239
+ if self.async_persist and self._persist_queue is not None
240
+ else 0
241
+ ),
176
242
  }
177
243
 
178
244
  def dump(self, namespace: Optional[str] = None) -> Iterator[Dict[str, Any]]:
179
245
  if not self.persistent:
180
246
  return iter([])
247
+ self.flush()
181
248
  return (
182
249
  {
183
250
  "cache_key": entry.key,
@@ -220,12 +287,100 @@ class ResultCacheManager:
220
287
  return _DummyContext()
221
288
 
222
289
  def close(self):
290
+ self.flush()
291
+ self._shutdown_async_worker()
223
292
  if self.persistent:
224
293
  try:
225
294
  self.persistent.close()
226
295
  except Exception as exc:
227
296
  logger.warning("Persistent cache close failed: %s", exc)
228
297
 
298
+ # ------------------------------------------------------------------
299
+ # Async persistence helpers
300
+ # ------------------------------------------------------------------
301
+
302
+ def flush(self):
303
+ if self.async_persist and self._persist_queue is not None:
304
+ self._persist_queue.join()
305
+
306
+ def _schedule_persist(self, op: str, payload: Dict[str, Any]) -> bool:
307
+ if not self.async_persist or self._persist_queue is None:
308
+ return False
309
+ try:
310
+ self._persist_queue.put_nowait((op, payload))
311
+ return True
312
+ except queue.Full:
313
+ logger.warning(
314
+ "Async cache queue full; falling back to synchronous persistence"
315
+ )
316
+ return False
317
+
318
+ def _async_worker(self):
319
+ queue_ref = self._persist_queue
320
+ if queue_ref is None:
321
+ return
322
+
323
+ while True:
324
+ try:
325
+ op, payload = queue_ref.get()
326
+ except Exception:
327
+ continue
328
+
329
+ if op == "__STOP__":
330
+ queue_ref.task_done()
331
+ break
332
+
333
+ try:
334
+ if op == "set":
335
+ self._perform_persist_set(**payload)
336
+ else:
337
+ logger.warning("Unknown async cache operation: %s", op)
338
+ except Exception as exc:
339
+ logger.warning("Async cache write failed: %s", exc)
340
+ # Disable async persistence to avoid repeated failures
341
+ self.async_persist = False
342
+ finally:
343
+ queue_ref.task_done()
344
+
345
+ def _perform_persist_set(
346
+ self,
347
+ *,
348
+ composed: str,
349
+ value: Any,
350
+ namespace: str,
351
+ version: str,
352
+ ttl: Optional[int],
353
+ ):
354
+ if not self.persistent:
355
+ return
356
+ try:
357
+ self.persistent.set(
358
+ composed,
359
+ value,
360
+ namespace=namespace,
361
+ version=version,
362
+ ttl=ttl,
363
+ )
364
+ except Exception as exc:
365
+ logger.warning("Persistent cache write failed: %s", exc)
366
+ self.persistent = None
367
+ raise
368
+
369
+ def _shutdown_async_worker(self) -> None:
370
+ if not self.async_persist or self._persist_queue is None:
371
+ return
372
+
373
+ try:
374
+ self._persist_queue.put_nowait(("__STOP__", {}))
375
+ except queue.Full:
376
+ self._persist_queue.put(("__STOP__", {}))
377
+
378
+ if self._worker_thread is not None:
379
+ self._worker_thread.join(timeout=5)
380
+
381
+ self._worker_thread = None
382
+ self._persist_queue = None
383
+
229
384
 
230
385
  class _DummyContext:
231
386
  def __enter__(self):
@@ -0,0 +1,42 @@
1
+ import requests
2
+ from typing import Any, Dict
3
+ from .base_tool import BaseTool
4
+ from .tool_registry import register_tool
5
+
6
+
7
+ @register_tool("CBioPortalRESTTool")
8
+ class CBioPortalRESTTool(BaseTool):
9
+ def __init__(self, tool_config: Dict):
10
+ super().__init__(tool_config)
11
+ self.base_url = "https://www.cbioportal.org/api"
12
+ self.session = requests.Session()
13
+ self.session.headers.update(
14
+ {"Accept": "application/json", "User-Agent": "ToolUniverse/1.0"}
15
+ )
16
+ self.timeout = 30
17
+
18
+ def _build_url(self, args: Dict[str, Any]) -> str:
19
+ url = self.tool_config["fields"]["endpoint"]
20
+ for k, v in args.items():
21
+ url = url.replace(f"{{{k}}}", str(v))
22
+ return url
23
+
24
+ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
25
+ try:
26
+ url = self._build_url(arguments)
27
+ response = self.session.get(url, timeout=self.timeout)
28
+ response.raise_for_status()
29
+ data = response.json()
30
+
31
+ return {
32
+ "status": "success",
33
+ "data": data,
34
+ "url": url,
35
+ "count": len(data) if isinstance(data, list) else 1,
36
+ }
37
+ except Exception as e:
38
+ return {
39
+ "status": "error",
40
+ "error": f"cBioPortal API error: {str(e)}",
41
+ "url": url,
42
+ }
@@ -1,90 +1,284 @@
1
+ """
2
+ ClinVar REST API Tool
3
+
4
+ This tool provides access to the ClinVar database for clinical variant information,
5
+ disease associations, and clinical significance data.
6
+ """
7
+
1
8
  import requests
9
+ import time
10
+ from typing import Dict, Any, Optional
2
11
  from .base_tool import BaseTool
3
12
  from .tool_registry import register_tool
4
13
 
5
14
 
6
- @register_tool("ClinVarTool")
7
- class ClinVarTool(BaseTool):
8
- """
9
- Local tool wrapper for ClinVar via NCBI E-utilities.
10
- Uses esearch + esummary to fetch variant records.
11
- """
15
+ class ClinVarRESTTool(BaseTool):
16
+ """Base class for ClinVar REST API tools."""
12
17
 
13
18
  def __init__(self, tool_config):
14
19
  super().__init__(tool_config)
15
- self.base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
20
+ self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
16
21
  self.session = requests.Session()
22
+ self.session.headers.update(
23
+ {"Accept": "application/json", "User-Agent": "ToolUniverse/1.0"}
24
+ )
25
+ self.timeout = 30
17
26
 
18
- def run(self, arguments):
19
- query = arguments.get("query")
20
- retmax = arguments.get("retmax", 5)
21
- if not query:
22
- return {"error": "Missing required parameter: query"}
27
+ def _make_request(
28
+ self, endpoint: str, params: Optional[Dict] = None, max_retries: int = 3
29
+ ) -> Dict[str, Any]:
30
+ """Make a request to the ClinVar API with automatic retry for rate limiting."""
31
+ url = f"{self.base_url}{endpoint}"
23
32
 
24
- # 1) esearch to get UIDs
25
- search_url = f"{self.base}/esearch.fcgi"
26
- search_params = {
27
- "db": "clinvar",
28
- "term": query,
29
- "retmode": "json",
30
- "retmax": retmax,
31
- }
32
- search_resp = self.session.get(search_url, params=search_params, timeout=20)
33
- search_resp.raise_for_status()
34
- search_data = search_resp.json()
35
- uids = search_data.get("esearchresult", {}).get("idlist", [])
36
- if not uids:
37
- return []
38
-
39
- # 2) esummary to get details
40
- summary_url = f"{self.base}/esummary.fcgi"
41
- summary_params = {
33
+ for attempt in range(max_retries + 1):
34
+ try:
35
+ response = self.session.get(url, params=params, timeout=self.timeout)
36
+
37
+ # Handle rate limiting (429 error)
38
+ if response.status_code == 429:
39
+ retry_after = response.headers.get("Retry-After")
40
+ if retry_after:
41
+ wait_time = int(retry_after)
42
+ else:
43
+ # Default exponential backoff: 1, 2, 4 seconds
44
+ wait_time = 2**attempt
45
+
46
+ if attempt < max_retries:
47
+ print(
48
+ f"Rate limited (429). Waiting {wait_time} seconds before retry {attempt + 1}/{max_retries}..."
49
+ )
50
+ time.sleep(wait_time)
51
+ continue
52
+ else:
53
+ return {
54
+ "status": "error",
55
+ "error": f"Rate limited after {max_retries} retries. Please wait before making more requests.",
56
+ "url": url,
57
+ "retry_after": retry_after,
58
+ }
59
+
60
+ response.raise_for_status()
61
+
62
+ # ClinVar API returns XML by default, but we can request JSON
63
+ if params and params.get("retmode") == "json":
64
+ data = response.json()
65
+ else:
66
+ # Parse XML response
67
+ data = response.text
68
+
69
+ return {
70
+ "status": "success",
71
+ "data": data,
72
+ "url": url,
73
+ "content_type": response.headers.get(
74
+ "content-type", "application/xml"
75
+ ),
76
+ "rate_limit_info": {
77
+ "limit": response.headers.get("X-RateLimit-Limit"),
78
+ "remaining": response.headers.get("X-RateLimit-Remaining"),
79
+ },
80
+ }
81
+
82
+ except requests.exceptions.RequestException as e:
83
+ if attempt < max_retries:
84
+ wait_time = 2**attempt
85
+ print(
86
+ f"Request failed: {str(e)}. Retrying in {wait_time} seconds..."
87
+ )
88
+ time.sleep(wait_time)
89
+ continue
90
+ else:
91
+ return {
92
+ "status": "error",
93
+ "error": f"ClinVar API request failed after {max_retries} retries: {str(e)}",
94
+ "url": url,
95
+ }
96
+
97
+ return {"status": "error", "error": "Maximum retries exceeded", "url": url}
98
+
99
+ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
100
+ """Execute the tool with given arguments."""
101
+ return self._make_request(self.endpoint, arguments)
102
+
103
+
104
+ @register_tool("ClinVarSearchVariants")
105
+ class ClinVarSearchVariants(ClinVarRESTTool):
106
+ """Search for variants in ClinVar by gene or condition."""
107
+
108
+ def __init__(self, tool_config):
109
+ super().__init__(tool_config)
110
+ self.endpoint = "/esearch.fcgi"
111
+
112
+ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
113
+ """Search variants by gene or condition."""
114
+ params = {
42
115
  "db": "clinvar",
43
- "id": ",".join(uids),
44
116
  "retmode": "json",
117
+ "retmax": arguments.get("max_results", 20),
45
118
  }
46
- summary_resp = self.session.get(summary_url, params=summary_params, timeout=30)
47
- summary_resp.raise_for_status()
48
- summary_data = summary_resp.json()
49
-
50
- results = []
51
- for uid in uids:
52
- record = summary_data.get("result", {}).get(uid, {})
53
- if not record:
54
- continue
55
-
56
- # Extract key fields
57
- variation_set = record.get("variation_set", [])
58
- gene = ""
59
- chr_name = ""
60
- start = None
61
- stop = None
62
- spdi = ""
63
- if variation_set:
64
- var = variation_set[0]
65
- gene = record.get("genes", [{}])[0].get("symbol", "")
66
- var_loc = var.get("variation_loc", [{}])[0]
67
- chr_name = var_loc.get("chr", "")
68
- start = var_loc.get("start")
69
- stop = var_loc.get("stop")
70
- spdi = var.get("canonical_spdi", "")
71
-
72
- clinical_sig = record.get("germline_classification", {}).get(
73
- "description", ""
74
- )
75
-
76
- results.append(
77
- {
78
- "uid": uid,
79
- "accession": record.get("accession", ""),
80
- "title": record.get("title", ""),
81
- "gene": gene,
82
- "chr": chr_name,
83
- "start": start,
84
- "stop": stop,
85
- "spdi": spdi,
86
- "clinical_significance": clinical_sig,
119
+
120
+ # Build search query
121
+ query_parts = []
122
+
123
+ if "gene" in arguments:
124
+ query_parts.append(f"{arguments['gene']}[gene]")
125
+
126
+ if "condition" in arguments:
127
+ query_parts.append(f"{arguments['condition']}[condition]")
128
+
129
+ if "variant_id" in arguments:
130
+ query_parts.append(f"{arguments['variant_id']}[variant_id]")
131
+
132
+ if not query_parts:
133
+ return {
134
+ "status": "error",
135
+ "error": "At least one search parameter is required",
136
+ }
137
+
138
+ params["term"] = " AND ".join(query_parts)
139
+
140
+ result = self._make_request(self.endpoint, params)
141
+
142
+ # Add search parameters to result and format data
143
+ if result.get("status") == "success":
144
+ result["search_params"] = {
145
+ "gene": arguments.get("gene"),
146
+ "condition": arguments.get("condition"),
147
+ "variant_id": arguments.get("variant_id"),
148
+ }
149
+
150
+ # Format search results for better usability
151
+ data = result.get("data", {})
152
+ if "esearchresult" in data:
153
+ esearch = data["esearchresult"]
154
+ formatted_results = {
155
+ "total_count": int(esearch.get("count", 0)),
156
+ "variant_ids": esearch.get("idlist", []),
157
+ "query_translation": esearch.get("querytranslation", ""),
158
+ "search_params": result["search_params"],
159
+ "summary": f"Found {esearch.get('count', 0)} variants matching the search criteria",
87
160
  }
88
- )
161
+ result["formatted_results"] = formatted_results
162
+
163
+ return result
164
+
165
+
166
+ @register_tool("ClinVarGetVariantDetails")
167
+ class ClinVarGetVariantDetails(ClinVarRESTTool):
168
+ """Get detailed variant information by ClinVar ID."""
169
+
170
+ def __init__(self, tool_config):
171
+ super().__init__(tool_config)
172
+ self.endpoint = "/esummary.fcgi"
173
+
174
+ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
175
+ """Get variant details by ClinVar ID."""
176
+ variant_id = arguments.get("variant_id", "")
177
+ if not variant_id:
178
+ return {"status": "error", "error": "variant_id is required"}
179
+
180
+ params = {"db": "clinvar", "id": variant_id, "retmode": "json"}
181
+
182
+ result = self._make_request(self.endpoint, params)
183
+
184
+ # Add variant_id to result and format data
185
+ if result.get("status") == "success":
186
+ result["variant_id"] = variant_id
187
+
188
+ # Format the data for better usability
189
+ data = result.get("data", {})
190
+ if "result" in data and variant_id in data["result"]:
191
+ variant_data = data["result"][variant_id]
192
+
193
+ # Extract key information
194
+ formatted_data = {
195
+ "variant_id": variant_id,
196
+ "accession": variant_data.get("accession", ""),
197
+ "title": variant_data.get("title", ""),
198
+ "obj_type": variant_data.get("obj_type", ""),
199
+ "genes": [
200
+ gene.get("symbol", "") for gene in variant_data.get("genes", [])
201
+ ],
202
+ "clinical_significance": variant_data.get(
203
+ "germline_classification", {}
204
+ ).get("description", ""),
205
+ "review_status": variant_data.get(
206
+ "germline_classification", {}
207
+ ).get("review_status", ""),
208
+ "chromosome": variant_data.get("chr_sort", ""),
209
+ "location": variant_data.get("variation_set", [{}])[0]
210
+ .get("variation_loc", [{}])[0]
211
+ .get("band", ""),
212
+ "variation_name": variant_data.get("variation_set", [{}])[0].get(
213
+ "variation_name", ""
214
+ ),
215
+ "raw_data": variant_data, # Keep original data for advanced users
216
+ }
217
+
218
+ result["formatted_data"] = formatted_data
219
+
220
+ return result
221
+
222
+
223
+ @register_tool("ClinVarGetClinicalSignificance")
224
+ class ClinVarGetClinicalSignificance(ClinVarRESTTool):
225
+ """Get clinical significance information for variants."""
226
+
227
+ def __init__(self, tool_config):
228
+ super().__init__(tool_config)
229
+ self.endpoint = "/esummary.fcgi"
230
+
231
+ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
232
+ """Get clinical significance by variant ID."""
233
+ variant_id = arguments.get("variant_id", "")
234
+ if not variant_id:
235
+ return {"status": "error", "error": "variant_id is required"}
236
+
237
+ params = {"db": "clinvar", "id": variant_id, "retmode": "json"}
238
+
239
+ result = self._make_request(self.endpoint, params)
240
+
241
+ # Add variant_id to result and format clinical significance data
242
+ if result.get("status") == "success":
243
+ result["variant_id"] = variant_id
244
+
245
+ # Format the clinical significance data
246
+ data = result.get("data", {})
247
+ if "result" in data and variant_id in data["result"]:
248
+ variant_data = data["result"][variant_id]
249
+
250
+ # Extract clinical significance information
251
+ germline_class = variant_data.get("germline_classification", {})
252
+ clinical_impact = variant_data.get("clinical_impact_classification", {})
253
+ oncogenicity = variant_data.get("oncogenicity_classification", {})
254
+
255
+ formatted_data = {
256
+ "variant_id": variant_id,
257
+ "germline_classification": {
258
+ "description": germline_class.get("description", ""),
259
+ "review_status": germline_class.get("review_status", ""),
260
+ "last_evaluated": germline_class.get("last_evaluated", ""),
261
+ "fda_recognized": germline_class.get(
262
+ "fda_recognized_database", ""
263
+ ),
264
+ "traits": [
265
+ trait.get("trait_name", "")
266
+ for trait in germline_class.get("trait_set", [])
267
+ ],
268
+ },
269
+ "clinical_impact": {
270
+ "description": clinical_impact.get("description", ""),
271
+ "review_status": clinical_impact.get("review_status", ""),
272
+ "last_evaluated": clinical_impact.get("last_evaluated", ""),
273
+ },
274
+ "oncogenicity": {
275
+ "description": oncogenicity.get("description", ""),
276
+ "review_status": oncogenicity.get("review_status", ""),
277
+ "last_evaluated": oncogenicity.get("last_evaluated", ""),
278
+ },
279
+ "raw_data": variant_data, # Keep original data for advanced users
280
+ }
281
+
282
+ result["formatted_data"] = formatted_data
89
283
 
90
- return results
284
+ return result
@@ -13,7 +13,7 @@ def compose(arguments, tooluniverse, call_tool):
13
13
  tooluniverse: ToolUniverse instance
14
14
  call_tool: Function to call other tools
15
15
 
16
- Returns:
16
+ Returns:
17
17
  dict: Comprehensive drug safety analysis result
18
18
  """
19
19
  drug_name = arguments["drug_name"]
@@ -46,7 +46,7 @@ def compose(arguments, tooluniverse, call_tool, stream_callback=None):
46
46
  call_tool (function): Function to call other tools
47
47
  stream_callback (callable, optional): Callback function for streaming output
48
48
 
49
- Returns:
49
+ Returns:
50
50
  dict: The result of the multi-agent search
51
51
  """
52
52
  query = arguments.get("query", "")