tooluniverse 0.2.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tooluniverse might be problematic.

Files changed (190)
  1. tooluniverse/__init__.py +340 -4
  2. tooluniverse/admetai_tool.py +84 -0
  3. tooluniverse/agentic_tool.py +563 -0
  4. tooluniverse/alphafold_tool.py +96 -0
  5. tooluniverse/base_tool.py +129 -6
  6. tooluniverse/boltz_tool.py +207 -0
  7. tooluniverse/chem_tool.py +192 -0
  8. tooluniverse/compose_scripts/__init__.py +1 -0
  9. tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
  10. tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
  11. tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
  12. tooluniverse/compose_scripts/literature_tool.py +34 -0
  13. tooluniverse/compose_scripts/output_summarizer.py +279 -0
  14. tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
  15. tooluniverse/compose_scripts/tool_discover.py +705 -0
  16. tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
  17. tooluniverse/compose_tool.py +371 -0
  18. tooluniverse/ctg_tool.py +1002 -0
  19. tooluniverse/custom_tool.py +81 -0
  20. tooluniverse/dailymed_tool.py +108 -0
  21. tooluniverse/data/admetai_tools.json +155 -0
  22. tooluniverse/data/agentic_tools.json +1156 -0
  23. tooluniverse/data/alphafold_tools.json +87 -0
  24. tooluniverse/data/boltz_tools.json +9 -0
  25. tooluniverse/data/chembl_tools.json +16 -0
  26. tooluniverse/data/clait_tools.json +108 -0
  27. tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
  28. tooluniverse/data/compose_tools.json +202 -0
  29. tooluniverse/data/dailymed_tools.json +70 -0
  30. tooluniverse/data/dataset_tools.json +646 -0
  31. tooluniverse/data/disease_target_score_tools.json +712 -0
  32. tooluniverse/data/efo_tools.json +17 -0
  33. tooluniverse/data/embedding_tools.json +319 -0
  34. tooluniverse/data/enrichr_tools.json +31 -0
  35. tooluniverse/data/europe_pmc_tools.json +22 -0
  36. tooluniverse/data/expert_feedback_tools.json +10 -0
  37. tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
  38. tooluniverse/data/fda_drug_labeling_tools.json +1 -1
  39. tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
  40. tooluniverse/data/finder_tools.json +209 -0
  41. tooluniverse/data/gene_ontology_tools.json +113 -0
  42. tooluniverse/data/gwas_tools.json +1082 -0
  43. tooluniverse/data/hpa_tools.json +333 -0
  44. tooluniverse/data/humanbase_tools.json +47 -0
  45. tooluniverse/data/idmap_tools.json +74 -0
  46. tooluniverse/data/mcp_client_tools_example.json +113 -0
  47. tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
  48. tooluniverse/data/medlineplus_tools.json +141 -0
  49. tooluniverse/data/monarch_tools.json +1 -1
  50. tooluniverse/data/openalex_tools.json +36 -0
  51. tooluniverse/data/opentarget_tools.json +1 -1
  52. tooluniverse/data/output_summarization_tools.json +101 -0
  53. tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
  54. tooluniverse/data/packages/categorized_tools.txt +206 -0
  55. tooluniverse/data/packages/cheminformatics_tools.json +347 -0
  56. tooluniverse/data/packages/earth_sciences_tools.json +74 -0
  57. tooluniverse/data/packages/genomics_tools.json +776 -0
  58. tooluniverse/data/packages/image_processing_tools.json +38 -0
  59. tooluniverse/data/packages/machine_learning_tools.json +789 -0
  60. tooluniverse/data/packages/neuroscience_tools.json +62 -0
  61. tooluniverse/data/packages/original_tools.txt +0 -0
  62. tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
  63. tooluniverse/data/packages/scientific_computing_tools.json +560 -0
  64. tooluniverse/data/packages/single_cell_tools.json +453 -0
  65. tooluniverse/data/packages/software_tools.json +4954 -0
  66. tooluniverse/data/packages/structural_biology_tools.json +396 -0
  67. tooluniverse/data/packages/visualization_tools.json +399 -0
  68. tooluniverse/data/pubchem_tools.json +215 -0
  69. tooluniverse/data/pubtator_tools.json +68 -0
  70. tooluniverse/data/rcsb_pdb_tools.json +1332 -0
  71. tooluniverse/data/reactome_tools.json +19 -0
  72. tooluniverse/data/semantic_scholar_tools.json +26 -0
  73. tooluniverse/data/special_tools.json +2 -25
  74. tooluniverse/data/tool_composition_tools.json +88 -0
  75. tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
  76. tooluniverse/data/txagent_client_tools.json +9 -0
  77. tooluniverse/data/uniprot_tools.json +211 -0
  78. tooluniverse/data/url_fetch_tools.json +94 -0
  79. tooluniverse/data/uspto_downloader_tools.json +9 -0
  80. tooluniverse/data/uspto_tools.json +811 -0
  81. tooluniverse/data/xml_tools.json +3275 -0
  82. tooluniverse/dataset_tool.py +296 -0
  83. tooluniverse/default_config.py +165 -0
  84. tooluniverse/efo_tool.py +42 -0
  85. tooluniverse/embedding_database.py +630 -0
  86. tooluniverse/embedding_sync.py +396 -0
  87. tooluniverse/enrichr_tool.py +266 -0
  88. tooluniverse/europe_pmc_tool.py +52 -0
  89. tooluniverse/execute_function.py +1775 -95
  90. tooluniverse/extended_hooks.py +444 -0
  91. tooluniverse/gene_ontology_tool.py +194 -0
  92. tooluniverse/graphql_tool.py +158 -36
  93. tooluniverse/gwas_tool.py +358 -0
  94. tooluniverse/hpa_tool.py +1645 -0
  95. tooluniverse/humanbase_tool.py +389 -0
  96. tooluniverse/logging_config.py +254 -0
  97. tooluniverse/mcp_client_tool.py +764 -0
  98. tooluniverse/mcp_integration.py +413 -0
  99. tooluniverse/mcp_tool_registry.py +925 -0
  100. tooluniverse/medlineplus_tool.py +337 -0
  101. tooluniverse/openalex_tool.py +228 -0
  102. tooluniverse/openfda_adv_tool.py +283 -0
  103. tooluniverse/openfda_tool.py +393 -160
  104. tooluniverse/output_hook.py +1122 -0
  105. tooluniverse/package_tool.py +195 -0
  106. tooluniverse/pubchem_tool.py +158 -0
  107. tooluniverse/pubtator_tool.py +168 -0
  108. tooluniverse/rcsb_pdb_tool.py +38 -0
  109. tooluniverse/reactome_tool.py +108 -0
  110. tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
  111. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
  112. tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
  113. tooluniverse/remote/expert_feedback/simple_test.py +23 -0
  114. tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
  115. tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
  116. tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
  117. tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
  118. tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
  119. tooluniverse/remote/immune_compass/compass_tool.py +327 -0
  120. tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
  121. tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
  122. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
  123. tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
  124. tooluniverse/remote_tool.py +99 -0
  125. tooluniverse/restful_tool.py +53 -30
  126. tooluniverse/scripts/generate_tool_graph.py +408 -0
  127. tooluniverse/scripts/visualize_tool_graph.py +829 -0
  128. tooluniverse/semantic_scholar_tool.py +62 -0
  129. tooluniverse/smcp.py +2452 -0
  130. tooluniverse/smcp_server.py +975 -0
  131. tooluniverse/test/mcp_server_test.py +0 -0
  132. tooluniverse/test/test_admetai_tool.py +370 -0
  133. tooluniverse/test/test_agentic_tool.py +129 -0
  134. tooluniverse/test/test_alphafold_tool.py +71 -0
  135. tooluniverse/test/test_chem_tool.py +37 -0
  136. tooluniverse/test/test_compose_lieraturereview.py +63 -0
  137. tooluniverse/test/test_compose_tool.py +448 -0
  138. tooluniverse/test/test_dailymed.py +69 -0
  139. tooluniverse/test/test_dataset_tool.py +200 -0
  140. tooluniverse/test/test_disease_target_score.py +56 -0
  141. tooluniverse/test/test_drugbank_filter_examples.py +179 -0
  142. tooluniverse/test/test_efo.py +31 -0
  143. tooluniverse/test/test_enrichr_tool.py +21 -0
  144. tooluniverse/test/test_europe_pmc_tool.py +20 -0
  145. tooluniverse/test/test_fda_adv.py +95 -0
  146. tooluniverse/test/test_fda_drug_labeling.py +91 -0
  147. tooluniverse/test/test_gene_ontology_tools.py +66 -0
  148. tooluniverse/test/test_gwas_tool.py +139 -0
  149. tooluniverse/test/test_hpa.py +625 -0
  150. tooluniverse/test/test_humanbase_tool.py +20 -0
  151. tooluniverse/test/test_idmap_tools.py +61 -0
  152. tooluniverse/test/test_mcp_server.py +211 -0
  153. tooluniverse/test/test_mcp_tool.py +247 -0
  154. tooluniverse/test/test_medlineplus.py +220 -0
  155. tooluniverse/test/test_openalex_tool.py +32 -0
  156. tooluniverse/test/test_opentargets.py +28 -0
  157. tooluniverse/test/test_pubchem_tool.py +116 -0
  158. tooluniverse/test/test_pubtator_tool.py +37 -0
  159. tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
  160. tooluniverse/test/test_reactome.py +54 -0
  161. tooluniverse/test/test_semantic_scholar_tool.py +24 -0
  162. tooluniverse/test/test_software_tools.py +147 -0
  163. tooluniverse/test/test_tool_description_optimizer.py +49 -0
  164. tooluniverse/test/test_tool_finder.py +26 -0
  165. tooluniverse/test/test_tool_finder_llm.py +252 -0
  166. tooluniverse/test/test_tools_find.py +195 -0
  167. tooluniverse/test/test_uniprot_tools.py +74 -0
  168. tooluniverse/test/test_uspto_tool.py +72 -0
  169. tooluniverse/test/test_xml_tool.py +113 -0
  170. tooluniverse/tool_finder_embedding.py +267 -0
  171. tooluniverse/tool_finder_keyword.py +693 -0
  172. tooluniverse/tool_finder_llm.py +699 -0
  173. tooluniverse/tool_graph_web_ui.py +955 -0
  174. tooluniverse/tool_registry.py +416 -0
  175. tooluniverse/uniprot_tool.py +155 -0
  176. tooluniverse/url_tool.py +253 -0
  177. tooluniverse/uspto_tool.py +240 -0
  178. tooluniverse/utils.py +369 -41
  179. tooluniverse/xml_tool.py +369 -0
  180. tooluniverse-1.0.0.dist-info/METADATA +377 -0
  181. tooluniverse-1.0.0.dist-info/RECORD +186 -0
  182. tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
  183. tooluniverse/generate_mcp_tools.py +0 -113
  184. tooluniverse/mcp_server.py +0 -3340
  185. tooluniverse-0.2.0.dist-info/METADATA +0 -139
  186. tooluniverse-0.2.0.dist-info/RECORD +0 -21
  187. tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
  188. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +0 -0
  189. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
  190. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
tooluniverse/embedding_sync.py (added)
@@ -0,0 +1,396 @@
+"""
+Embedding Sync Tool for ToolUniverse
+
+Synchronize embedding databases with HuggingFace Hub for sharing and collaboration.
+Supports uploading local databases to HuggingFace and downloading databases from HuggingFace.
+"""
+
+import os
+import json
+import shutil
+from pathlib import Path
+from typing import Dict
+from datetime import datetime
+
+try:
+    from huggingface_hub import HfApi, upload_folder, snapshot_download
+    from huggingface_hub.utils import HfHubHTTPError
+except ImportError:
+    raise ImportError(
+        "huggingface_hub is required. Install with: pip install huggingface_hub"
+    )
+
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+from .logging_config import get_logger
+
+
+@register_tool("EmbeddingSync")
+class EmbeddingSync(BaseTool):
+    """
+    Sync embedding databases with HuggingFace Hub.
+    Supports uploading local databases and downloading shared databases.
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.logger = get_logger("EmbeddingSync")
+
+        # HuggingFace configuration
+        hf_config = tool_config.get("configs", {}).get("huggingface_config", {})
+        self.hf_token = hf_config.get("token") or os.getenv("HF_TOKEN")
+        self.hf_endpoint = hf_config.get("endpoint", "https://huggingface.co")
+
+        if not self.hf_token:
+            self.logger.warning(
+                "HuggingFace token not found. Some operations may fail."
+            )
+
+        # Initialize HF API
+        self.hf_api = HfApi(endpoint=self.hf_endpoint, token=self.hf_token)
+
+        # Storage configuration
+        storage_config = tool_config.get("configs", {}).get("storage_config", {})
+        self.data_dir = Path(storage_config.get("data_dir", "./data/embeddings"))
+        self.export_dir = Path(storage_config.get("export_dir", "./exports"))
+
+        # Ensure directories exist
+        self.data_dir.mkdir(parents=True, exist_ok=True)
+        self.export_dir.mkdir(parents=True, exist_ok=True)
+
+    def run(self, arguments):
+        """Main entry point for the tool"""
+        action = arguments.get("action")
+
+        if action == "upload":
+            return self._upload_to_huggingface(arguments)
+        elif action == "download":
+            return self._download_from_huggingface(arguments)
+        else:
+            return {"error": f"Unknown action: {action}"}
+
+    def _upload_to_huggingface(self, arguments):
+        """Upload local database to HuggingFace Hub"""
+        database_name = arguments.get("database_name")
+        repository = arguments.get("repository")
+        description = arguments.get("description", "")
+        private = arguments.get("private", False)
+        commit_message = arguments.get(
+            "commit_message", f"Upload {database_name} database"
+        )
+
+        if not database_name:
+            return {"error": "database_name is required"}
+        if not repository:
+            return {"error": "repository is required (format: username/repo-name)"}
+        if not self.hf_token:
+            return {"error": "HuggingFace token required for upload operations"}
+
+        try:
+            # Check if local database exists
+            db_path = self.data_dir / "embeddings.db"
+            index_path = self.data_dir / f"{database_name}.faiss"
+
+            if not db_path.exists():
+                return {"error": "Local embeddings database not found"}
+            if not index_path.exists():
+                return {
+                    "error": f"FAISS index for database '{database_name}' not found"
+                }
+
+            # Create export directory for this upload
+            export_path = (
+                self.export_dir
+                / f"{database_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+            )
+            export_path.mkdir(parents=True, exist_ok=True)
+
+            # Copy database files to export directory
+            shutil.copy2(db_path, export_path / "embeddings.db")
+            shutil.copy2(index_path, export_path / f"{database_name}.faiss")
+
+            # Create database info file
+            db_info = self._get_database_info(database_name)
+            if not db_info:
+                return {
+                    "error": f"Database '{database_name}' not found in local storage"
+                }
+
+            info_file = {
+                "database_name": database_name,
+                "description": description,
+                "embedding_model": db_info.get("embedding_model"),
+                "embedding_dimensions": db_info.get("embedding_dimensions"),
+                "document_count": db_info.get("document_count"),
+                "created_at": db_info.get("created_at"),
+                "uploaded_at": datetime.now().isoformat(),
+                "version": "1.0.0",
+                "format": "tooluniverse_embedding_db",
+            }
+
+            with open(export_path / "database_info.json", "w") as f:
+                json.dump(info_file, f, indent=2)
+
+            # Create README file
+            readme_content = self._generate_readme(database_name, description, db_info)
+            with open(export_path / "README.md", "w") as f:
+                f.write(readme_content)
+
+            # Create repository if it doesn't exist
+            try:
+                self.hf_api.repo_info(repository, repo_type="dataset")
+                self.logger.info(f"Repository {repository} already exists")
+            except HfHubHTTPError:
+                self.logger.info(f"Creating new repository: {repository}")
+                self.hf_api.create_repo(
+                    repo_id=repository, repo_type="dataset", private=private
+                )
+
+            # Upload files to HuggingFace
+            self.logger.info(f"Uploading database to {repository}")
+            upload_folder(
+                folder_path=str(export_path),
+                repo_id=repository,
+                repo_type="dataset",
+                token=self.hf_token,
+                commit_message=commit_message,
+            )
+
+            # Clean up export directory
+            shutil.rmtree(export_path)
+
+            return {
+                "status": "success",
+                "database_name": database_name,
+                "repository": repository,
+                "document_count": db_info.get("document_count"),
+                "upload_url": f"{self.hf_endpoint}/datasets/{repository}",
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error uploading to HuggingFace: {str(e)}")
+            return {"error": f"Failed to upload: {str(e)}"}
+
+    def _download_from_huggingface(self, arguments):
+        """Download database from HuggingFace Hub"""
+        repository = arguments.get("repository")
+        local_name = arguments.get("local_name")
+        overwrite = arguments.get("overwrite", False)
+
+        if not repository:
+            return {"error": "repository is required (format: username/repo-name)"}
+        if not local_name:
+            local_name = repository.split("/")[-1]  # Use repo name as default
+
+        try:
+            # Check if local database already exists
+            if self._local_database_exists(local_name) and not overwrite:
+                return {
+                    "error": f"Local database '{local_name}' already exists. Use overwrite=true to replace."
+                }
+
+            # Download repository to temporary directory
+            temp_dir = (
+                self.export_dir / f"download_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+            )
+
+            self.logger.info(f"Downloading database from {repository}")
+            snapshot_download(
+                repo_id=repository,
+                repo_type="dataset",
+                local_dir=str(temp_dir),
+                token=self.hf_token,
+            )
+
+            # Verify required files exist
+            db_file = temp_dir / "embeddings.db"
+            info_file = temp_dir / "database_info.json"
+
+            if not db_file.exists():
+                shutil.rmtree(temp_dir)
+                return {"error": "Downloaded repository does not contain embeddings.db"}
+
+            if not info_file.exists():
+                shutil.rmtree(temp_dir)
+                return {
+                    "error": "Downloaded repository does not contain database_info.json"
+                }
+
+            # Load database info
+            with open(info_file) as f:
+                db_info = json.load(f)
+
+            original_name = db_info.get("database_name")
+            faiss_file = temp_dir / f"{original_name}.faiss"
+
+            if not faiss_file.exists():
+                shutil.rmtree(temp_dir)
+                return {
+                    "error": f"FAISS index file {original_name}.faiss not found in download"
+                }
+
+            # Copy files to local storage with new name
+            local_db_path = self.data_dir / "embeddings.db"
+            local_index_path = self.data_dir / f"{local_name}.faiss"
+
+            # Handle database file (merge or replace)
+            if local_db_path.exists() and not overwrite:
+                # Merge databases (simplified approach - copy tables)
+                self._merge_databases(
+                    str(db_file), str(local_db_path), original_name, local_name
+                )
+            else:
+                shutil.copy2(db_file, local_db_path)
+                self._rename_database_in_db(
+                    str(local_db_path), original_name, local_name
+                )
+
+            # Copy FAISS index
+            shutil.copy2(faiss_file, local_index_path)
+
+            # Clean up
+            shutil.rmtree(temp_dir)
+
+            return {
+                "status": "success",
+                "repository": repository,
+                "local_name": local_name,
+                "document_count": db_info.get("document_count"),
+                "embedding_model": db_info.get("embedding_model"),
+                "downloaded_at": datetime.now().isoformat(),
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error downloading from HuggingFace: {str(e)}")
+            # Clean up on error
+            if "temp_dir" in locals() and temp_dir.exists():
+                shutil.rmtree(temp_dir)
+            return {"error": f"Failed to download: {str(e)}"}
+
+    def _get_database_info(self, database_name: str) -> Dict:
+        """Get database information from local SQLite"""
+        import sqlite3
+
+        db_path = self.data_dir / "embeddings.db"
+        if not db_path.exists():
+            return {}
+
+        try:
+            with sqlite3.connect(db_path) as conn:
+                cursor = conn.execute(
+                    """
+                    SELECT name, description, embedding_model, embedding_dimensions, document_count, created_at
+                    FROM databases WHERE name = ?
+                    """,
+                    (database_name,),
+                )
+                row = cursor.fetchone()
+                if row:
+                    return {
+                        "name": row[0],
+                        "description": row[1],
+                        "embedding_model": row[2],
+                        "embedding_dimensions": row[3],
+                        "document_count": row[4],
+                        "created_at": row[5],
+                    }
+        except Exception as e:
+            self.logger.error(f"Error getting database info: {str(e)}")
+
+        return {}
+
+    def _local_database_exists(self, database_name: str) -> bool:
+        """Check if database exists locally"""
+        return bool(self._get_database_info(database_name))
+
+    def _generate_readme(
+        self, database_name: str, description: str, db_info: Dict
+    ) -> str:
+        """Generate README content for HuggingFace repository"""
+        return f"""# {database_name} - Embedding Database
+
+## Description
+{description or 'Embedding database created with ToolUniverse'}
+
+## Database Information
+- **Documents**: {db_info.get('document_count', 'Unknown')}
+- **Embedding Model**: {db_info.get('embedding_model', 'Unknown')}
+- **Dimensions**: {db_info.get('embedding_dimensions', 'Unknown')}
+- **Created**: {db_info.get('created_at', 'Unknown')}
+
+## Usage
+
+To use this database in ToolUniverse:
+
+```python
+from src.tooluniverse.execute_function import ToolUniverse
+
+# Download and load the database
+tu = ToolUniverse()
+sync = tu.init_tool("EmbeddingSync")
+
+# Download from HuggingFace
+sync.run({{
+    "action": "download",
+    "repository": "username/repo-name",
+    "local_name": "{database_name}"
+}})
+
+# Search the database
+db = tu.init_tool("EmbeddingDatabaseSearch")
+results = db.run({{
+    "database_name": "{database_name}",
+    "query": "your search query",
+    "top_k": 5
+}})
+```
+
+## Format
+This database uses the ToolUniverse embedding database format with FAISS vector index and SQLite metadata storage.
+"""
+
+    def _merge_databases(
+        self, source_db: str, target_db: str, source_name: str, target_name: str
+    ):
+        """Merge source database into target database (simplified implementation)"""
+        import sqlite3
+
+        # This is a simplified merge - in practice, you'd want more sophisticated handling
+        with sqlite3.connect(source_db) as source_conn:
+            with sqlite3.connect(target_db) as target_conn:
+                # Copy database record
+                source_conn.execute(
+                    "UPDATE databases SET name = ? WHERE name = ?",
+                    (target_name, source_name),
+                )
+
+                # Copy all records (simplified)
+                target_conn.execute("ATTACH DATABASE ? AS source_db", (source_db,))
+                target_conn.execute(
+                    """
+                    INSERT OR REPLACE INTO databases
+                    SELECT * FROM source_db.databases WHERE name = ?
+                    """,
+                    (target_name,),
+                )
+                target_conn.execute(
+                    """
+                    INSERT INTO documents
+                    SELECT * FROM source_db.documents WHERE database_name = ?
+                    """,
+                    (target_name,),
+                )
+                target_conn.execute("DETACH DATABASE source_db")
+
+    def _rename_database_in_db(self, db_path: str, old_name: str, new_name: str):
+        """Rename database in SQLite file"""
+        import sqlite3
+
+        with sqlite3.connect(db_path) as conn:
+            conn.execute(
+                "UPDATE databases SET name = ? WHERE name = ?", (new_name, old_name)
+            )
+            conn.execute(
+                "UPDATE documents SET database_name = ? WHERE database_name = ?",
+                (new_name, old_name),
+            )
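
For orientation, here is a minimal usage sketch of the new sync tool, following the `run()` dispatch above and the `init_tool` pattern shown in the README template the tool itself generates. The repository and database names are placeholders, and the import path assumes an installed wheel (the embedded README uses a `src.`-prefixed path instead); this is a sketch, not part of the diff.

```python
from tooluniverse.execute_function import ToolUniverse

tu = ToolUniverse()
sync = tu.init_tool("EmbeddingSync")  # loading step as shown in the generated README

# Upload a local database to a HuggingFace dataset repo
# (requires HF_TOKEN in the environment or in the tool config)
print(sync.run({
    "action": "upload",
    "database_name": "my_embeddings",        # placeholder database name
    "repository": "username/my-embeddings",  # placeholder (username/repo-name)
    "description": "Example embedding database",
    "private": True,
}))

# Download it elsewhere under a different local name
print(sync.run({
    "action": "download",
    "repository": "username/my-embeddings",
    "local_name": "shared_embeddings",
    "overwrite": False,
}))
```

Note that `run()` returns plain dicts: `{"status": "success", ...}` on success and `{"error": ...}` on failure, so callers are expected to branch on the `error` key rather than catch exceptions.
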
tooluniverse/enrichr_tool.py (added)
@@ -0,0 +1,266 @@
+import json
+import requests
+import urllib.parse
+import networkx as nx
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("EnrichrTool")
+class EnrichrTool(BaseTool):
+    """
+    Tool to perform gene enrichment analysis using Enrichr.
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        # Constants
+        self.enrichr_url = "https://maayanlab.cloud/Enrichr/addList"
+        self.enrichment_url = "https://maayanlab.cloud/Enrichr/enrich"
+
+    def run(self, arguments):
+        """Main entry point for the tool."""
+        genes = arguments.get("gene_list")
+        libs = arguments.get(
+            "libs",
+            [
+                "WikiPathways_2024_Human",
+                "Reactome_Pathways_2024",
+                "MSigDB_Hallmark_2020",
+                "GO_Molecular_Function_2023",
+                "GO_Biological_Process_2023",
+            ],
+        )
+        return self.enrichr_api(genes, libs)
+
+    def get_official_gene_name(self, gene_name):
+        """
+        Retrieve the official gene symbol for a given gene name or synonym using the MyGene.info API.
+
+        Parameters:
+            gene_name (str): The gene name or synonym to query.
+
+        Returns:
+            str: The official gene symbol if found; otherwise, an error message string.
+        """
+        # URL-encode the gene_name to handle special characters
+        encoded_gene_name = urllib.parse.quote(gene_name)
+        url = f"https://mygene.info/v3/query?q={encoded_gene_name}&fields=symbol,alias&species=human"
+
+        response = requests.get(url)
+        if response.status_code != 200:
+            return f"Error querying MyGene.info API: {response.status_code}"
+
+        data = response.json()
+        hits = data.get("hits", [])
+        if not hits:
+            return f"No data found for: {gene_name}. Please check the gene name and try again."
+
+        # Attempt to find an exact match in the official symbol or among aliases.
+        for hit in hits:
+            symbol = hit.get("symbol", "")
+            if symbol.upper() == gene_name.upper():
+                print(
+                    f"[enrichr_api] Using the official gene name: '{symbol}' instead of {gene_name}",
+                    flush=True,
+                )
+                return symbol
+            aliases = hit.get("alias", [])
+            if any(gene_name.upper() == alias.upper() for alias in aliases):
+                print(
+                    f"[enrichr_api] Using the official gene name: '{symbol}' instead of {gene_name}",
+                    flush=True,
+                )
+                return symbol
+
+        # If no exact match is found, return the symbol of the top hit.
+        top_hit = hits[0]
+        symbol = top_hit.get("symbol", None)
+        if symbol:
+            print(
+                f"[enrichr_api] Using the official gene name: '{symbol}' instead of {gene_name}",
+                flush=True,
+            )
+            return symbol
+        else:
+            return f"No official gene symbol found for: {gene_name}. Please ensure it is correct."
+
+    def submit_gene_list(self, gene_list):
+        """
+        Submit the gene list to Enrichr and return the user list ID.
+
+        Parameters:
+            gene_list (str): Newline-separated string of gene names.
+
+        Returns:
+            str: The user list ID from Enrichr.
+        """
+        payload = {
+            "list": (None, gene_list),
+            "description": (None, f"Gene list for {gene_list}"),
+        }
+        response = requests.post(self.enrichr_url, files=payload)
+
+        if not response.ok:
+            return "Error submitting gene list to Enrichr"
+
+        return json.loads(response.text)["userListId"]
+
+    def get_enrichment_results(self, user_list_id, library):
+        """
+        Fetch enrichment results for a specific library.
+
+        Parameters:
+            user_list_id (str): The user list ID from Enrichr.
+            library (str): The name of the enrichment library.
+
+        Returns:
+            dict: The enrichment results.
+        """
+        query_string = f"?userListId={user_list_id}&backgroundType={library}"
+        response = requests.get(self.enrichment_url + query_string)
+
+        if not response.ok:
+            return f"Error fetching enrichment results for {library}"
+
+        return json.loads(response.text)
+
+    def build_graph(self, genes, enrichment_results):
+        """
+        Initialize and build the graph with gene nodes and enriched terms.
+
+        Parameters:
+            genes (list): List of gene names.
+            enrichment_results (dict): Dictionary of enrichment results by library.
+
+        Returns:
+            networkx.Graph: The constructed graph.
+        """
+        G = nx.Graph()
+
+        # Add gene nodes
+        for gene in genes:
+            G.add_node(gene, type="gene")
+
+        # Add enriched terms and edges
+        for library, results in enrichment_results.items():
+            for term in results:
+                term_name = term[1]
+                associated_genes = term[5]
+                G.add_node(term_name, type="term", library=library)
+
+                for gene in associated_genes:
+                    if gene in genes:
+                        G.add_edge(gene, term_name, weight=round(term[4], 2))
+
+        return G
+
+    def rank_paths_by_weight(self, G, source, target):
+        """
+        Find and rank paths between source and target based on total edge weight.
+
+        Parameters:
+            G (networkx.Graph): The graph to search.
+            source (str): The source node.
+            target (str): The target node.
+
+        Returns:
+            list: List of tuples (path, weight) sorted by weight descending.
+        """
+        all_paths = list(nx.all_simple_paths(G, source=source, target=target))
+        path_weights = []
+
+        for path in all_paths:
+            total_weight = sum(
+                G[path[i]][path[i + 1]].get("weight", 1) for i in range(len(path) - 1)
+            )
+            path_weights.append((path, total_weight))
+
+        return sorted(path_weights, key=lambda x: x[1], reverse=True)
+
+    def rank_paths_to_term(self, G, gene, term):
+        """
+        Find and rank paths from each gene to a specified term based on total edge weight.
+
+        Parameters:
+            G (networkx.Graph): The graph to search.
+            gene (str): The source gene.
+            term (str): The target term.
+
+        Returns:
+            list or None: List of tuples (path, weight) sorted by weight descending, or None if no paths.
+        """
+        all_paths = list(nx.all_simple_paths(G, source=gene, target=term))
+        path_weights = []
+
+        for path in all_paths:
+            total_weight = sum(
+                G[path[i]][path[i + 1]].get("weight", 1) for i in range(len(path) - 1)
+            )
+            path_weights.append((path, total_weight))
+
+        if len(path_weights) != 0:
+            return sorted(path_weights, key=lambda x: x[1], reverse=True)
+        return None
+
+    def enrichr_api(self, genes, libs):
+        """
+        Main API function to perform gene enrichment analysis.
+
+        Parameters:
+            genes (list): List of gene names.
+            libs (list): List of enrichment libraries to use.
+
+        Returns:
+            tuple: (connected_path, connections) dictionaries.
+        """
+        # Convert each gene to its official name and log the result
+        genes = [self.get_official_gene_name(gene) for gene in genes]
+        print("Official gene names:", genes)
+
+        # Ensure at least two genes are provided for path ranking
+        if len(genes) < 2:
+            raise ValueError(
+                "At least two genes are required to rank paths between genes."
+            )
+
+        # Prepare the gene list for Enrichr submission
+        gene_list_str = "\n".join(genes)
+        user_list_id = self.submit_gene_list(gene_list_str)
+
+        # Retrieve enrichment results for each specified library
+        enrichment_results = {}
+        for library in libs:
+            results = self.get_enrichment_results(user_list_id, library)
+            # Safely get the top 5 results; if the library key isn't found, default to an empty list
+            enrichment_results[library] = results.get(library, [])[:5]
+
+        # Build the graph from the gene list and enrichment results
+        G = self.build_graph(genes, enrichment_results)
+
+        # Rank paths from the first gene to the second
+        ranked_paths = self.rank_paths_by_weight(G, genes[0], genes[1])
+        connected_path = {}
+        for path, weight in ranked_paths:
+            connected_path[f"Path: {path}"] = f"Total Weight: {weight}"
+
+        # Compute connectivity data for each gene and graph node
+        connections = {}
+        for gene in genes:
+            for term in G.nodes:
+                paths_to_term = self.rank_paths_to_term(G, gene, term)
+                if paths_to_term is not None:
+                    connections[f"Connectivity: {gene} - {term}"] = paths_to_term
+
+        # Check for empty outputs and print helper messages
+        if not connected_path:
+            print(
+                f"[Enrichr] No ranked paths were found between the gene pair {genes}."
+            )
+
+        if not connections:
+            print(
+                f"[Enrichr] No connection between genes and terms in the enriched graph of {genes}."
+            )
+
+        return connected_path, connections
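
As a quick illustration of the EnrichrTool flow above (symbol normalization via MyGene.info, list submission to Enrichr, then path ranking over the enrichment graph), here is a hedged sketch of a direct invocation. The gene symbols and the single-library override are placeholders, and it assumes `BaseTool` accepts a plain dict config, as the `__init__` signature suggests; live network access to MyGene.info and Enrichr is required.

```python
from tooluniverse.enrichr_tool import EnrichrTool

# Assumed minimal config; real deployments load this from data/enrichr_tools.json
tool = EnrichrTool({"name": "EnrichrTool"})

# run() requires at least two genes; "libs" is optional (five defaults above)
connected_path, connections = tool.run({
    "gene_list": ["TP53", "MDM2"],      # placeholder gene symbols
    "libs": ["MSigDB_Hallmark_2020"],   # override the default library set
})

# connected_path maps "Path: [...]" strings to their total edge weight
for path, weight in list(connected_path.items())[:3]:
    print(path, "->", weight)
```

Unlike most tools in the package, `run()` here returns a `(connected_path, connections)` tuple rather than a single dict, so downstream callers unpack two values.
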