tooluniverse 0.2.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tooluniverse might be problematic. Click here for more details.

Files changed (190)
  1. tooluniverse/__init__.py +340 -4
  2. tooluniverse/admetai_tool.py +84 -0
  3. tooluniverse/agentic_tool.py +563 -0
  4. tooluniverse/alphafold_tool.py +96 -0
  5. tooluniverse/base_tool.py +129 -6
  6. tooluniverse/boltz_tool.py +207 -0
  7. tooluniverse/chem_tool.py +192 -0
  8. tooluniverse/compose_scripts/__init__.py +1 -0
  9. tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
  10. tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
  11. tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
  12. tooluniverse/compose_scripts/literature_tool.py +34 -0
  13. tooluniverse/compose_scripts/output_summarizer.py +279 -0
  14. tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
  15. tooluniverse/compose_scripts/tool_discover.py +705 -0
  16. tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
  17. tooluniverse/compose_tool.py +371 -0
  18. tooluniverse/ctg_tool.py +1002 -0
  19. tooluniverse/custom_tool.py +81 -0
  20. tooluniverse/dailymed_tool.py +108 -0
  21. tooluniverse/data/admetai_tools.json +155 -0
  22. tooluniverse/data/agentic_tools.json +1156 -0
  23. tooluniverse/data/alphafold_tools.json +87 -0
  24. tooluniverse/data/boltz_tools.json +9 -0
  25. tooluniverse/data/chembl_tools.json +16 -0
  26. tooluniverse/data/clait_tools.json +108 -0
  27. tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
  28. tooluniverse/data/compose_tools.json +202 -0
  29. tooluniverse/data/dailymed_tools.json +70 -0
  30. tooluniverse/data/dataset_tools.json +646 -0
  31. tooluniverse/data/disease_target_score_tools.json +712 -0
  32. tooluniverse/data/efo_tools.json +17 -0
  33. tooluniverse/data/embedding_tools.json +319 -0
  34. tooluniverse/data/enrichr_tools.json +31 -0
  35. tooluniverse/data/europe_pmc_tools.json +22 -0
  36. tooluniverse/data/expert_feedback_tools.json +10 -0
  37. tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
  38. tooluniverse/data/fda_drug_labeling_tools.json +1 -1
  39. tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
  40. tooluniverse/data/finder_tools.json +209 -0
  41. tooluniverse/data/gene_ontology_tools.json +113 -0
  42. tooluniverse/data/gwas_tools.json +1082 -0
  43. tooluniverse/data/hpa_tools.json +333 -0
  44. tooluniverse/data/humanbase_tools.json +47 -0
  45. tooluniverse/data/idmap_tools.json +74 -0
  46. tooluniverse/data/mcp_client_tools_example.json +113 -0
  47. tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
  48. tooluniverse/data/medlineplus_tools.json +141 -0
  49. tooluniverse/data/monarch_tools.json +1 -1
  50. tooluniverse/data/openalex_tools.json +36 -0
  51. tooluniverse/data/opentarget_tools.json +1 -1
  52. tooluniverse/data/output_summarization_tools.json +101 -0
  53. tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
  54. tooluniverse/data/packages/categorized_tools.txt +206 -0
  55. tooluniverse/data/packages/cheminformatics_tools.json +347 -0
  56. tooluniverse/data/packages/earth_sciences_tools.json +74 -0
  57. tooluniverse/data/packages/genomics_tools.json +776 -0
  58. tooluniverse/data/packages/image_processing_tools.json +38 -0
  59. tooluniverse/data/packages/machine_learning_tools.json +789 -0
  60. tooluniverse/data/packages/neuroscience_tools.json +62 -0
  61. tooluniverse/data/packages/original_tools.txt +0 -0
  62. tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
  63. tooluniverse/data/packages/scientific_computing_tools.json +560 -0
  64. tooluniverse/data/packages/single_cell_tools.json +453 -0
  65. tooluniverse/data/packages/software_tools.json +4954 -0
  66. tooluniverse/data/packages/structural_biology_tools.json +396 -0
  67. tooluniverse/data/packages/visualization_tools.json +399 -0
  68. tooluniverse/data/pubchem_tools.json +215 -0
  69. tooluniverse/data/pubtator_tools.json +68 -0
  70. tooluniverse/data/rcsb_pdb_tools.json +1332 -0
  71. tooluniverse/data/reactome_tools.json +19 -0
  72. tooluniverse/data/semantic_scholar_tools.json +26 -0
  73. tooluniverse/data/special_tools.json +2 -25
  74. tooluniverse/data/tool_composition_tools.json +88 -0
  75. tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
  76. tooluniverse/data/txagent_client_tools.json +9 -0
  77. tooluniverse/data/uniprot_tools.json +211 -0
  78. tooluniverse/data/url_fetch_tools.json +94 -0
  79. tooluniverse/data/uspto_downloader_tools.json +9 -0
  80. tooluniverse/data/uspto_tools.json +811 -0
  81. tooluniverse/data/xml_tools.json +3275 -0
  82. tooluniverse/dataset_tool.py +296 -0
  83. tooluniverse/default_config.py +165 -0
  84. tooluniverse/efo_tool.py +42 -0
  85. tooluniverse/embedding_database.py +630 -0
  86. tooluniverse/embedding_sync.py +396 -0
  87. tooluniverse/enrichr_tool.py +266 -0
  88. tooluniverse/europe_pmc_tool.py +52 -0
  89. tooluniverse/execute_function.py +1775 -95
  90. tooluniverse/extended_hooks.py +444 -0
  91. tooluniverse/gene_ontology_tool.py +194 -0
  92. tooluniverse/graphql_tool.py +158 -36
  93. tooluniverse/gwas_tool.py +358 -0
  94. tooluniverse/hpa_tool.py +1645 -0
  95. tooluniverse/humanbase_tool.py +389 -0
  96. tooluniverse/logging_config.py +254 -0
  97. tooluniverse/mcp_client_tool.py +764 -0
  98. tooluniverse/mcp_integration.py +413 -0
  99. tooluniverse/mcp_tool_registry.py +925 -0
  100. tooluniverse/medlineplus_tool.py +337 -0
  101. tooluniverse/openalex_tool.py +228 -0
  102. tooluniverse/openfda_adv_tool.py +283 -0
  103. tooluniverse/openfda_tool.py +393 -160
  104. tooluniverse/output_hook.py +1122 -0
  105. tooluniverse/package_tool.py +195 -0
  106. tooluniverse/pubchem_tool.py +158 -0
  107. tooluniverse/pubtator_tool.py +168 -0
  108. tooluniverse/rcsb_pdb_tool.py +38 -0
  109. tooluniverse/reactome_tool.py +108 -0
  110. tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
  111. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
  112. tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
  113. tooluniverse/remote/expert_feedback/simple_test.py +23 -0
  114. tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
  115. tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
  116. tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
  117. tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
  118. tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
  119. tooluniverse/remote/immune_compass/compass_tool.py +327 -0
  120. tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
  121. tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
  122. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
  123. tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
  124. tooluniverse/remote_tool.py +99 -0
  125. tooluniverse/restful_tool.py +53 -30
  126. tooluniverse/scripts/generate_tool_graph.py +408 -0
  127. tooluniverse/scripts/visualize_tool_graph.py +829 -0
  128. tooluniverse/semantic_scholar_tool.py +62 -0
  129. tooluniverse/smcp.py +2452 -0
  130. tooluniverse/smcp_server.py +975 -0
  131. tooluniverse/test/mcp_server_test.py +0 -0
  132. tooluniverse/test/test_admetai_tool.py +370 -0
  133. tooluniverse/test/test_agentic_tool.py +129 -0
  134. tooluniverse/test/test_alphafold_tool.py +71 -0
  135. tooluniverse/test/test_chem_tool.py +37 -0
  136. tooluniverse/test/test_compose_lieraturereview.py +63 -0
  137. tooluniverse/test/test_compose_tool.py +448 -0
  138. tooluniverse/test/test_dailymed.py +69 -0
  139. tooluniverse/test/test_dataset_tool.py +200 -0
  140. tooluniverse/test/test_disease_target_score.py +56 -0
  141. tooluniverse/test/test_drugbank_filter_examples.py +179 -0
  142. tooluniverse/test/test_efo.py +31 -0
  143. tooluniverse/test/test_enrichr_tool.py +21 -0
  144. tooluniverse/test/test_europe_pmc_tool.py +20 -0
  145. tooluniverse/test/test_fda_adv.py +95 -0
  146. tooluniverse/test/test_fda_drug_labeling.py +91 -0
  147. tooluniverse/test/test_gene_ontology_tools.py +66 -0
  148. tooluniverse/test/test_gwas_tool.py +139 -0
  149. tooluniverse/test/test_hpa.py +625 -0
  150. tooluniverse/test/test_humanbase_tool.py +20 -0
  151. tooluniverse/test/test_idmap_tools.py +61 -0
  152. tooluniverse/test/test_mcp_server.py +211 -0
  153. tooluniverse/test/test_mcp_tool.py +247 -0
  154. tooluniverse/test/test_medlineplus.py +220 -0
  155. tooluniverse/test/test_openalex_tool.py +32 -0
  156. tooluniverse/test/test_opentargets.py +28 -0
  157. tooluniverse/test/test_pubchem_tool.py +116 -0
  158. tooluniverse/test/test_pubtator_tool.py +37 -0
  159. tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
  160. tooluniverse/test/test_reactome.py +54 -0
  161. tooluniverse/test/test_semantic_scholar_tool.py +24 -0
  162. tooluniverse/test/test_software_tools.py +147 -0
  163. tooluniverse/test/test_tool_description_optimizer.py +49 -0
  164. tooluniverse/test/test_tool_finder.py +26 -0
  165. tooluniverse/test/test_tool_finder_llm.py +252 -0
  166. tooluniverse/test/test_tools_find.py +195 -0
  167. tooluniverse/test/test_uniprot_tools.py +74 -0
  168. tooluniverse/test/test_uspto_tool.py +72 -0
  169. tooluniverse/test/test_xml_tool.py +113 -0
  170. tooluniverse/tool_finder_embedding.py +267 -0
  171. tooluniverse/tool_finder_keyword.py +693 -0
  172. tooluniverse/tool_finder_llm.py +699 -0
  173. tooluniverse/tool_graph_web_ui.py +955 -0
  174. tooluniverse/tool_registry.py +416 -0
  175. tooluniverse/uniprot_tool.py +155 -0
  176. tooluniverse/url_tool.py +253 -0
  177. tooluniverse/uspto_tool.py +240 -0
  178. tooluniverse/utils.py +369 -41
  179. tooluniverse/xml_tool.py +369 -0
  180. tooluniverse-1.0.0.dist-info/METADATA +377 -0
  181. tooluniverse-1.0.0.dist-info/RECORD +186 -0
  182. tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
  183. tooluniverse/generate_mcp_tools.py +0 -113
  184. tooluniverse/mcp_server.py +0 -3340
  185. tooluniverse-0.2.0.dist-info/METADATA +0 -139
  186. tooluniverse-0.2.0.dist-info/RECORD +0 -21
  187. tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
  188. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +0 -0
  189. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
  190. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
tooluniverse/base_tool.py CHANGED
@@ -1,11 +1,134 @@
1
1
  from .utils import extract_function_call_json, evaluate_function_call
2
+ import json
3
+ from pathlib import Path
4
+ from typing import no_type_check
5
+
6
+
7
class ToolExecutionError(Exception):
    """Base exception for tool execution errors.

    The more specific tool exceptions below derive from this class so that
    callers can handle every tool failure with a single
    ``except ToolExecutionError`` clause.
    """


class ValidationError(ToolExecutionError):
    """Exception raised when input validation fails."""


class AuthenticationError(ToolExecutionError):
    """Exception raised when authentication fails."""


class RateLimitError(ToolExecutionError):
    """Exception raised when API rate limit is exceeded."""
21
+
2
22
 
3
23
  class BaseTool:
4
24
def __init__(self, tool_config):
    """Store the tool configuration after layering it over file-based defaults."""
    effective_config = self._apply_defaults(tool_config)
    self.tool_config = effective_config
26
+
27
@classmethod
def get_default_config_file(cls):
    """
    Locate the JSON defaults file associated with this tool class.

    The file name is derived from the lowercased class name:
    ``<classname>_defaults.json``. The lookup first asks the
    ``tooluniverse.data`` package for that resource. If this raises
    FileNotFoundError, ModuleNotFoundError or AttributeError, the method
    falls back to the ``data`` directory located next to this module.

    Override this method in subclasses to point at a custom defaults file.

    Returns:
        The resolved path when the located resource exposes ``resolve``,
        otherwise the resource object itself. Existence of the file is
        not checked here.
    """
    defaults_name = f"{cls.__name__.lower()}_defaults.json"

    # Prefer the standard-library module; use the backport only when the
    # standard-library import itself is unavailable.
    try:
        import importlib.resources as resources
    except ImportError:
        import importlib_resources as resources

    try:
        # Package-resource lookup (works with installed packages).
        candidate = resources.files("tooluniverse.data") / defaults_name
        # Hand back a plain resolved path whenever the object supports it.
        return candidate.resolve() if hasattr(candidate, "resolve") else candidate
    except (FileNotFoundError, ModuleNotFoundError, AttributeError):
        # File-based resolution for development/local use.
        return Path(__file__).parent / "data" / defaults_name
74
+
75
@classmethod
def load_defaults_from_file(cls):
    """
    Read this tool class's default settings from its defaults file.

    The file location comes from ``get_default_config_file``. The defaults
    are expected under the top-level key ``<classname>_defaults`` (class
    name lowercased).

    Returns:
        dict: The defaults found under that key, or an empty dict when the
        file is missing, is not valid JSON, lacks the key, or cannot be
        loaded for any other reason (a warning is printed in that last
        case).
    """
    source = cls.get_default_config_file()

    try:
        # Objects that can report existence let us bail out early.
        if hasattr(source, "exists") and not source.exists():
            return {}

        if not hasattr(source, "read_text"):
            # Plain file path: open it directly.
            with open(source, "r", encoding="utf-8") as handle:
                parsed = json.load(handle)
        else:
            # Path or resource object exposing read_text().
            parsed = json.loads(source.read_text(encoding="utf-8"))

        section = f"{cls.__name__.lower()}_defaults"
        return parsed.get(section, {})

    except (FileNotFoundError, json.JSONDecodeError):
        # Missing file or malformed JSON simply means "no defaults".
        return {}
    except Exception as e:
        print(f"Warning: Could not load defaults for {cls.__name__}: {e}")
        return {}
106
+
107
def _apply_defaults(self, tool_config):
    """
    Merge file-based defaults underneath the given tool configuration.

    Returns:
        ``tool_config`` itself when no defaults are available; otherwise a
        new dict that starts from a shallow copy of the defaults and is
        then updated with ``tool_config``, so tool-specific values win.
    """
    base = self.load_defaults_from_file()

    if base:
        combined = base.copy()
        # Tool-specific entries override the defaults.
        combined.update(tool_config)
        return combined

    # Nothing to merge: hand back the caller's config untouched.
    return tool_config
123
+
124
@no_type_check
def run(self, arguments=None):
    """Execute the tool.

    The base implementation performs no work and returns None. The
    optional ``arguments`` mapping exists so the signature lines up with
    concrete tools, most of which take a dictionary of inputs.
    """
    return None
9
132
 
10
133
  def check_function_call(self, function_call_json):
11
134
  if isinstance(function_call_json, str):
@@ -22,11 +145,11 @@ class BaseTool:
22
145
  list: List of required parameters for the given endpoint.
23
146
  """
24
147
  required_params = []
25
- parameters = self.tool_config.get('parameter', {}).get('properties', {})
148
+ parameters = self.tool_config.get("parameter", {}).get("properties", {})
26
149
 
27
150
  # Check each parameter to see if it is required
28
151
  for param, details in parameters.items():
29
- if details.get('required', False):
152
+ if details.get("required", False):
30
153
  required_params.append(param.lower())
31
154
 
32
- return required_params
155
+ return required_params
@@ -0,0 +1,207 @@
1
+ import os
2
+ import pprint
3
+ import subprocess
4
+ import tempfile
5
+ import yaml
6
+ import json
7
+ import shutil
8
+ from .base_tool import BaseTool
9
+ from .tool_registry import register_tool
10
+
11
+
12
@register_tool("Boltz2DockingTool")
class Boltz2DockingTool(BaseTool):
    """
    Tool to perform protein-ligand docking and affinity prediction using the local Boltz-2 model.
    This tool constructs a YAML input file, runs the `boltz predict` command,
    and parses the output to return the predicted structure and affinity.
    """

    # Optional arguments forwarded to the CLI as ``--<name> <value>``.
    _VALUE_FLAGS = (
        "recycling_steps",
        "diffusion_samples",
        "sampling_steps",
        "step_scale",
    )

    def __init__(self, tool_config: dict):
        """
        Initializes the Boltz2DockingTool.

        Raises:
            EnvironmentError: If the 'boltz' command is not available in the
                system's PATH.
        """
        super().__init__(tool_config)
        if not shutil.which("boltz"):
            raise EnvironmentError(
                "The 'boltz' command is not found. "
                "Please ensure the 'boltz' package is installed and accessible in the system's PATH. "
                "Installation guide: https://github.com/jwohlwend/boltz"
            )

    def _build_yaml_input(self, arguments: dict) -> dict:
        """
        Constructs the YAML data structure for the Boltz input.

        The protein is always written as chain "A". Every ligand needs an
        'id' plus either a 'smiles' or a 'ccd' entry ('smiles' wins when both
        are present). The first ligand is used as the affinity binder.

        Raises:
            ValueError: If no ligand is given, a ligand lacks an 'id', or a
                ligand has neither 'smiles' nor 'ccd'.
        """
        protein_sequence = arguments.get("protein_sequence")
        ligands = arguments.get("ligands", [])

        # The first ligand is assumed to be the binder for affinity prediction
        if not ligands:
            raise ValueError(
                "At least one ligand must be provided in the 'ligands' list."
            )

        binder_id = ligands[0].get("id")
        if not binder_id:
            raise ValueError("The first ligand in the list must have a valid 'id'.")

        # --- Sequences Section ---
        sequences = [{"protein": {"id": "A", "sequence": protein_sequence}}]

        for i, ligand_data in enumerate(ligands):
            chain_id = ligand_data.get("id")
            if not chain_id:
                raise ValueError(f"Ligand at index {i} must have an 'id' key.")

            entry = {"id": chain_id}
            if "smiles" in ligand_data:
                entry["smiles"] = ligand_data["smiles"]
            elif "ccd" in ligand_data:
                entry["ccd"] = ligand_data["ccd"]
            else:
                raise ValueError(
                    f"Ligand at index {i} must have a 'smiles' or 'ccd' key."
                )
            sequences.append({"ligand": entry})

        # --- Properties Section (for Affinity) ---
        properties = [{"affinity": {"binder": binder_id}}]

        # --- Final YAML Structure ---
        yaml_input = {"version": 1, "sequences": sequences, "properties": properties}

        # Add optional fields
        for optional_key in ("constraints", "templates"):
            if optional_key in arguments:
                yaml_input[optional_key] = arguments[optional_key]

        return yaml_input

    def _build_command(
        self, arguments: dict, input_yaml_path: str, output_dir: str
    ) -> list[str]:
        """Assembles the `boltz predict` argument list for one run."""
        command = [
            "boltz",
            "predict",
            input_yaml_path,
            "--out_dir",
            output_dir,
            "--use_msa_server",
            "--override",  # Override existing results if any
        ]

        for key in self._VALUE_FLAGS:
            value = arguments.get(key)
            # Skip flags that are absent or explicitly None; otherwise the
            # literal string "None" would be handed to the CLI.
            if value is not None:
                command.extend([f"--{key}", str(value)])

        if arguments.get("use_potentials", False):
            command.append("--use_potentials")

        return command

    def _collect_results(
        self, output_dir: str, input_filename: str, arguments: dict
    ) -> dict:
        """Reads the structure/affinity files produced under `output_dir`."""
        # 1. locate the Boltz run folder under out_dir; with several
        #    candidates the lexicographically last name is used.
        run_dirs = sorted(
            d
            for d in os.listdir(output_dir)
            if os.path.isdir(os.path.join(output_dir, d))
        )
        if not run_dirs:
            return {"error": "No Boltz run folder found under out_dir"}
        run_root = os.path.join(output_dir, run_dirs[-1])

        # 2. now point at predictions/<input_filename>
        prediction_folder = os.path.join(run_root, "predictions", input_filename)
        results = {}

        # 3. structure .cif (only when explicitly requested)
        if arguments.get("return_structure", False):
            structure_file = os.path.join(
                prediction_folder, f"{input_filename}_model_0.cif"
            )
            if os.path.exists(structure_file):
                with open(structure_file, "r", encoding="utf-8") as f:
                    results["predicted_structure"] = f.read()
                results["structure_format"] = "cif"
            else:
                results["structure_error"] = (
                    f"Missing {os.path.basename(structure_file)}"
                )

        # 4. affinity .json
        affinity_file = os.path.join(
            prediction_folder, f"affinity_{input_filename}.json"
        )
        if os.path.exists(affinity_file):
            with open(affinity_file, "r", encoding="utf-8") as f:
                results["affinity_prediction"] = json.load(f)
        else:
            results["affinity_error"] = f"Missing {os.path.basename(affinity_file)}"

        return results

    def run(self, arguments: dict | None = None, timeout: int = 1200) -> dict:
        """
        Executes the Boltz prediction.

        Args:
            arguments (dict): A dictionary containing the necessary inputs.
                - protein_sequence (str): The amino acid sequence of the protein.
                - ligands (list[dict]): A list of ligands, each with an 'id' and a 'smiles' or 'ccd' key.
                - constraints (list[dict], optional): Covalent bonds or other constraints.
                - templates (list[dict], optional): Structural templates.
                - recycling_steps, diffusion_samples, sampling_steps, step_scale
                  (optional): Forwarded as CLI flags when present and not None.
                - use_potentials (bool, optional): Adds `--use_potentials` when truthy.
                - return_structure (bool, optional): Include the predicted
                  structure file contents in the result when truthy.
            timeout (int): The maximum time in seconds to wait for the Boltz command to complete.

        Returns:
            dict: May contain 'predicted_structure' and 'structure_format'
            (or 'structure_error'), 'affinity_prediction' (or
            'affinity_error'), or a single 'error' entry when
            'protein_sequence' is missing or no run folder was produced.

        Raises:
            ValueError: If the ligand list is missing or malformed.
            subprocess.CalledProcessError: If `boltz` exits with a non-zero status.
            subprocess.TimeoutExpired: If `boltz` does not finish within `timeout`.
        """
        arguments = arguments or {}
        if not arguments.get("protein_sequence"):
            return {"error": "The 'protein_sequence' parameter is required."}

        # Everything lives in a temporary directory, so results must be read
        # back before the `with` block exits.
        with tempfile.TemporaryDirectory() as temp_dir:
            input_filename = "boltz_input"
            input_yaml_path = os.path.join(temp_dir, f"{input_filename}.yaml")
            output_dir = os.path.join(temp_dir, "results")
            os.makedirs(output_dir, exist_ok=True)

            # Build and write the input YAML file
            yaml_data = self._build_yaml_input(arguments)
            with open(input_yaml_path, "w", encoding="utf-8") as f:
                yaml.dump(yaml_data, f, sort_keys=False)

            command = self._build_command(arguments, input_yaml_path, output_dir)

            # Execute the Boltz command
            subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=timeout,
                check=True,  # Will raise CalledProcessError on non-zero exit codes
            )

            return self._collect_results(output_dir, input_filename, arguments)
192
+
193
+
194
if __name__ == "__main__":
    # Example usage: one protein sequence, one SMILES ligand, affinity only.
    example_arguments = {
        "protein_sequence": "ACDEFGHIKLMNPQRSTVWY",
        "ligands": [{"id": "LIG1", "smiles": "C1=CC=CC=C1"}],
        "use_potentials": False,
        "diffusion_samples": 1,
        "return_structure": False,
    }
    docking_tool = Boltz2DockingTool(tool_config={})
    pprint.pprint(docking_tool.run(example_arguments))
@@ -0,0 +1,192 @@
1
+ import requests
2
+ from urllib.parse import quote
3
+
4
+ # from rdkit import Chem
5
+ from .base_tool import BaseTool
6
+ from .tool_registry import register_tool
7
+ from indigo import Indigo
8
+
9
+
10
@register_tool("ChEMBLTool")
class ChEMBLTool(BaseTool):
    """
    Tool to search for molecules similar to a given compound using the ChEMBL Web Services API.

    The query is either a ChEMBL ID (anything starting with "CHEMBL",
    case-insensitive) or free text that is resolved through the ChEMBL
    molecule search endpoint.
    """

    # Every request asks the API for a JSON response.
    _JSON_HEADERS = {"Accept": "application/json"}

    def __init__(self, tool_config, base_url="https://www.ebi.ac.uk/chembl/api/data"):
        super().__init__(tool_config)
        self.base_url = base_url
        self.indigo = Indigo()

    def run(self, arguments=None):
        """
        Run a similarity search.

        Args:
            arguments (dict): Expected keys:
                - query (str): ChEMBL ID or search text. Required.
                - similarity_threshold (optional): Defaults to 80.
                - max_results (optional): Defaults to 20.

        Returns:
            A list of result dicts (see `_search_similar_molecules`) or a
            dict with an 'error' entry.
        """
        arguments = arguments or {}
        query = arguments.get("query")
        similarity_threshold = arguments.get("similarity_threshold", 80)
        max_results = arguments.get("max_results", 20)

        if not query:
            return {"error": "`query` parameter is required."}
        return self._search_similar_molecules(query, similarity_threshold, max_results)

    def get_chembl_id_by_name(self, compound_name):
        """
        Search ChEMBL for a compound by name and return the ChEMBL IDs of up
        to the first three matches as ``{"chembl_ids": [...]}``, or a dict
        with an 'error' entry when nothing usable is found.
        """
        search_url = f"{self.base_url}/molecule/search.json?q={quote(compound_name)}"
        response = requests.get(search_url, headers=self._JSON_HEADERS)
        response.raise_for_status()
        results = response.json().get("molecules", [])
        if not results or not isinstance(results, list):
            return {"error": "No valid results found for the compound name."}
        chembl_ids = [
            molecule.get("molecule_chembl_id")
            for molecule in results[:3]  # Get the top 3 results
            if molecule.get("molecule_chembl_id")
        ]
        if not chembl_ids:
            return {"error": "No ChEMBL IDs found for the compound name."}
        return {"chembl_ids": chembl_ids}

    def get_smiles_pref_name_by_chembl_id(self, query):
        """
        Given a ChEMBL ID, return a dict with canonical SMILES and preferred name.

        Returns None when `query` does not start with "CHEMBL", and a dict
        with an 'error' entry when the molecule record is unusable.
        """
        if not query.upper().startswith("CHEMBL"):
            return None

        molecule_url = f"{self.base_url}/molecule/{quote(query)}.json"
        response = requests.get(molecule_url, headers=self._JSON_HEADERS)
        response.raise_for_status()
        molecule = response.json()
        if not molecule or not isinstance(molecule, dict):
            return {"error": "No valid molecule found for the given ChEMBL ID."}
        molecule_structures = molecule.get("molecule_structures")
        if not molecule_structures or not isinstance(molecule_structures, dict):
            return {
                "error": "Molecule structures not found or invalid for the ChEMBL ID."
            }
        smiles = molecule_structures.get("canonical_smiles")
        if not smiles:
            return {"error": "SMILES not found for the given ChEMBL ID."}
        return {"smiles": smiles, "pref_name": molecule.get("pref_name")}

    def get_chembl_smiles_pref_name_id_by_name(self, compound_name):
        """
        Search ChEMBL for a compound by name and return a list of dicts with ChEMBL ID, canonical SMILES, and preferred name for the top 5 matches.

        Matches without a ChEMBL ID are dropped. When a match has an ID but
        no SMILES in the search response, the SMILES is looked up by ID.
        Returns a dict with an 'error' entry when nothing usable remains.
        """
        search_url = f"{self.base_url}/molecule/search.json?q={quote(compound_name)}"
        response = requests.get(search_url, headers=self._JSON_HEADERS)
        response.raise_for_status()
        results = response.json().get("molecules", [])
        if not results or not isinstance(results, list):
            return {"error": "No valid results found for the compound name."}

        output = []
        for molecule in results[:5]:
            chembl_id = molecule.get("molecule_chembl_id", None)
            if not chembl_id:
                continue

            # The API may return an explicit null for molecule_structures.
            molecule_structures = molecule.get("molecule_structures", {})
            smiles = (
                molecule_structures.get("canonical_smiles", None)
                if molecule_structures is not None
                else None
            )
            if smiles:
                output.append(
                    {
                        "chembl_id": chembl_id,
                        "smiles": smiles,
                        "pref_name": molecule.get("pref_name"),
                    }
                )
                continue

            # No SMILES in the search hit: fetch the molecule record itself.
            looked_up = self.get_smiles_pref_name_by_chembl_id(chembl_id)
            if isinstance(looked_up, dict) and "error" not in looked_up:
                output.append(
                    {
                        "chembl_id": chembl_id,
                        "smiles": looked_up["smiles"],
                        "pref_name": looked_up.get("pref_name"),
                    }
                )

        if not output:
            return {"error": "No ChEMBL IDs or SMILES found for the compound name."}
        return output

    def _search_similar_molecules(self, query, similarity_threshold, max_results):
        """
        Resolve `query` to one or more SMILES strings and run a ChEMBL
        similarity search for each of them.

        Returns:
            A list with one dict per resolved compound that had at least one
            similar molecule ('chembl_id', 'pref_name', 'smiles',
            'similar_molecules'), or a dict with an 'error' entry.
        """
        smiles_info_list = []

        # If the query looks like a ChEMBL ID, fetch its SMILES and pref_name
        if isinstance(query, str) and query.upper().startswith("CHEMBL"):
            result = self.get_smiles_pref_name_by_chembl_id(query)
            if isinstance(result, dict) and "error" in result:
                return result
            smiles_info_list.append(
                {
                    "chembl_id": query,
                    "smiles": result["smiles"],
                    "pref_name": result.get("pref_name"),
                }
            )

        # If not a ChEMBL ID, use get_chembl_smiles_pref_name_id_by_name to get info
        if len(smiles_info_list) == 0 and isinstance(query, str):
            results = self.get_chembl_smiles_pref_name_id_by_name(query)
            if isinstance(results, dict) and "error" in results:
                return results
            smiles_info_list.extend(results)

        if len(smiles_info_list) == 0:
            return {"error": "SMILES representation not found for the compound."}

        results_list = []
        for info in smiles_info_list:
            smiles = info["smiles"]

            # Sanity-check that Indigo can parse the SMILES before querying.
            # NOTE(review): Indigo may raise instead of returning None for an
            # unparsable SMILES - confirm against the Indigo API.
            parsed = self.indigo.loadMolecule(smiles)
            if parsed is None:
                return {"error": "Failed to load molecule with Indigo."}

            # NOTE(review): quote() leaves "/" unescaped by default, and "/"
            # can occur in SMILES; verify how the similarity endpoint expects
            # such structures to be passed.
            encoded_smiles = quote(smiles)
            similarity_url = f"{self.base_url}/similarity/{encoded_smiles}/{similarity_threshold}.json?limit={max_results}"
            sim_response = requests.get(similarity_url, headers=self._JSON_HEADERS)
            sim_response.raise_for_status()

            similar_molecules = []
            for hit in sim_response.json().get("molecules", []):
                hit_structures = hit.get("molecule_structures", {})
                # Hits with an explicit null structure record are skipped.
                if hit_structures is None:
                    continue
                similar_molecules.append(
                    {
                        "chembl_id": hit.get("molecule_chembl_id"),
                        "pref_name": hit.get("pref_name", "N/A"),
                        "smiles": hit_structures.get("canonical_smiles", "N/A"),
                        "similarity": hit.get("similarity", "N/A"),
                    }
                )

            if len(similar_molecules) == 0:
                continue
            results_list.append(
                {
                    "chembl_id": info.get("chembl_id"),
                    "pref_name": info.get("pref_name"),
                    "smiles": smiles,
                    "similar_molecules": similar_molecules,
                }
            )

        return results_list
@@ -0,0 +1 @@
1
+ # Initialize the compose_scripts package