tooluniverse 0.2.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only; it reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/__init__.py +340 -4
- tooluniverse/admetai_tool.py +84 -0
- tooluniverse/agentic_tool.py +563 -0
- tooluniverse/alphafold_tool.py +96 -0
- tooluniverse/base_tool.py +129 -6
- tooluniverse/boltz_tool.py +207 -0
- tooluniverse/chem_tool.py +192 -0
- tooluniverse/compose_scripts/__init__.py +1 -0
- tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
- tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
- tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
- tooluniverse/compose_scripts/literature_tool.py +34 -0
- tooluniverse/compose_scripts/output_summarizer.py +279 -0
- tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
- tooluniverse/compose_scripts/tool_discover.py +705 -0
- tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
- tooluniverse/compose_tool.py +371 -0
- tooluniverse/ctg_tool.py +1002 -0
- tooluniverse/custom_tool.py +81 -0
- tooluniverse/dailymed_tool.py +108 -0
- tooluniverse/data/admetai_tools.json +155 -0
- tooluniverse/data/adverse_event_tools.json +108 -0
- tooluniverse/data/agentic_tools.json +1156 -0
- tooluniverse/data/alphafold_tools.json +87 -0
- tooluniverse/data/boltz_tools.json +9 -0
- tooluniverse/data/chembl_tools.json +16 -0
- tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
- tooluniverse/data/compose_tools.json +202 -0
- tooluniverse/data/dailymed_tools.json +70 -0
- tooluniverse/data/dataset_tools.json +646 -0
- tooluniverse/data/disease_target_score_tools.json +712 -0
- tooluniverse/data/efo_tools.json +17 -0
- tooluniverse/data/embedding_tools.json +319 -0
- tooluniverse/data/enrichr_tools.json +31 -0
- tooluniverse/data/europe_pmc_tools.json +22 -0
- tooluniverse/data/expert_feedback_tools.json +10 -0
- tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
- tooluniverse/data/fda_drug_labeling_tools.json +1 -1
- tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
- tooluniverse/data/finder_tools.json +209 -0
- tooluniverse/data/gene_ontology_tools.json +113 -0
- tooluniverse/data/gwas_tools.json +1082 -0
- tooluniverse/data/hpa_tools.json +333 -0
- tooluniverse/data/humanbase_tools.json +47 -0
- tooluniverse/data/idmap_tools.json +74 -0
- tooluniverse/data/mcp_client_tools_example.json +113 -0
- tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
- tooluniverse/data/medlineplus_tools.json +141 -0
- tooluniverse/data/monarch_tools.json +1 -1
- tooluniverse/data/openalex_tools.json +36 -0
- tooluniverse/data/opentarget_tools.json +1 -1
- tooluniverse/data/output_summarization_tools.json +101 -0
- tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
- tooluniverse/data/packages/categorized_tools.txt +206 -0
- tooluniverse/data/packages/cheminformatics_tools.json +347 -0
- tooluniverse/data/packages/earth_sciences_tools.json +74 -0
- tooluniverse/data/packages/genomics_tools.json +776 -0
- tooluniverse/data/packages/image_processing_tools.json +38 -0
- tooluniverse/data/packages/machine_learning_tools.json +789 -0
- tooluniverse/data/packages/neuroscience_tools.json +62 -0
- tooluniverse/data/packages/original_tools.txt +0 -0
- tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
- tooluniverse/data/packages/scientific_computing_tools.json +560 -0
- tooluniverse/data/packages/single_cell_tools.json +453 -0
- tooluniverse/data/packages/structural_biology_tools.json +396 -0
- tooluniverse/data/packages/visualization_tools.json +399 -0
- tooluniverse/data/pubchem_tools.json +215 -0
- tooluniverse/data/pubtator_tools.json +68 -0
- tooluniverse/data/rcsb_pdb_tools.json +1332 -0
- tooluniverse/data/reactome_tools.json +19 -0
- tooluniverse/data/semantic_scholar_tools.json +26 -0
- tooluniverse/data/special_tools.json +2 -25
- tooluniverse/data/tool_composition_tools.json +88 -0
- tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
- tooluniverse/data/txagent_client_tools.json +9 -0
- tooluniverse/data/uniprot_tools.json +211 -0
- tooluniverse/data/url_fetch_tools.json +94 -0
- tooluniverse/data/uspto_downloader_tools.json +9 -0
- tooluniverse/data/uspto_tools.json +811 -0
- tooluniverse/data/xml_tools.json +3275 -0
- tooluniverse/dataset_tool.py +296 -0
- tooluniverse/default_config.py +165 -0
- tooluniverse/efo_tool.py +42 -0
- tooluniverse/embedding_database.py +630 -0
- tooluniverse/embedding_sync.py +396 -0
- tooluniverse/enrichr_tool.py +266 -0
- tooluniverse/europe_pmc_tool.py +52 -0
- tooluniverse/execute_function.py +1775 -95
- tooluniverse/extended_hooks.py +444 -0
- tooluniverse/gene_ontology_tool.py +194 -0
- tooluniverse/graphql_tool.py +158 -36
- tooluniverse/gwas_tool.py +358 -0
- tooluniverse/hpa_tool.py +1645 -0
- tooluniverse/humanbase_tool.py +389 -0
- tooluniverse/logging_config.py +254 -0
- tooluniverse/mcp_client_tool.py +764 -0
- tooluniverse/mcp_integration.py +413 -0
- tooluniverse/mcp_tool_registry.py +925 -0
- tooluniverse/medlineplus_tool.py +337 -0
- tooluniverse/openalex_tool.py +228 -0
- tooluniverse/openfda_adv_tool.py +283 -0
- tooluniverse/openfda_tool.py +393 -160
- tooluniverse/output_hook.py +1122 -0
- tooluniverse/package_tool.py +195 -0
- tooluniverse/pubchem_tool.py +158 -0
- tooluniverse/pubtator_tool.py +168 -0
- tooluniverse/rcsb_pdb_tool.py +38 -0
- tooluniverse/reactome_tool.py +108 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
- tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
- tooluniverse/remote/expert_feedback/simple_test.py +23 -0
- tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
- tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
- tooluniverse/remote/immune_compass/compass_tool.py +327 -0
- tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
- tooluniverse/remote_tool.py +99 -0
- tooluniverse/restful_tool.py +53 -30
- tooluniverse/scripts/generate_tool_graph.py +408 -0
- tooluniverse/scripts/visualize_tool_graph.py +829 -0
- tooluniverse/semantic_scholar_tool.py +62 -0
- tooluniverse/smcp.py +2452 -0
- tooluniverse/smcp_server.py +975 -0
- tooluniverse/test/mcp_server_test.py +0 -0
- tooluniverse/test/test_admetai_tool.py +370 -0
- tooluniverse/test/test_agentic_tool.py +129 -0
- tooluniverse/test/test_alphafold_tool.py +71 -0
- tooluniverse/test/test_chem_tool.py +37 -0
- tooluniverse/test/test_compose_lieraturereview.py +63 -0
- tooluniverse/test/test_compose_tool.py +448 -0
- tooluniverse/test/test_dailymed.py +69 -0
- tooluniverse/test/test_dataset_tool.py +200 -0
- tooluniverse/test/test_disease_target_score.py +56 -0
- tooluniverse/test/test_drugbank_filter_examples.py +179 -0
- tooluniverse/test/test_efo.py +31 -0
- tooluniverse/test/test_enrichr_tool.py +21 -0
- tooluniverse/test/test_europe_pmc_tool.py +20 -0
- tooluniverse/test/test_fda_adv.py +95 -0
- tooluniverse/test/test_fda_drug_labeling.py +91 -0
- tooluniverse/test/test_gene_ontology_tools.py +66 -0
- tooluniverse/test/test_gwas_tool.py +139 -0
- tooluniverse/test/test_hpa.py +625 -0
- tooluniverse/test/test_humanbase_tool.py +20 -0
- tooluniverse/test/test_idmap_tools.py +61 -0
- tooluniverse/test/test_mcp_server.py +211 -0
- tooluniverse/test/test_mcp_tool.py +247 -0
- tooluniverse/test/test_medlineplus.py +220 -0
- tooluniverse/test/test_openalex_tool.py +32 -0
- tooluniverse/test/test_opentargets.py +28 -0
- tooluniverse/test/test_pubchem_tool.py +116 -0
- tooluniverse/test/test_pubtator_tool.py +37 -0
- tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
- tooluniverse/test/test_reactome.py +54 -0
- tooluniverse/test/test_semantic_scholar_tool.py +24 -0
- tooluniverse/test/test_software_tools.py +147 -0
- tooluniverse/test/test_tool_description_optimizer.py +49 -0
- tooluniverse/test/test_tool_finder.py +26 -0
- tooluniverse/test/test_tool_finder_llm.py +252 -0
- tooluniverse/test/test_tools_find.py +195 -0
- tooluniverse/test/test_uniprot_tools.py +74 -0
- tooluniverse/test/test_uspto_tool.py +72 -0
- tooluniverse/test/test_xml_tool.py +113 -0
- tooluniverse/tool_finder_embedding.py +267 -0
- tooluniverse/tool_finder_keyword.py +693 -0
- tooluniverse/tool_finder_llm.py +699 -0
- tooluniverse/tool_graph_web_ui.py +955 -0
- tooluniverse/tool_registry.py +416 -0
- tooluniverse/uniprot_tool.py +155 -0
- tooluniverse/url_tool.py +253 -0
- tooluniverse/uspto_tool.py +240 -0
- tooluniverse/utils.py +369 -41
- tooluniverse/xml_tool.py +369 -0
- tooluniverse-1.0.1.dist-info/METADATA +387 -0
- tooluniverse-1.0.1.dist-info/RECORD +182 -0
- tooluniverse-1.0.1.dist-info/entry_points.txt +9 -0
- tooluniverse/generate_mcp_tools.py +0 -113
- tooluniverse/mcp_server.py +0 -3340
- tooluniverse-0.2.0.dist-info/METADATA +0 -139
- tooluniverse-0.2.0.dist-info/RECORD +0 -21
- tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/WHEEL +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.1.dist-info}/top_level.txt +0 -0
tooluniverse/base_tool.py
CHANGED
|
@@ -1,11 +1,134 @@
|
|
|
1
1
|
from .utils import extract_function_call_json, evaluate_function_call
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import no_type_check
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Tool-error hierarchy: all specific execution failures derive from
# ToolExecutionError so callers can catch the whole family with one
# `except ToolExecutionError`. Catching the specific subclasses still
# works exactly as before, so this is backward-compatible.
class ToolExecutionError(Exception):
    """Base exception for tool execution errors."""


class ValidationError(ToolExecutionError):
    """Exception raised when input validation fails."""


class AuthenticationError(ToolExecutionError):
    """Exception raised when authentication fails."""


class RateLimitError(ToolExecutionError):
    """Exception raised when API rate limit is exceeded."""
|
|
21
|
+
|
|
2
22
|
|
|
3
23
|
class BaseTool:
|
|
4
24
|
def __init__(self, tool_config):
    """Store the tool configuration, merged with file-based defaults.

    Args:
        tool_config (dict): Tool-specific configuration. Its entries
            override any defaults loaded by ``load_defaults_from_file``
            (see ``_apply_defaults``).
    """
    self.tool_config = self._apply_defaults(tool_config)
|
|
26
|
+
|
|
27
|
+
@classmethod
def get_default_config_file(cls):
    """
    Get the path to the default configuration file for this tool type.

    The defaults file name is derived from the class name:
    ``<classname lowercased>_defaults.json`` inside ``tooluniverse/data``.

    This method uses a robust path resolution strategy that works across
    different installation scenarios:

    1. Installed packages: Uses importlib.resources for proper package
       resource access
    2. Development mode: Falls back to file-based path resolution
    3. Legacy Python: Handles importlib.resources and importlib_resources

    Override this method in subclasses to specify a custom defaults file.

    Returns:
        Path or resource object pointing to the defaults file. The file is
        not guaranteed to exist; callers (``load_defaults_from_file``)
        treat a missing file as "no defaults".
    """
    tool_type = cls.__name__

    # Use importlib.resources for robust path resolution across different
    # installation methods
    try:
        import importlib.resources as pkg_resources
    except ImportError:
        # Fallback for Python < 3.9 (importlib_resources backport)
        import importlib_resources as pkg_resources

    try:
        # Try to use package resources first (works with installed
        # packages). Use the newer files() API
        data_files = pkg_resources.files("tooluniverse.data")
        defaults_file = data_files / f"{tool_type.lower()}_defaults.json"

        # For compatibility, convert to a regular Path if possible
        # (Traversable objects from zip imports have no resolve()).
        if hasattr(defaults_file, "resolve"):
            return defaults_file.resolve()
        else:
            # For older Python versions or special cases, return resource
            # path
            return defaults_file

    except (FileNotFoundError, ModuleNotFoundError, AttributeError):
        # Fallback to file-based path resolution for development/local use
        # (e.g. running from a source checkout where the package is not
        # installed).
        current_dir = Path(__file__).parent
        defaults_file = current_dir / "data" / f"{tool_type.lower()}_defaults.json"
        return defaults_file
|
|
74
|
+
|
|
75
|
+
@classmethod
def load_defaults_from_file(cls):
    """Read this tool type's default configuration from its JSON file.

    Returns the mapping stored under the ``<classname lowercased>_defaults``
    key of the document, or an empty dict when the file is absent,
    unreadable, or not valid JSON.
    """
    defaults_file = cls.get_default_config_file()

    try:
        # A plain Path that does not exist simply means no defaults ship
        # with this tool type.
        if hasattr(defaults_file, "exists") and not defaults_file.exists():
            return {}

        # importlib resource objects expose read_text(); plain file paths
        # are opened the classic way. Either branch yields the parsed
        # JSON document.
        if hasattr(defaults_file, "read_text"):
            data = json.loads(defaults_file.read_text(encoding="utf-8"))
        else:
            with open(defaults_file, "r", encoding="utf-8") as handle:
                data = json.load(handle)

        # Defaults live under a per-tool-type key inside the document.
        return data.get(f"{cls.__name__.lower()}_defaults", {})

    except (FileNotFoundError, json.JSONDecodeError):
        # Missing file or malformed JSON: behave as "no defaults".
        return {}
    except Exception as e:
        print(f"Warning: Could not load defaults for {cls.__name__}: {e}")
        return {}
|
|
106
|
+
|
|
107
|
+
def _apply_defaults(self, tool_config):
    """Merge file-based defaults underneath the given tool config.

    Entries in *tool_config* take precedence over the defaults. When no
    defaults are available the original config object is returned
    unchanged.
    """
    defaults = self.load_defaults_from_file()

    if not defaults:
        # Nothing to merge — hand back the caller's config as-is.
        return tool_config

    # Defaults first, then the tool-specific config so its keys win.
    return {**defaults, **tool_config}
|
|
123
|
+
|
|
124
|
+
@no_type_check
|
|
125
|
+
def run(self, arguments=None):
|
|
126
|
+
"""Execute the tool.
|
|
127
|
+
|
|
128
|
+
The default BaseTool implementation accepts an optional arguments
|
|
129
|
+
mapping to align with most concrete tool implementations which expect
|
|
130
|
+
a dictionary of inputs.
|
|
131
|
+
"""
|
|
9
132
|
|
|
10
133
|
def check_function_call(self, function_call_json):
|
|
11
134
|
if isinstance(function_call_json, str):
|
|
@@ -22,11 +145,11 @@ class BaseTool:
|
|
|
22
145
|
list: List of required parameters for the given endpoint.
|
|
23
146
|
"""
|
|
24
147
|
required_params = []
|
|
25
|
-
parameters = self.tool_config.get(
|
|
148
|
+
parameters = self.tool_config.get("parameter", {}).get("properties", {})
|
|
26
149
|
|
|
27
150
|
# Check each parameter to see if it is required
|
|
28
151
|
for param, details in parameters.items():
|
|
29
|
-
if details.get(
|
|
152
|
+
if details.get("required", False):
|
|
30
153
|
required_params.append(param.lower())
|
|
31
154
|
|
|
32
|
-
return required_params
|
|
155
|
+
return required_params
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pprint
|
|
3
|
+
import subprocess
|
|
4
|
+
import tempfile
|
|
5
|
+
import yaml
|
|
6
|
+
import json
|
|
7
|
+
import shutil
|
|
8
|
+
from .base_tool import BaseTool
|
|
9
|
+
from .tool_registry import register_tool
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@register_tool("Boltz2DockingTool")
class Boltz2DockingTool(BaseTool):
    """
    Tool to perform protein-ligand docking and affinity prediction using the local Boltz-2 model.
    This tool constructs a YAML input file, runs the `boltz predict` command,
    and parses the output to return the predicted structure and affinity.
    """

    def __init__(self, tool_config: dict):
        """
        Initializes the BoltzDockingTool.
        Checks if the 'boltz' command is available in the system's PATH.

        Raises:
            EnvironmentError: if the `boltz` executable cannot be found.
        """
        super().__init__(tool_config)
        if not shutil.which("boltz"):
            raise EnvironmentError(
                "The 'boltz' command is not found. "
                "Please ensure the 'boltz' package is installed and accessible in the system's PATH. "
                "Installation guide: https://github.com/jwohlwend/boltz"
            )

    def _build_yaml_input(self, arguments: dict) -> dict:
        """Constructs the YAML data structure for the Boltz input.

        Raises:
            ValueError: if no ligands are given, a ligand lacks an 'id',
                or a ligand has neither a 'smiles' nor a 'ccd' key.
        """
        protein_sequence = arguments.get("protein_sequence")
        ligands = arguments.get("ligands", [])

        # The first ligand is assumed to be the binder for affinity prediction
        if not ligands:
            raise ValueError(
                "At least one ligand must be provided in the 'ligands' list."
            )

        binder_id = ligands[0].get("id")
        if not binder_id:
            raise ValueError("The first ligand in the list must have a valid 'id'.")

        # --- Sequences Section ---
        # The protein always gets chain id "A"; ligand chain ids come from
        # the caller-supplied 'id' fields.
        sequences = [{"protein": {"id": "A", "sequence": protein_sequence}}]

        for i, ligand_data in enumerate(ligands):
            chain_id = ligand_data.get("id")
            if not chain_id:
                raise ValueError(f"Ligand at index {i} must have an 'id' key.")

            entry = {"id": chain_id}
            if "smiles" in ligand_data:
                entry["smiles"] = ligand_data["smiles"]
            elif "ccd" in ligand_data:
                entry["ccd"] = ligand_data["ccd"]
            else:
                raise ValueError(
                    f"Ligand at index {i} must have a 'smiles' or 'ccd' key."
                )
            sequences.append({"ligand": entry})

        # --- Properties Section (for Affinity) ---
        properties = [{"affinity": {"binder": binder_id}}]

        # --- Final YAML Structure ---
        yaml_input = {"version": 1, "sequences": sequences, "properties": properties}

        # Add optional fields
        if "constraints" in arguments:
            yaml_input["constraints"] = arguments["constraints"]
        if "templates" in arguments:
            yaml_input["templates"] = arguments["templates"]

        return yaml_input

    def run(self, arguments: dict | None = None, timeout: int = 1200) -> dict:
        """
        Executes the Boltz prediction.

        Args:
            arguments (dict): A dictionary containing the necessary inputs.
                - protein_sequence (str): The amino acid sequence of the protein.
                - ligands (list[dict]): A list of ligands, each with a 'smiles' or 'ccd' key.
                - constraints (list[dict], optional): Covalent bonds or other constraints.
                - templates (list[dict], optional): Structural templates.
                - other optional boltz CLI flags (e.g., 'recycling_steps').
            timeout (int): The maximum time in seconds to wait for the Boltz command to complete.

        Returns:
            dict: A dictionary containing the path to the predicted structure and affinity data, or an error.
        """
        arguments = arguments or {}
        if not arguments.get("protein_sequence"):
            return {"error": "The 'protein_sequence' parameter is required."}

        # Create a temporary directory to store input and output files
        with tempfile.TemporaryDirectory() as temp_dir:
            input_filename = "boltz_input"
            input_yaml_path = os.path.join(temp_dir, f"{input_filename}.yaml")
            output_dir = os.path.join(temp_dir, "results")
            os.makedirs(output_dir, exist_ok=True)

            # Build and write the input YAML file
            yaml_data = self._build_yaml_input(arguments)
            with open(input_yaml_path, "w") as f:
                yaml.dump(yaml_data, f, sort_keys=False)

            # Construct the command-line arguments for Boltz
            command = [
                "boltz",
                "predict",
                input_yaml_path,
                "--out_dir",
                output_dir,
                "--use_msa_server",
                "--override",  # Override existing results if any
            ]

            # Add optional command-line flags from arguments
            for key in [
                "recycling_steps",
                "diffusion_samples",
                "sampling_steps",
                "step_scale",
            ]:
                if key in arguments:
                    command.extend([f"--{key}", str(arguments[key])])

            if arguments.get("use_potentials", False):
                command.append("--use_potentials")

            # Execute the Boltz command.
            # Fix: previously `check=True` let CalledProcessError (and
            # TimeoutExpired) propagate out of run(), contradicting the
            # documented "or an error" return contract and discarding the
            # captured stderr that explains the failure.
            try:
                subprocess.run(
                    command,
                    capture_output=True,
                    text=True,
                    timeout=timeout,
                    check=True,
                )
            except subprocess.TimeoutExpired:
                return {
                    "error": f"Boltz prediction timed out after {timeout} seconds."
                }
            except subprocess.CalledProcessError as exc:
                return {
                    "error": f"Boltz prediction failed (exit code {exc.returncode}).",
                    "stderr": exc.stderr,
                }

            # --- Parse the output files ---
            # 1. locate the Boltz run folder under your out_dir
            root_dirs = [
                d
                for d in os.listdir(output_dir)
                if os.path.isdir(os.path.join(output_dir, d))
            ]
            if not root_dirs:
                return {"error": "No Boltz run folder found under out_dir"}
            if len(root_dirs) > 1:
                # you could pick the latest by timestamp instead of the first
                run_dir_name = sorted(root_dirs)[-1]
            else:
                run_dir_name = root_dirs[0]

            run_root = os.path.join(output_dir, run_dir_name)

            # 2. now point at predictions/<input_filename>
            prediction_folder = os.path.join(run_root, "predictions", input_filename)
            results = {}

            # 3. structure .cif (only returned when explicitly requested,
            # since the CIF text can be large)
            if arguments.get("return_structure", False):
                structure_file = os.path.join(
                    prediction_folder, f"{input_filename}_model_0.cif"
                )
                if os.path.exists(structure_file):
                    with open(structure_file, "r") as f:
                        results["predicted_structure"] = f.read()
                    results["structure_format"] = "cif"
                else:
                    results["structure_error"] = (
                        f"Missing {os.path.basename(structure_file)}"
                    )

            # 4. affinity .json
            affinity_file = os.path.join(
                prediction_folder, f"affinity_{input_filename}.json"
            )
            if os.path.exists(affinity_file):
                with open(affinity_file, "r") as f:
                    results["affinity_prediction"] = json.load(f)
            else:
                results["affinity_error"] = f"Missing {os.path.basename(affinity_file)}"

            return results
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
if __name__ == "__main__":
    # Manual smoke test: dock benzene against a toy 20-residue sequence.
    docking_tool = Boltz2DockingTool(tool_config={})
    example_arguments = {
        "protein_sequence": "ACDEFGHIKLMNPQRSTVWY",
        "ligands": [{"id": "LIG1", "smiles": "C1=CC=CC=C1"}],
        "use_potentials": False,
        "diffusion_samples": 1,
        "return_structure": False,
    }
    pprint.pprint(docking_tool.run(example_arguments))
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from urllib.parse import quote
|
|
3
|
+
|
|
4
|
+
# from rdkit import Chem
|
|
5
|
+
from .base_tool import BaseTool
|
|
6
|
+
from .tool_registry import register_tool
|
|
7
|
+
from indigo import Indigo
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@register_tool("ChEMBLTool")
class ChEMBLTool(BaseTool):
    """
    Tool to search for molecules similar to a given compound name or SMILES using the ChEMBL Web Services API.
    """

    def __init__(self, tool_config, base_url="https://www.ebi.ac.uk/chembl/api/data"):
        """Initialize with the ChEMBL REST base URL and an Indigo instance
        (used to validate SMILES strings before querying)."""
        super().__init__(tool_config)
        self.base_url = base_url
        self.indigo = Indigo()

    def run(self, arguments):
        """Entry point.

        Args:
            arguments (dict):
                - query (str): compound name or ChEMBL ID (required).
                - similarity_threshold (int): similarity percentage cutoff,
                  default 80.
                - max_results (int): cap on similar molecules per compound,
                  default 20.

        Returns:
            list[dict] of per-compound results, or a dict with an "error" key.
        """
        query = arguments.get("query")
        similarity_threshold = arguments.get("similarity_threshold", 80)
        max_results = arguments.get("max_results", 20)

        if not query:
            return {"error": "`query` parameter is required."}
        return self._search_similar_molecules(query, similarity_threshold, max_results)

    def get_chembl_id_by_name(self, compound_name):
        """
        Search ChEMBL for a compound by name and return the ChEMBL ID of the first match.

        Returns:
            dict: {"chembl_ids": [...]} for up to the top 3 hits, or a dict
            with an "error" key when nothing usable is found.

        Raises:
            requests.HTTPError: if the ChEMBL API responds with an error status.
        """
        headers = {"Accept": "application/json"}
        search_url = f"{self.base_url}/molecule/search.json?q={quote(compound_name)}"
        # Fix: removed a leftover debug `print(search_url)` and an
        # unreachable duplicate `if not results:` check (the combined
        # check above already covers the empty case).
        response = requests.get(search_url, headers=headers)
        response.raise_for_status()
        results = response.json().get("molecules", [])
        if not results or not isinstance(results, list):
            return {"error": "No valid results found for the compound name."}
        top_molecules = results[:3]  # Get the top 3 results
        chembl_ids = [
            molecule.get("molecule_chembl_id")
            for molecule in top_molecules
            if molecule.get("molecule_chembl_id")
        ]
        if not chembl_ids:
            return {"error": "No ChEMBL IDs found for the compound name."}
        return {"chembl_ids": chembl_ids}

    def get_smiles_pref_name_by_chembl_id(self, query):
        """
        Given a ChEMBL ID, return a dict with canonical SMILES and preferred name.

        Returns None when *query* does not look like a ChEMBL ID, and an
        "error" dict when the ID resolves but lacks usable structure data.
        """
        headers = {"Accept": "application/json"}
        if query.upper().startswith("CHEMBL"):
            molecule_url = f"{self.base_url}/molecule/{quote(query)}.json"
            response = requests.get(molecule_url, headers=headers)
            response.raise_for_status()
            molecule = response.json()
            if not molecule or not isinstance(molecule, dict):
                return {"error": "No valid molecule found for the given ChEMBL ID."}
            molecule_structures = molecule.get("molecule_structures")
            if not molecule_structures or not isinstance(molecule_structures, dict):
                return {
                    "error": "Molecule structures not found or invalid for the ChEMBL ID."
                }
            smiles = molecule_structures.get("canonical_smiles")
            pref_name = molecule.get("pref_name")
            if not smiles:
                return {"error": "SMILES not found for the given ChEMBL ID."}
            return {"smiles": smiles, "pref_name": pref_name}
        else:
            return None

    def get_chembl_smiles_pref_name_id_by_name(self, compound_name):
        """
        Search ChEMBL for a compound by name and return a list of dicts with ChEMBL ID, canonical SMILES, and preferred name for the top 5 matches.
        """
        headers = {"Accept": "application/json"}
        search_url = f"{self.base_url}/molecule/search.json?q={quote(compound_name)}"
        response = requests.get(search_url, headers=headers)
        response.raise_for_status()
        results = response.json().get("molecules", [])
        if not results or not isinstance(results, list):
            return {"error": "No valid results found for the compound name."}
        top_molecules = results[:5]
        output = []
        for molecule in top_molecules:
            chembl_id = molecule.get("molecule_chembl_id", None)
            molecule_structures = molecule.get("molecule_structures", {})
            if molecule_structures is not None:
                smiles = molecule_structures.get("canonical_smiles", None)
            else:
                smiles = None
            pref_name = molecule.get("pref_name")
            if chembl_id and smiles:
                output.append(
                    {"chembl_id": chembl_id, "smiles": smiles, "pref_name": pref_name}
                )
            elif chembl_id and not smiles:
                # Structure data missing from the search hit: fall back to a
                # direct molecule lookup by ID.
                smiles_pre_name_dict = self.get_smiles_pref_name_by_chembl_id(chembl_id)
                if (
                    isinstance(smiles_pre_name_dict, dict)
                    and "error" not in smiles_pre_name_dict
                ):
                    output.append(
                        {
                            "chembl_id": chembl_id,
                            "smiles": smiles_pre_name_dict["smiles"],
                            "pref_name": smiles_pre_name_dict.get("pref_name"),
                        }
                    )
        if not output:
            return {"error": "No ChEMBL IDs or SMILES found for the compound name."}
        return output

    def _search_similar_molecules(self, query, similarity_threshold, max_results):
        """Resolve *query* to (chembl_id, smiles, pref_name) records, then
        fetch similar molecules for each via the ChEMBL similarity endpoint."""
        headers = {"Accept": "application/json"}

        smiles_info_list = []

        # If the query looks like a ChEMBL ID, fetch its SMILES and pref_name
        if isinstance(query, str) and query.upper().startswith("CHEMBL"):
            result = self.get_smiles_pref_name_by_chembl_id(query)
            if isinstance(result, dict) and "error" in result:
                return result
            smiles_info_list.append(
                {
                    "chembl_id": query,
                    "smiles": result["smiles"],
                    "pref_name": result.get("pref_name"),
                }
            )

        # If not a ChEMBL ID, use get_chembl_smiles_pref_name_id_by_name to get info
        if len(smiles_info_list) == 0 and isinstance(query, str):
            results = self.get_chembl_smiles_pref_name_id_by_name(query)
            if isinstance(results, dict) and "error" in results:
                return results
            smiles_info_list.extend(results)

        if len(smiles_info_list) == 0:
            return {"error": "SMILES representation not found for the compound."}

        results_list = []
        for info in smiles_info_list:
            smiles = info["smiles"]
            pref_name = info.get("pref_name")
            chembl_id = info.get("chembl_id")
            # Validate the SMILES before hitting the API. NOTE(review):
            # Indigo typically raises on an invalid SMILES rather than
            # returning None, so this None check may be dead — confirm.
            mol = self.indigo.loadMolecule(smiles)
            if mol is None:
                return {"error": "Failed to load molecule with Indigo."}

            encoded_smiles = quote(smiles)
            similarity_url = f"{self.base_url}/similarity/{encoded_smiles}/{similarity_threshold}.json?limit={max_results}"
            sim_response = requests.get(similarity_url, headers=headers)
            sim_response.raise_for_status()
            sim_results = sim_response.json().get("molecules", [])
            similar_molecules = []
            # Fix: loop variable renamed from `mol` to `candidate` so it no
            # longer shadows the Indigo molecule loaded above.
            for candidate in sim_results:
                sim_chembl_id = candidate.get("molecule_chembl_id")
                sim_pref_name = candidate.get("pref_name", "N/A")
                mol_structures = candidate.get("molecule_structures", {})
                if mol_structures is None:
                    continue
                mol_smiles = mol_structures.get("canonical_smiles", "N/A")
                similarity = candidate.get("similarity", "N/A")
                similar_molecules.append(
                    {
                        "chembl_id": sim_chembl_id,
                        "pref_name": sim_pref_name,
                        "smiles": mol_smiles,
                        "similarity": similarity,
                    }
                )
            if len(similar_molecules) == 0:
                continue
            results_list.append(
                {
                    "chembl_id": chembl_id,
                    "pref_name": pref_name,
                    "smiles": smiles,
                    "similar_molecules": similar_molecules,
                }
            )

        return results_list
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Initialize the compose_scripts package
|