tooluniverse 0.1.4__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/__init__.py +340 -4
- tooluniverse/admetai_tool.py +84 -0
- tooluniverse/agentic_tool.py +563 -0
- tooluniverse/alphafold_tool.py +96 -0
- tooluniverse/base_tool.py +129 -6
- tooluniverse/boltz_tool.py +207 -0
- tooluniverse/chem_tool.py +192 -0
- tooluniverse/compose_scripts/__init__.py +1 -0
- tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
- tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
- tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
- tooluniverse/compose_scripts/literature_tool.py +34 -0
- tooluniverse/compose_scripts/output_summarizer.py +279 -0
- tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
- tooluniverse/compose_scripts/tool_discover.py +705 -0
- tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
- tooluniverse/compose_tool.py +371 -0
- tooluniverse/ctg_tool.py +1002 -0
- tooluniverse/custom_tool.py +81 -0
- tooluniverse/dailymed_tool.py +108 -0
- tooluniverse/data/admetai_tools.json +155 -0
- tooluniverse/data/agentic_tools.json +1156 -0
- tooluniverse/data/alphafold_tools.json +87 -0
- tooluniverse/data/boltz_tools.json +9 -0
- tooluniverse/data/chembl_tools.json +16 -0
- tooluniverse/data/clait_tools.json +108 -0
- tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
- tooluniverse/data/compose_tools.json +202 -0
- tooluniverse/data/dailymed_tools.json +70 -0
- tooluniverse/data/dataset_tools.json +646 -0
- tooluniverse/data/disease_target_score_tools.json +712 -0
- tooluniverse/data/efo_tools.json +17 -0
- tooluniverse/data/embedding_tools.json +319 -0
- tooluniverse/data/enrichr_tools.json +31 -0
- tooluniverse/data/europe_pmc_tools.json +22 -0
- tooluniverse/data/expert_feedback_tools.json +10 -0
- tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
- tooluniverse/data/fda_drug_labeling_tools.json +544 -168
- tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
- tooluniverse/data/finder_tools.json +209 -0
- tooluniverse/data/gene_ontology_tools.json +113 -0
- tooluniverse/data/gwas_tools.json +1082 -0
- tooluniverse/data/hpa_tools.json +333 -0
- tooluniverse/data/humanbase_tools.json +47 -0
- tooluniverse/data/idmap_tools.json +74 -0
- tooluniverse/data/mcp_client_tools_example.json +113 -0
- tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
- tooluniverse/data/medlineplus_tools.json +141 -0
- tooluniverse/data/monarch_tools.json +1 -1
- tooluniverse/data/openalex_tools.json +36 -0
- tooluniverse/data/opentarget_tools.json +82 -58
- tooluniverse/data/output_summarization_tools.json +101 -0
- tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
- tooluniverse/data/packages/categorized_tools.txt +206 -0
- tooluniverse/data/packages/cheminformatics_tools.json +347 -0
- tooluniverse/data/packages/earth_sciences_tools.json +74 -0
- tooluniverse/data/packages/genomics_tools.json +776 -0
- tooluniverse/data/packages/image_processing_tools.json +38 -0
- tooluniverse/data/packages/machine_learning_tools.json +789 -0
- tooluniverse/data/packages/neuroscience_tools.json +62 -0
- tooluniverse/data/packages/original_tools.txt +0 -0
- tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
- tooluniverse/data/packages/scientific_computing_tools.json +560 -0
- tooluniverse/data/packages/single_cell_tools.json +453 -0
- tooluniverse/data/packages/software_tools.json +4954 -0
- tooluniverse/data/packages/structural_biology_tools.json +396 -0
- tooluniverse/data/packages/visualization_tools.json +399 -0
- tooluniverse/data/pubchem_tools.json +215 -0
- tooluniverse/data/pubtator_tools.json +68 -0
- tooluniverse/data/rcsb_pdb_tools.json +1332 -0
- tooluniverse/data/reactome_tools.json +19 -0
- tooluniverse/data/semantic_scholar_tools.json +26 -0
- tooluniverse/data/special_tools.json +2 -25
- tooluniverse/data/tool_composition_tools.json +88 -0
- tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
- tooluniverse/data/txagent_client_tools.json +9 -0
- tooluniverse/data/uniprot_tools.json +211 -0
- tooluniverse/data/url_fetch_tools.json +94 -0
- tooluniverse/data/uspto_downloader_tools.json +9 -0
- tooluniverse/data/uspto_tools.json +811 -0
- tooluniverse/data/xml_tools.json +3275 -0
- tooluniverse/dataset_tool.py +296 -0
- tooluniverse/default_config.py +165 -0
- tooluniverse/efo_tool.py +42 -0
- tooluniverse/embedding_database.py +630 -0
- tooluniverse/embedding_sync.py +396 -0
- tooluniverse/enrichr_tool.py +266 -0
- tooluniverse/europe_pmc_tool.py +52 -0
- tooluniverse/execute_function.py +1775 -95
- tooluniverse/extended_hooks.py +444 -0
- tooluniverse/gene_ontology_tool.py +194 -0
- tooluniverse/graphql_tool.py +158 -36
- tooluniverse/gwas_tool.py +358 -0
- tooluniverse/hpa_tool.py +1645 -0
- tooluniverse/humanbase_tool.py +389 -0
- tooluniverse/logging_config.py +254 -0
- tooluniverse/mcp_client_tool.py +764 -0
- tooluniverse/mcp_integration.py +413 -0
- tooluniverse/mcp_tool_registry.py +925 -0
- tooluniverse/medlineplus_tool.py +337 -0
- tooluniverse/openalex_tool.py +228 -0
- tooluniverse/openfda_adv_tool.py +283 -0
- tooluniverse/openfda_tool.py +393 -160
- tooluniverse/output_hook.py +1122 -0
- tooluniverse/package_tool.py +195 -0
- tooluniverse/pubchem_tool.py +158 -0
- tooluniverse/pubtator_tool.py +168 -0
- tooluniverse/rcsb_pdb_tool.py +38 -0
- tooluniverse/reactome_tool.py +108 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
- tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
- tooluniverse/remote/expert_feedback/simple_test.py +23 -0
- tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
- tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
- tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
- tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
- tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
- tooluniverse/remote/immune_compass/compass_tool.py +327 -0
- tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
- tooluniverse/remote_tool.py +99 -0
- tooluniverse/restful_tool.py +53 -30
- tooluniverse/scripts/generate_tool_graph.py +408 -0
- tooluniverse/scripts/visualize_tool_graph.py +829 -0
- tooluniverse/semantic_scholar_tool.py +62 -0
- tooluniverse/smcp.py +2452 -0
- tooluniverse/smcp_server.py +975 -0
- tooluniverse/test/mcp_server_test.py +0 -0
- tooluniverse/test/test_admetai_tool.py +370 -0
- tooluniverse/test/test_agentic_tool.py +129 -0
- tooluniverse/test/test_alphafold_tool.py +71 -0
- tooluniverse/test/test_chem_tool.py +37 -0
- tooluniverse/test/test_compose_lieraturereview.py +63 -0
- tooluniverse/test/test_compose_tool.py +448 -0
- tooluniverse/test/test_dailymed.py +69 -0
- tooluniverse/test/test_dataset_tool.py +200 -0
- tooluniverse/test/test_disease_target_score.py +56 -0
- tooluniverse/test/test_drugbank_filter_examples.py +179 -0
- tooluniverse/test/test_efo.py +31 -0
- tooluniverse/test/test_enrichr_tool.py +21 -0
- tooluniverse/test/test_europe_pmc_tool.py +20 -0
- tooluniverse/test/test_fda_adv.py +95 -0
- tooluniverse/test/test_fda_drug_labeling.py +91 -0
- tooluniverse/test/test_gene_ontology_tools.py +66 -0
- tooluniverse/test/test_gwas_tool.py +139 -0
- tooluniverse/test/test_hpa.py +625 -0
- tooluniverse/test/test_humanbase_tool.py +20 -0
- tooluniverse/test/test_idmap_tools.py +61 -0
- tooluniverse/test/test_mcp_server.py +211 -0
- tooluniverse/test/test_mcp_tool.py +247 -0
- tooluniverse/test/test_medlineplus.py +220 -0
- tooluniverse/test/test_openalex_tool.py +32 -0
- tooluniverse/test/test_opentargets.py +28 -0
- tooluniverse/test/test_pubchem_tool.py +116 -0
- tooluniverse/test/test_pubtator_tool.py +37 -0
- tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
- tooluniverse/test/test_reactome.py +54 -0
- tooluniverse/test/test_semantic_scholar_tool.py +24 -0
- tooluniverse/test/test_software_tools.py +147 -0
- tooluniverse/test/test_tool_description_optimizer.py +49 -0
- tooluniverse/test/test_tool_finder.py +26 -0
- tooluniverse/test/test_tool_finder_llm.py +252 -0
- tooluniverse/test/test_tools_find.py +195 -0
- tooluniverse/test/test_uniprot_tools.py +74 -0
- tooluniverse/test/test_uspto_tool.py +72 -0
- tooluniverse/test/test_xml_tool.py +113 -0
- tooluniverse/tool_finder_embedding.py +267 -0
- tooluniverse/tool_finder_keyword.py +693 -0
- tooluniverse/tool_finder_llm.py +699 -0
- tooluniverse/tool_graph_web_ui.py +955 -0
- tooluniverse/tool_registry.py +416 -0
- tooluniverse/uniprot_tool.py +155 -0
- tooluniverse/url_tool.py +253 -0
- tooluniverse/uspto_tool.py +240 -0
- tooluniverse/utils.py +369 -41
- tooluniverse/xml_tool.py +369 -0
- tooluniverse-1.0.0.dist-info/METADATA +377 -0
- tooluniverse-1.0.0.dist-info/RECORD +186 -0
- {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +1 -1
- tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
- tooluniverse-0.1.4.dist-info/METADATA +0 -141
- tooluniverse-0.1.4.dist-info/RECORD +0 -18
- {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from fastmcp import FastMCP
|
|
2
|
+
import sys
|
|
3
|
+
import os
|
|
4
|
+
from tooluniverse.boltz_tool import Boltz2DockingTool
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
# The Boltz tool definitions are stored as a JSON list in a file that sits
# next to this module; without it the server cannot register anything, so a
# missing file is reported in red and the process exits with status 1.
try:
    with open(
        os.path.join(os.path.dirname(__file__), "boltz_client_tools.json"), "r"
    ) as config_file:
        boltz_tools = json.load(config_file)
except FileNotFoundError as missing:
    print(f"\033[91mError: {missing}\033[0m")
    print(
        f"\033[91mIs boltz_client_tools.json in the parent directory of {__file__}?\033[0m"
    )
    sys.exit(1)

server = FastMCP("Your MCP Server", stateless_http=True)

# One docking tool instance per config entry, addressable by the entry's name.
agents = {
    entry["name"]: Boltz2DockingTool(tool_config=entry) for entry in boltz_tools
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@server.tool()
def run_boltz2(query: dict):
    """Run the Boltz2 docking tool.
    Args:
        "query" dict: A dictionary containing:
            - protein_sequence (str): Protein sequence using single-letter amino acid codes
            - ligands (list): List of ligand dictionaries, each containing:
                - id (str): Unique identifier for the ligand
                - smiles (str): SMILES representation of the ligand molecule
            - without_potentials (bool): Whether to run without potentials (default: False)
            - diffusion_samples (int): Number of diffusion samples to generate (default: 1)
            - Additional constraint keys may be included as needed
    Returns:
        dict: A dictionary containing the docking results with the following structure:
            - predicted_structure (str): The predicted protein-ligand complex structure in CIF format
            - structure_format (str): Format of the structure file (typically 'cif')
            - structure_error (str): Error message if structure file is missing
            - affinity_prediction (object): JSON object containing affinity predictions and related data
            - affinity_error (str): Error message if affinity file is missing
    """
    # NOTE: the docstring above is kept verbatim on purpose -- it is attached
    # to the registered tool, so its wording is part of what clients see.
    # The tool is looked up by the fixed name "boltz2_docking" in the
    # module-level `agents` mapping and the query is passed through untouched.
    docking_tool = agents["boltz2_docking"]
    return docking_tool.run(query)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
if __name__ == "__main__":
    # Launch the Boltz MCP server over streamable HTTP, listening on every
    # network interface at port 8080.
    launch_options = {"transport": "streamable-http", "host": "0.0.0.0", "port": 8080}
    server.run(**launch_options)
|
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DepMap Gene Correlation Tool - MCP Server
|
|
3
|
+
|
|
4
|
+
This module provides an MCP (Model Context Protocol) server for analyzing gene-gene
|
|
5
|
+
correlations from the DepMap (Dependency Map) dataset. The tool processes CRISPR
|
|
6
|
+
knockout screening data from cancer cell lines to identify genetic dependencies
|
|
7
|
+
and synthetic lethal relationships.
|
|
8
|
+
|
|
9
|
+
The DepMap dataset contains systematic CRISPR-Cas9 knockout screens across over
|
|
10
|
+
1,000 cancer cell lines, providing insights into essential genes and genetic
|
|
11
|
+
interactions that can inform therapeutic target discovery and drug development.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import os
|
|
16
|
+
import h5py
|
|
17
|
+
import asyncio
|
|
18
|
+
import uuid
|
|
19
|
+
from typing import Dict, Optional
|
|
20
|
+
from fastmcp import FastMCP
|
|
21
|
+
|
|
22
|
+
# Module-wide FastMCP application object; the correlation tool coroutine
# defined further down registers itself on it through @server.tool().
server = FastMCP("DepMap Gene Correlation SMCP Server")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DepmapCorrelationTool:
    """
    Look up gene-gene correlations from preprocessed DepMap CRISPR knockout data.

    The tool:
    - loads a gene symbol index and maps each symbol to a matrix row/column
    - serves correlation, p-value and (when present) adjusted p-value lookups
    - supports two on-disk layouts inside ``data_dir``:
        * dense:  ``corr_matrix.npy`` + ``p_val_matrix.npy``
                  (+ optional ``p_adj_matrix.npy``), opened memory-mapped
        * sparse: ``gene_correlations.h5`` with CSR groups ``corr``, ``p_val``
                  and optionally ``p_adj`` (datasets ``indptr``, ``indices``,
                  ``data``)

    Attributes set during construction:
        data_dir, gene_names, gene_to_idx, num_genes, format ("dense" or
        "sparse"), has_adj_p, plus either the three matrices or ``h5_file``.
    """

    def __init__(self, data_dir: Optional[str] = None):
        """
        Initialize the DepMap correlation tool by loading preprocessed correlation data.

        Args:
            data_dir (str, optional): Path to directory containing DepMap correlation matrices.
                If None, uses DEPMAP_DATA_PATH/depmap_24q2.

        Raises:
            FileNotFoundError: If the data directory or the required files cannot be found.
        """
        if data_dir is None:
            # An unset DEPMAP_DATA_PATH yields the relative path "depmap_24q2".
            depmap_data_path = os.getenv("DEPMAP_DATA_PATH", "")
            self.data_dir = os.path.join(depmap_data_path, "depmap_24q2")
        else:
            self.data_dir = data_dir

        if not os.path.exists(self.data_dir):
            raise FileNotFoundError(
                f"DepMap data directory not found at {self.data_dir}. Please check your DEPMAP_DATA_PATH."
            )

        print(f"Initializing DepMap correlation tool from data: {self.data_dir}...")
        self._load_gene_index()
        self._load_correlation_data()
        print(
            f"DepMap tool initialized successfully (loaded correlation data for {self.num_genes:,} genes)."
        )

    def _load_gene_index(self):
        """
        Load the gene symbol list and build the symbol -> matrix index mapping.

        ``gene_idx_array.npy`` is preferred; ``gene_names.txt`` (one symbol per
        line) is the fallback.

        Raises:
            FileNotFoundError: If neither gene index file is found.
        """
        try:
            gene_idx_path = os.path.join(self.data_dir, "gene_idx_array.npy")
            if os.path.exists(gene_idx_path):
                # NOTE(review): allow_pickle is kept from the original call;
                # numpy refuses to memory-map object arrays, so it is not
                # expected to unpickle anything here -- confirm before relying
                # on it for untrusted files.
                self.gene_names = np.load(
                    gene_idx_path, allow_pickle=True, mmap_mode="r"
                )
            else:
                gene_names_path = os.path.join(self.data_dir, "gene_names.txt")
                with open(gene_names_path, "r", encoding="utf-8") as f:
                    self.gene_names = np.array([line.strip() for line in f])
        except FileNotFoundError as err:
            raise FileNotFoundError(
                f"Gene names file not found in {self.data_dir}. Expected 'gene_idx_array.npy' or 'gene_names.txt'."
            ) from err

        # Keys are normalised to plain ``str`` so that a byte-string array
        # still matches the ``str`` symbols callers pass to get_correlation().
        self.gene_to_idx = {
            (gene.decode("utf-8") if isinstance(gene, bytes) else str(gene)): idx
            for idx, gene in enumerate(self.gene_names)
        }
        self.num_genes = len(self.gene_names)

    def _load_correlation_data(self):
        """
        Open the correlation data, preferring the dense layout over the sparse one.

        Sets ``format`` to "dense" or "sparse" and ``has_adj_p`` according to
        whether adjusted p-values are available in the chosen layout.

        Raises:
            FileNotFoundError: If neither layout is present in ``data_dir``.
        """
        corr_matrix_path = os.path.join(self.data_dir, "corr_matrix.npy")
        p_val_matrix_path = os.path.join(self.data_dir, "p_val_matrix.npy")

        if os.path.exists(corr_matrix_path) and os.path.exists(p_val_matrix_path):
            # Memory-map so only the cells actually read are pulled from disk.
            self.corr_matrix = np.load(corr_matrix_path, mmap_mode="r")
            self.p_val_matrix = np.load(p_val_matrix_path, mmap_mode="r")
            self.format = "dense"

            p_adj_matrix_path = os.path.join(self.data_dir, "p_adj_matrix.npy")
            if os.path.exists(p_adj_matrix_path):
                self.p_adj_matrix = np.load(p_adj_matrix_path, mmap_mode="r")
                self.has_adj_p = True
            else:
                self.has_adj_p = False
        else:
            h5_path = os.path.join(self.data_dir, "gene_correlations.h5")
            if not os.path.exists(h5_path):
                raise FileNotFoundError(
                    f"No correlation data found in {self.data_dir}. Expected either dense (.npy) or sparse (.h5) format."
                )

            # Kept open for the lifetime of the instance; see close().
            self.h5_file = h5py.File(h5_path, "r")
            self.format = "sparse"
            self.has_adj_p = "p_adj" in self.h5_file

    @staticmethod
    def _csr_lookup(group, row, col) -> float:
        """
        Return element (row, col) of a CSR matrix stored as ``indptr``/``indices``/``data``.

        Only the two ``indptr`` entries bounding ``row`` and that row's slice
        of ``indices`` are read, instead of loading all three datasets in
        full. A coordinate that is not stored yields 0.0; if a column appears
        more than once in the row, the first stored entry wins.
        """
        start, stop = (int(bound) for bound in group["indptr"][row : row + 2])
        if start == stop:
            return 0.0
        row_columns = np.asarray(group["indices"][start:stop])
        hits = np.flatnonzero(row_columns == col)
        if hits.size == 0:
            return 0.0
        return float(group["data"][start + int(hits[0])])

    def get_correlation(self, gene_a: str, gene_b: str) -> Dict[str, float]:
        """
        Retrieve the correlation and its significance for a pair of genes.

        Args:
            gene_a (str): First gene symbol (e.g., 'BRAF', 'TP53'). Must be present in dataset.
            gene_b (str): Second gene symbol (e.g., 'MAPK1', 'MDM2'). Must be present in dataset.

        Returns:
            Dict[str, float]: Dictionary with:
                - 'correlation': value read from the correlation matrix
                - 'p_value': value read from the p-value matrix
                - 'adjusted_p_value': only when adjusted p-values were loaded
              In the sparse layout an entry that is not stored is reported as 0.0.

        Raises:
            KeyError: If either gene symbol is not found in the gene index.
        """
        if gene_a not in self.gene_to_idx:
            raise KeyError(
                f"Gene '{gene_a}' not available in the DepMap correlation matrix. Check gene symbol spelling."
            )
        if gene_b not in self.gene_to_idx:
            raise KeyError(
                f"Gene '{gene_b}' not available in the DepMap correlation matrix. Check gene symbol spelling."
            )

        idx_a = self.gene_to_idx[gene_a]
        idx_b = self.gene_to_idx[gene_b]

        if self.format == "dense":
            result = {
                "correlation": float(self.corr_matrix[idx_a, idx_b]),
                "p_value": float(self.p_val_matrix[idx_a, idx_b]),
            }
            if self.has_adj_p:
                result["adjusted_p_value"] = float(self.p_adj_matrix[idx_a, idx_b])
        else:  # sparse format
            result = {
                "correlation": self._csr_lookup(self.h5_file["corr"], idx_a, idx_b),
                "p_value": self._csr_lookup(self.h5_file["p_val"], idx_a, idx_b),
            }
            if self.has_adj_p:
                result["adjusted_p_value"] = self._csr_lookup(
                    self.h5_file["p_adj"], idx_a, idx_b
                )

        return result

    def close(self) -> None:
        """Close the HDF5 file handle held in sparse mode; a no-op for dense data."""
        h5_file = getattr(self, "h5_file", None)
        if getattr(self, "format", None) == "sparse" and h5_file is not None:
            h5_file.close()

    def __del__(self):
        """
        Release the HDF5 handle when the instance is garbage collected.

        Attributes may be missing if ``__init__`` failed part-way, and the
        handle's module may already be torn down at interpreter exit, so any
        failure here is deliberately ignored rather than surfaced from a
        finalizer.
        """
        try:
            self.close()
        except Exception:
            pass
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _resolve_gene_symbol(symbol: str, known_genes) -> str:
    """
    Map a user-supplied gene symbol onto the spelling used by the dataset.

    The upper-cased symbol is tried first so every lookup that already worked
    keeps resolving to the same key. Only when that fails are the symbol as
    typed and then a case-insensitive match tried, which makes mixed-case
    symbols such as 'C1orf112' reachable. If nothing matches, the upper-cased
    symbol is returned so the caller's "not available" error names it.
    """
    stripped = symbol.strip()
    upper = stripped.upper()
    if not stripped or upper in known_genes:
        return upper
    if stripped in known_genes:
        return stripped
    folded = stripped.casefold()
    for candidate in known_genes:
        if isinstance(candidate, str) and candidate.casefold() == folded:
            return str(candidate)
    return upper


@server.tool()
async def compute_depmap24q2_gene_correlations(
    gene_a: str, gene_b: str, data_dir: Optional[str] = None
):
    """
    MCP Tool: Analyzes gene-gene correlations from DepMap CRISPR knockout screening data.

    This tool validates genetic interactions using empirical cell viability data from
    1,320+ cancer cell lines in the DepMap 24Q2 dataset. It determines if two genes
    have correlated knockout effects, providing insights into genetic dependencies
    and synthetic lethal relationships.

    Biological Interpretation:
    - Positive correlations indicate co-dependency (genes with shared essential functions)
    - Negative correlations suggest synthetic lethality (compensatory relationships)
    - Strong correlations (|r| > 0.5) with statistical significance (adj_p < 0.05)
      provide robust evidence for therapeutic targeting opportunities

    Clinical Applications:
    - Prioritize gene pairs for experimental validation
    - Inform combination therapy development strategies
    - Guide functional genomics studies
    - Translate pathway predictions into clinically-actionable insights
    - Identify potential drug targets and resistance mechanisms

    Data Source:
    - DepMap 24Q2 release containing CRISPR-Cas9 knockout screens
    - Over 1,320 well-characterized cancer cell lines
    - Standardized gene effect scores (CERES algorithm)
    - Multiple statistical correction methods applied

    Args:
        gene_a (str): First gene symbol for correlation analysis (e.g., 'BRAF', 'TP53').
                     Must use standard HUGO gene nomenclature. Matching is
                     case-insensitive.
        gene_b (str): Second gene symbol for correlation analysis (e.g., 'MAPK1', 'MDM2').
                     Must use standard HUGO gene nomenclature. Matching is
                     case-insensitive.
        data_dir (str, optional): Directory holding the preprocessed DepMap correlation
                     files. When omitted, DEPMAP_DATA_PATH/depmap_24q2 is used.

    Returns:
        dict: Comprehensive correlation analysis results containing:
            - 'correlation_data' (dict): Statistical measures including:
                * 'correlation': Pearson correlation coefficient (-1.0 to 1.0)
                * 'p_value': Statistical significance of correlation
                * 'adjusted_p_value': FDR-corrected p-value (when available)
            - 'interpretation' (dict): Biological and statistical context including:
                * 'strength': Descriptive correlation strength assessment
                * 'significance': Statistical significance interpretation
                * 'direction': 'similar', 'opposing', or 'neutral' when the
                  correlation is exactly zero or undefined
                * 'biological_relationship': Biological reading of the correlation
                * 'summary': Comprehensive analysis summary
            - 'context_info' (list): Detailed analysis messages and metadata
            - 'error' (str, optional): Error description if analysis failed

    Raises:
        Exception: Any failure while opening the DepMap data is logged and re-raised.

    Example Usage:
        # Analyze BRAF-MAPK1 interaction (oncogenic pathway)
        result = await compute_depmap24q2_gene_correlations("BRAF", "MAPK1")

        # Check synthetic lethality between DNA repair genes
        result = await compute_depmap24q2_gene_correlations("BRCA1", "PARP1")
    """
    # Short random tag so the log lines of one request can be told apart.
    request_id = str(uuid.uuid4())[:8]
    print(
        f"[{request_id}] Received DepMap gene correlation analysis request: {gene_a} vs {gene_b}"
    )

    context_info = []

    # A tool instance is built for every request. Failures here are reported
    # and propagated unchanged instead of being folded into an error result.
    # NOTE(review): data_dir comes straight from the client and selects which
    # files get opened -- consider restricting it if the server is exposed.
    try:
        depmap_tool = DepmapCorrelationTool(data_dir=data_dir)
        print("DepMap Correlation tool instance created and ready for MCP server")
    except Exception as e:
        print(f"Error creating DepMap Correlation tool: {str(e)}")
        print(
            "Please ensure DEPMAP_DATA_PATH is correctly set and correlation data exists."
        )
        raise

    try:
        # Yield to the event loop once without adding artificial latency.
        await asyncio.sleep(0)

        # Normalise the symbols against the spellings present in the dataset.
        gene_a_std = _resolve_gene_symbol(gene_a, depmap_tool.gene_to_idx)
        gene_b_std = _resolve_gene_symbol(gene_b, depmap_tool.gene_to_idx)

        if not gene_a_std or not gene_b_std:
            raise ValueError(
                "Gene symbols cannot be empty. Please provide valid HUGO gene symbols."
            )

        if gene_a_std == gene_b_std:
            context_info.append(
                f"Note: Analyzing self-correlation for gene {gene_a_std} (diagonal element)."
            )

        print(
            f"[{request_id}] Processing correlation analysis for standardized genes: {gene_a_std} vs {gene_b_std}"
        )

        corr_data = depmap_tool.get_correlation(gene_a_std, gene_b_std)
        correlation = corr_data["correlation"]
        p_value = corr_data["p_value"]
        adj_p_value = corr_data.get("adjusted_p_value")

        context_info.append(
            "Successfully retrieved correlation data from DepMap 24Q2 dataset."
        )
        context_info.append(
            "Analysis based on CRISPR knockout effects across 1,320+ cancer cell lines."
        )

        def _interpret_strength(c):
            """Classify correlation strength based on absolute value."""
            abs_c = abs(c)
            if abs_c >= 0.8:
                return "very strong"
            if abs_c >= 0.6:
                return "strong"
            if abs_c >= 0.4:
                return "moderate"
            if abs_c >= 0.2:
                return "weak"
            return "negligible"

        def _interpret_significance(p, adj_p):
            """Prefer the adjusted p-value; fall back to the raw p-value."""
            if adj_p is not None and adj_p <= 0.05:
                return "significant (FDR corrected)"
            if adj_p is not None and adj_p <= 0.1:
                return "marginally significant (FDR corrected)"
            if p <= 0.05:
                return "significant (uncorrected)"
            if p <= 0.1:
                return "marginally significant (uncorrected)"
            return "not statistically significant"

        def _interpret_biological_relationship(corr, strength):
            """Provide biological context for correlation direction and strength."""
            if strength == "negligible":
                return "independent knockout effects"
            elif corr > 0:
                return "co-dependent relationship (shared essential functions)"
            else:
                return "synthetic lethal relationship (compensatory functions)"

        strength = _interpret_strength(correlation)
        significance = _interpret_significance(p_value, adj_p_value)
        # A zero (e.g. an entry absent from the sparse matrix) or NaN value has
        # no sign, so it must not be reported as an opposing effect.
        if correlation > 0:
            direction = "similar"
        elif correlation < 0:
            direction = "opposing"
        else:
            direction = "neutral"
        biological_relationship = _interpret_biological_relationship(
            correlation, strength
        )

        interpretation = {
            "strength": strength,
            "significance": significance,
            "direction": direction,
            "biological_relationship": biological_relationship,
            "summary": f"DepMap analysis reveals a {strength}, {direction} correlation (r={correlation:.3f}) in knockout effects between {gene_a_std} and {gene_b_std}, suggesting {biological_relationship}. This finding is {significance}.",
        }

        print(
            f"[{request_id}] DepMap correlation analysis completed: r={correlation:.3f}, p={p_value:.2e}"
        )

        return {
            "correlation_data": corr_data,
            "interpretation": interpretation,
            "context_info": context_info,
        }

    except (KeyError, ValueError, FileNotFoundError) as e:
        # Expected failures: unknown or empty gene symbol, missing data file.
        error_message = f"DepMap correlation analysis validation error: {str(e)}"
        print(f"[{request_id}] {error_message}")
        return {
            "error": error_message,
            "context_info": context_info
            + ["Please verify gene symbols and data availability."],
        }
    except Exception as e:
        # Boundary handler: anything else is turned into an error payload.
        error_message = f"Unexpected error during DepMap correlation analysis: {str(e)}"
        print(f"[{request_id}] {error_message}")
        return {
            "error": error_message,
            "context_info": context_info
            + ["Internal server error occurred during analysis."],
        }
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
if __name__ == "__main__":
    # Startup banner, one line per fact about the service being launched.
    for banner_line in (
        "Starting MCP server for DepMap Gene Correlation Analysis Tool...",
        "Dataset: DepMap 24Q2 CRISPR knockout screening data",
        "Coverage: 1,320+ cancer cell lines with genetic dependency profiles",
        "Application: Genetic interaction analysis and synthetic lethality discovery",
        "Server: FastMCP with streamable HTTP transport",
        "Port: 7002 (configured to avoid conflicts with other biomedical tools)",
    ):
        print(banner_line)

    # Serve the registered correlation tool over streamable HTTP on port 7002.
    server.run(
        transport="streamable-http", host="0.0.0.0", port=7002, stateless_http=True
    )
|