tooluniverse 0.1.4__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/__init__.py +340 -4
- tooluniverse/admetai_tool.py +84 -0
- tooluniverse/agentic_tool.py +563 -0
- tooluniverse/alphafold_tool.py +96 -0
- tooluniverse/base_tool.py +129 -6
- tooluniverse/boltz_tool.py +207 -0
- tooluniverse/chem_tool.py +192 -0
- tooluniverse/compose_scripts/__init__.py +1 -0
- tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
- tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
- tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
- tooluniverse/compose_scripts/literature_tool.py +34 -0
- tooluniverse/compose_scripts/output_summarizer.py +279 -0
- tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
- tooluniverse/compose_scripts/tool_discover.py +705 -0
- tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
- tooluniverse/compose_tool.py +371 -0
- tooluniverse/ctg_tool.py +1002 -0
- tooluniverse/custom_tool.py +81 -0
- tooluniverse/dailymed_tool.py +108 -0
- tooluniverse/data/admetai_tools.json +155 -0
- tooluniverse/data/agentic_tools.json +1156 -0
- tooluniverse/data/alphafold_tools.json +87 -0
- tooluniverse/data/boltz_tools.json +9 -0
- tooluniverse/data/chembl_tools.json +16 -0
- tooluniverse/data/clait_tools.json +108 -0
- tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
- tooluniverse/data/compose_tools.json +202 -0
- tooluniverse/data/dailymed_tools.json +70 -0
- tooluniverse/data/dataset_tools.json +646 -0
- tooluniverse/data/disease_target_score_tools.json +712 -0
- tooluniverse/data/efo_tools.json +17 -0
- tooluniverse/data/embedding_tools.json +319 -0
- tooluniverse/data/enrichr_tools.json +31 -0
- tooluniverse/data/europe_pmc_tools.json +22 -0
- tooluniverse/data/expert_feedback_tools.json +10 -0
- tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
- tooluniverse/data/fda_drug_labeling_tools.json +544 -168
- tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
- tooluniverse/data/finder_tools.json +209 -0
- tooluniverse/data/gene_ontology_tools.json +113 -0
- tooluniverse/data/gwas_tools.json +1082 -0
- tooluniverse/data/hpa_tools.json +333 -0
- tooluniverse/data/humanbase_tools.json +47 -0
- tooluniverse/data/idmap_tools.json +74 -0
- tooluniverse/data/mcp_client_tools_example.json +113 -0
- tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
- tooluniverse/data/medlineplus_tools.json +141 -0
- tooluniverse/data/monarch_tools.json +1 -1
- tooluniverse/data/openalex_tools.json +36 -0
- tooluniverse/data/opentarget_tools.json +82 -58
- tooluniverse/data/output_summarization_tools.json +101 -0
- tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
- tooluniverse/data/packages/categorized_tools.txt +206 -0
- tooluniverse/data/packages/cheminformatics_tools.json +347 -0
- tooluniverse/data/packages/earth_sciences_tools.json +74 -0
- tooluniverse/data/packages/genomics_tools.json +776 -0
- tooluniverse/data/packages/image_processing_tools.json +38 -0
- tooluniverse/data/packages/machine_learning_tools.json +789 -0
- tooluniverse/data/packages/neuroscience_tools.json +62 -0
- tooluniverse/data/packages/original_tools.txt +0 -0
- tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
- tooluniverse/data/packages/scientific_computing_tools.json +560 -0
- tooluniverse/data/packages/single_cell_tools.json +453 -0
- tooluniverse/data/packages/software_tools.json +4954 -0
- tooluniverse/data/packages/structural_biology_tools.json +396 -0
- tooluniverse/data/packages/visualization_tools.json +399 -0
- tooluniverse/data/pubchem_tools.json +215 -0
- tooluniverse/data/pubtator_tools.json +68 -0
- tooluniverse/data/rcsb_pdb_tools.json +1332 -0
- tooluniverse/data/reactome_tools.json +19 -0
- tooluniverse/data/semantic_scholar_tools.json +26 -0
- tooluniverse/data/special_tools.json +2 -25
- tooluniverse/data/tool_composition_tools.json +88 -0
- tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
- tooluniverse/data/txagent_client_tools.json +9 -0
- tooluniverse/data/uniprot_tools.json +211 -0
- tooluniverse/data/url_fetch_tools.json +94 -0
- tooluniverse/data/uspto_downloader_tools.json +9 -0
- tooluniverse/data/uspto_tools.json +811 -0
- tooluniverse/data/xml_tools.json +3275 -0
- tooluniverse/dataset_tool.py +296 -0
- tooluniverse/default_config.py +165 -0
- tooluniverse/efo_tool.py +42 -0
- tooluniverse/embedding_database.py +630 -0
- tooluniverse/embedding_sync.py +396 -0
- tooluniverse/enrichr_tool.py +266 -0
- tooluniverse/europe_pmc_tool.py +52 -0
- tooluniverse/execute_function.py +1775 -95
- tooluniverse/extended_hooks.py +444 -0
- tooluniverse/gene_ontology_tool.py +194 -0
- tooluniverse/graphql_tool.py +158 -36
- tooluniverse/gwas_tool.py +358 -0
- tooluniverse/hpa_tool.py +1645 -0
- tooluniverse/humanbase_tool.py +389 -0
- tooluniverse/logging_config.py +254 -0
- tooluniverse/mcp_client_tool.py +764 -0
- tooluniverse/mcp_integration.py +413 -0
- tooluniverse/mcp_tool_registry.py +925 -0
- tooluniverse/medlineplus_tool.py +337 -0
- tooluniverse/openalex_tool.py +228 -0
- tooluniverse/openfda_adv_tool.py +283 -0
- tooluniverse/openfda_tool.py +393 -160
- tooluniverse/output_hook.py +1122 -0
- tooluniverse/package_tool.py +195 -0
- tooluniverse/pubchem_tool.py +158 -0
- tooluniverse/pubtator_tool.py +168 -0
- tooluniverse/rcsb_pdb_tool.py +38 -0
- tooluniverse/reactome_tool.py +108 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
- tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
- tooluniverse/remote/expert_feedback/simple_test.py +23 -0
- tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
- tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
- tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
- tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
- tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
- tooluniverse/remote/immune_compass/compass_tool.py +327 -0
- tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
- tooluniverse/remote_tool.py +99 -0
- tooluniverse/restful_tool.py +53 -30
- tooluniverse/scripts/generate_tool_graph.py +408 -0
- tooluniverse/scripts/visualize_tool_graph.py +829 -0
- tooluniverse/semantic_scholar_tool.py +62 -0
- tooluniverse/smcp.py +2452 -0
- tooluniverse/smcp_server.py +975 -0
- tooluniverse/test/mcp_server_test.py +0 -0
- tooluniverse/test/test_admetai_tool.py +370 -0
- tooluniverse/test/test_agentic_tool.py +129 -0
- tooluniverse/test/test_alphafold_tool.py +71 -0
- tooluniverse/test/test_chem_tool.py +37 -0
- tooluniverse/test/test_compose_lieraturereview.py +63 -0
- tooluniverse/test/test_compose_tool.py +448 -0
- tooluniverse/test/test_dailymed.py +69 -0
- tooluniverse/test/test_dataset_tool.py +200 -0
- tooluniverse/test/test_disease_target_score.py +56 -0
- tooluniverse/test/test_drugbank_filter_examples.py +179 -0
- tooluniverse/test/test_efo.py +31 -0
- tooluniverse/test/test_enrichr_tool.py +21 -0
- tooluniverse/test/test_europe_pmc_tool.py +20 -0
- tooluniverse/test/test_fda_adv.py +95 -0
- tooluniverse/test/test_fda_drug_labeling.py +91 -0
- tooluniverse/test/test_gene_ontology_tools.py +66 -0
- tooluniverse/test/test_gwas_tool.py +139 -0
- tooluniverse/test/test_hpa.py +625 -0
- tooluniverse/test/test_humanbase_tool.py +20 -0
- tooluniverse/test/test_idmap_tools.py +61 -0
- tooluniverse/test/test_mcp_server.py +211 -0
- tooluniverse/test/test_mcp_tool.py +247 -0
- tooluniverse/test/test_medlineplus.py +220 -0
- tooluniverse/test/test_openalex_tool.py +32 -0
- tooluniverse/test/test_opentargets.py +28 -0
- tooluniverse/test/test_pubchem_tool.py +116 -0
- tooluniverse/test/test_pubtator_tool.py +37 -0
- tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
- tooluniverse/test/test_reactome.py +54 -0
- tooluniverse/test/test_semantic_scholar_tool.py +24 -0
- tooluniverse/test/test_software_tools.py +147 -0
- tooluniverse/test/test_tool_description_optimizer.py +49 -0
- tooluniverse/test/test_tool_finder.py +26 -0
- tooluniverse/test/test_tool_finder_llm.py +252 -0
- tooluniverse/test/test_tools_find.py +195 -0
- tooluniverse/test/test_uniprot_tools.py +74 -0
- tooluniverse/test/test_uspto_tool.py +72 -0
- tooluniverse/test/test_xml_tool.py +113 -0
- tooluniverse/tool_finder_embedding.py +267 -0
- tooluniverse/tool_finder_keyword.py +693 -0
- tooluniverse/tool_finder_llm.py +699 -0
- tooluniverse/tool_graph_web_ui.py +955 -0
- tooluniverse/tool_registry.py +416 -0
- tooluniverse/uniprot_tool.py +155 -0
- tooluniverse/url_tool.py +253 -0
- tooluniverse/uspto_tool.py +240 -0
- tooluniverse/utils.py +369 -41
- tooluniverse/xml_tool.py +369 -0
- tooluniverse-1.0.0.dist-info/METADATA +377 -0
- tooluniverse-1.0.0.dist-info/RECORD +186 -0
- {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +1 -1
- tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
- tooluniverse-0.1.4.dist-info/METADATA +0 -141
- tooluniverse-0.1.4.dist-info/RECORD +0 -18
- {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PINNACLE Protein-Protein Interaction Tool - MCP Server
|
|
3
|
+
|
|
4
|
+
This module provides an MCP (Model Context Protocol) server for retrieving
|
|
5
|
+
PINNACLE (Protein Interaction Network Contextualized Learning) embeddings.
|
|
6
|
+
PINNACLE generates cell-type-specific protein-protein interaction embeddings
|
|
7
|
+
that capture the functional relationships between proteins in different
|
|
8
|
+
cellular contexts.
|
|
9
|
+
|
|
10
|
+
The tool provides access to pre-computed PPI embeddings that can be used for:
|
|
11
|
+
- Drug target prediction and prioritization
|
|
12
|
+
- Disease mechanism analysis
|
|
13
|
+
- Protein function prediction
|
|
14
|
+
- Network-based biomarker discovery
|
|
15
|
+
- Systems biology research
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from fastmcp import FastMCP
|
|
19
|
+
import os
|
|
20
|
+
import asyncio
|
|
21
|
+
import uuid
|
|
22
|
+
import torch
|
|
23
|
+
from typing import Dict, Tuple, Optional
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Initialize MCP Server for PINNACLE PPI embedding retrieval
|
|
27
|
+
server = FastMCP("PINNACLE PPI SMCP Server")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class PinnaclePPITool:
    """
    Look up cell-type-specific PINNACLE protein-protein interaction embeddings.

    The tool loads a PyTorch checkpoint into ``self.ppi_dict`` and treats it as
    a mapping from cell type name to that cell type's embeddings. Lookups are
    tolerant of naming differences: commas are dropped, hyphens and spaces are
    turned into underscores, and comparison is case-insensitive. When no exact
    match exists, the first stored cell type whose normalized name contains
    (or is contained in) the normalized query is used.
    """

    def __init__(self, embed_path: Optional[str] = None):
        """
        Initialize the PINNACLE PPI tool by loading pre-computed embeddings.

        Args:
            embed_path (str, optional): Path to the PINNACLE embeddings file (.pth format).
                If None, uses PINNACLE_DATA_PATH/pinnacle_embeds/ppi_embed_dict.pth,
                where PINNACLE_DATA_PATH is read from the environment (empty
                string when unset, which yields a path relative to the CWD).

        Raises:
            FileNotFoundError: If the resolved embeddings file does not exist.
            Exception: Whatever ``torch.load`` raises for an unreadable or
                corrupted checkpoint.
        """
        # Construct embeddings file path
        if embed_path is None:
            pinnacle_data_path = os.getenv("PINNACLE_DATA_PATH", "")
            self.embed_path = os.path.join(
                pinnacle_data_path, "pinnacle_embeds", "ppi_embed_dict.pth"
            )
        else:
            self.embed_path = embed_path

        # Validate embeddings file exists
        if not os.path.exists(self.embed_path):
            raise FileNotFoundError(
                f"PINNACLE embeddings file not found at {self.embed_path}. Please check your PINNACLE_DATA_PATH."
            )

        # Load PINNACLE PPI embeddings from PyTorch checkpoint
        print(f"Initializing PINNACLE PPI tool from embeddings: {self.embed_path}...")
        # SECURITY NOTE(review): weights_only=False makes torch.load run the full
        # pickle machinery, which can execute arbitrary code embedded in the file.
        # Only point this at checkpoints from a trusted source. Switching to
        # weights_only=True should be evaluated, but may reject checkpoints that
        # contain non-tensor objects, so it is deliberately not changed here.
        self.ppi_dict = torch.load(self.embed_path, weights_only=False)

        # Display available cell types for reference (at most the first five)
        available_cell_types = list(self.ppi_dict.keys())
        print(
            f"PINNACLE tool initialized successfully (loaded embeddings for {len(available_cell_types)} cell types)."
        )
        print(
            f"Available cell types: {available_cell_types[:5]}..."
            if len(available_cell_types) > 5
            else f"Available cell types: {available_cell_types}"
        )

    @staticmethod
    def _normalize_cell_type(name: str) -> str:
        """Return the canonical comparison form of a cell type name.

        Commas are removed, hyphens and spaces become underscores, and the
        result is lowercased. The same rule is applied to both the query and
        the stored keys so that they compare consistently.
        """
        return name.replace(",", "").replace("-", "_").replace(" ", "_").lower()

    def get_ppi_embeddings(self, cell_type: str) -> Tuple[Dict[str, torch.Tensor], str]:
        """
        Retrieve cell-type-specific protein-protein interaction embeddings.

        Args:
            cell_type (str): Target cell type name (e.g., 'b_cell', 'hepatocyte', 'T-cell').
                Spaces, hyphens, commas and capitalization differences are ignored.

        Returns:
            Tuple[Dict[str, torch.Tensor], str]: A tuple containing:
                - The embeddings stored for the matched cell type (empty dict if no match)
                - Status message indicating match quality and selected cell type

        Note:
            The matching algorithm performs the following steps:
            1. Normalize the query and each stored name with the same rules
            2. Return immediately on an exact match of the normalized names
            3. Otherwise collect stored names where one normalized name is a
               substring of the other
            4. Return the first such partial match (in dictionary order)

            A query or stored name that normalizes to the empty string never
            participates in matching: the empty string is a substring of every
            string, so it would otherwise "match" arbitrary cell types.
        """
        normalized_query = self._normalize_cell_type(cell_type)

        # Search for matching cell types with progressive matching strategy
        matching_cell_types = []

        # An empty normalized query (e.g. "" or ",") carries no information;
        # skip the search entirely and fall through to the "not found" result.
        if normalized_query:
            for cell_key in self.ppi_dict:
                normalized_key = self._normalize_cell_type(cell_key)

                # Priority 1: Exact match (highest confidence)
                if normalized_key == normalized_query:
                    return (
                        self.ppi_dict[cell_key],
                        f"Exact match found for cell type '{cell_type}' -> '{cell_key}'.",
                    )

                # Priority 2: Partial/substring match (moderate confidence).
                # Empty keys are excluded because "" is contained in everything.
                if normalized_key and (
                    normalized_query in normalized_key
                    or normalized_key in normalized_query
                ):
                    matching_cell_types.append(cell_key)

        # Return first partial match if available
        if matching_cell_types:
            best_match = matching_cell_types[0]
            print(
                f"Partial match for '{cell_type}': using '{best_match}' from {len(matching_cell_types)} candidates."
            )
            return (
                self.ppi_dict[best_match],
                f"Partial match for '{cell_type}': using '{best_match}' (from {len(matching_cell_types)} candidates).",
            )

        # No match found - return empty embeddings with helpful error message
        available_types = list(self.ppi_dict.keys())
        message = f"Cell type '{cell_type}' not found in PINNACLE embeddings. Available cell types ({len(available_types)} total): {available_types[:10]}{'...' if len(available_types) > 10 else ''}"
        print(f"[PINNACLE PPI Retrieval]: {message}")
        return {}, message
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# Single-entry cache holding the most recently used tool, keyed by the
# ``embed_path`` argument it was built with. Keeping only one entry bounds
# memory to a single loaded checkpoint while avoiding a reload from disk on
# every request for the same embeddings file.
_PINNACLE_TOOL_CACHE: dict = {}


def _get_pinnacle_tool(embed_path: Optional[str] = None):
    """Return a PinnaclePPITool for ``embed_path``, reusing the cached one if it matches.

    The cache is keyed by the argument as given (including None); a failed
    construction raises and leaves the cache untouched.
    """
    tool = _PINNACLE_TOOL_CACHE.get(embed_path)
    if tool is None:
        tool = PinnaclePPITool(embed_path=embed_path)
        _PINNACLE_TOOL_CACHE.clear()
        _PINNACLE_TOOL_CACHE[embed_path] = tool
    return tool


@server.tool()
async def run_pinnacle_ppi_retrieval(cell_type: str, embed_path: Optional[str] = None):
    """
    MCP Tool: Retrieves cell-type-specific protein-protein interaction embeddings from PINNACLE.

    This tool provides access to pre-computed PINNACLE (Protein Interaction Network
    Contextualized Learning) embeddings that represent protein-protein interactions
    in specific cellular contexts. These embeddings encode functional relationships
    between proteins as dense vector representations, capturing both direct physical
    interactions and functional associations.

    Scientific Background:
    - PINNACLE embeddings are trained on cell-type-specific protein interaction networks
    - Embeddings capture both local (direct interactions) and global (pathway-level) relationships
    - Cell-type specificity accounts for tissue-specific expression and interaction patterns
    - Dense vector representations enable similarity calculations and downstream ML applications

    Applications:
    - Drug target identification and prioritization
    - Disease mechanism analysis and biomarker discovery
    - Protein function prediction and annotation
    - Network-based drug repurposing
    - Systems biology and pathway analysis
    - Precision medicine and personalized therapeutics

    Technical Details:
    - Embeddings are stored as PyTorch tensors with consistent dimensionality
    - Supports fuzzy matching for cell type names (handles various naming conventions)
    - Returns embeddings for all proteins/genes available in the specified cell type
    - Vector dimensions typically range from 128-512 depending on model configuration

    Args:
        cell_type (str): Target cell type for embedding retrieval. Supports flexible naming:
            - Standard formats: 'b_cell', 'hepatocyte', 'cardiomyocyte'
            - Alternative formats: 'B-cell', 'T cell', 'NK cells'
            - Tissue types: 'liver', 'heart', 'brain', 'immune'
            The tool performs intelligent matching to find the best available match.
        embed_path (str, optional): Path to the PINNACLE embeddings file (.pth format).
            If omitted, PINNACLE_DATA_PATH/pinnacle_embeds/ppi_embed_dict.pth is used.

    Returns:
        dict: Comprehensive embedding retrieval results containing:
            - 'embeddings' (dict, optional): Protein-to-embedding mapping where:
                * Keys: Gene/protein symbols (e.g., 'TP53', 'EGFR', 'BRCA1')
                * Values: Embedding vectors as lists of floats (e.g., 256-dimensional vectors)
                Only present when embeddings are successfully retrieved.
            - 'context_info' (list): Detailed information about the retrieval process:
                * Match quality (exact vs partial match)
                * Selected cell type name
                * Number of proteins with embeddings
                * Available alternatives if no match found
            - 'error' (str, optional): Error description if retrieval failed

    Example Usage:
        # Retrieve B cell PPI embeddings
        result = await run_pinnacle_ppi_retrieval("b_cell")

        # Get hepatocyte-specific interactions
        result = await run_pinnacle_ppi_retrieval("hepatocyte")

        # Flexible naming support
        result = await run_pinnacle_ppi_retrieval("T-cells")
    """
    # Generate unique request ID for tracking and logging
    request_id = str(uuid.uuid4())[:8]
    print(
        f"[{request_id}] Received PINNACLE PPI embedding retrieval request for cell type: '{cell_type}'"
    )

    try:
        # Brief async pause to allow for proper request handling
        await asyncio.sleep(0.1)

        # Validate input parameter
        if not cell_type or not cell_type.strip():
            raise ValueError(
                "Cell type parameter cannot be empty. Please provide a valid cell type name."
            )

        print(
            f"[{request_id}] Processing PPI embedding retrieval for cell type: '{cell_type.strip()}'"
        )

        # Obtain the PINNACLE tool, reusing the cached instance when the same
        # embeddings file was requested last time (loading the checkpoint is
        # the expensive step and used to run on every single request).
        # SECURITY NOTE(review): embed_path is supplied by the MCP client and
        # ends up in torch.load(..., weights_only=False) inside PinnaclePPITool,
        # i.e. a client can make the server unpickle an arbitrary local file.
        # Consider restricting it to a trusted directory or removing it from
        # the remotely exposed signature.
        try:
            pinnacle_tool = _get_pinnacle_tool(embed_path)
            print("PINNACLE PPI tool instance created and ready for MCP server")
        except Exception as e:
            print(f"Error creating PINNACLE PPI tool: {str(e)}")
            print(
                "Please ensure PINNACLE_DATA_PATH is correctly set and embedding files exist."
            )
            # Bare raise keeps the original traceback intact.
            raise

        # Execute PINNACLE embedding retrieval with intelligent matching
        embeddings, match_message = pinnacle_tool.get_ppi_embeddings(cell_type.strip())

        # Handle case where no embeddings are found
        if not embeddings:
            print(f"[{request_id}] No embeddings found for cell type '{cell_type}'")
            return {
                "error": f"No PINNACLE embeddings available for cell type '{cell_type}'",
                "context_info": [
                    match_message,
                    "Consider checking available cell types or using alternative naming conventions.",
                    "Common formats include: 'b_cell', 'hepatocyte', 'cardiomyocyte', 't_cell', etc.",
                ],
            }

        # Convert PyTorch tensors to JSON-serializable lists
        # This enables downstream processing and API compatibility
        serializable_embeddings = {
            gene: tensor.tolist() if hasattr(tensor, "tolist") else tensor
            for gene, tensor in embeddings.items()
        }

        # Log successful completion with key metrics
        num_proteins = len(serializable_embeddings)
        # Use the first entry as representative of the dimensionality. A value
        # without a length (e.g. a scalar produced by a 0-d tensor) is reported
        # as 0 instead of raising TypeError and failing the whole request.
        first_vector = next(iter(serializable_embeddings.values()), None)
        embedding_dim = len(first_vector) if hasattr(first_vector, "__len__") else 0
        print(
            f"[{request_id}] PINNACLE PPI retrieval completed: {num_proteins} proteins, {embedding_dim}D embeddings"
        )

        return {
            "embeddings": serializable_embeddings,
            "context_info": [
                match_message,
                f"Successfully retrieved embeddings for {num_proteins} proteins/genes.",
                f"Embedding dimensionality: {embedding_dim} features per protein.",
                f"Cell type context: {cell_type} (matched and processed).",
            ],
        }

    except ValueError as e:
        error_message = f"PINNACLE PPI retrieval validation error: {str(e)}"
        print(f"[{request_id}] {error_message}")
        return {
            "error": error_message,
            "context_info": ["Please verify cell type parameter and format."],
        }
    except Exception as e:
        # Top-level boundary of the MCP tool: report the failure to the client
        # as a structured error instead of propagating it.
        error_message = f"Unexpected error during PINNACLE PPI retrieval: {str(e)}"
        print(f"[{request_id}] {error_message}")
        return {
            "error": error_message,
            "context_info": [
                "Internal server error occurred during embedding retrieval."
            ],
        }
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
if __name__ == "__main__":
    # Startup banner, emitted one line at a time before the server takes over.
    startup_banner = (
        "Starting MCP server for PINNACLE Protein-Protein Interaction Tool...",
        "Model: PINNACLE (Protein Interaction Network Contextualized Learning)",
        "Application: Cell-type-specific protein interaction embedding retrieval",
        "Features: Intelligent cell type matching and dense vector representations",
        "Server: FastMCP with streamable HTTP transport",
        "Port: 7001 (configured to avoid conflicts with other biomedical tools)",
    )
    for banner_line in startup_banner:
        print(banner_line)

    # Launch the MCP server: streamable HTTP on all interfaces, port 7001,
    # with stateless HTTP enabled.
    server.run(
        transport="streamable-http",
        host="0.0.0.0",
        port=7001,
        stateless_http=True,
    )
|