tooluniverse 0.2.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tooluniverse has been flagged as possibly problematic.
- tooluniverse/__init__.py +340 -4
- tooluniverse/admetai_tool.py +84 -0
- tooluniverse/agentic_tool.py +563 -0
- tooluniverse/alphafold_tool.py +96 -0
- tooluniverse/base_tool.py +129 -6
- tooluniverse/boltz_tool.py +207 -0
- tooluniverse/chem_tool.py +192 -0
- tooluniverse/compose_scripts/__init__.py +1 -0
- tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
- tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
- tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
- tooluniverse/compose_scripts/literature_tool.py +34 -0
- tooluniverse/compose_scripts/output_summarizer.py +279 -0
- tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
- tooluniverse/compose_scripts/tool_discover.py +705 -0
- tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
- tooluniverse/compose_tool.py +371 -0
- tooluniverse/ctg_tool.py +1002 -0
- tooluniverse/custom_tool.py +81 -0
- tooluniverse/dailymed_tool.py +108 -0
- tooluniverse/data/admetai_tools.json +155 -0
- tooluniverse/data/agentic_tools.json +1156 -0
- tooluniverse/data/alphafold_tools.json +87 -0
- tooluniverse/data/boltz_tools.json +9 -0
- tooluniverse/data/chembl_tools.json +16 -0
- tooluniverse/data/clait_tools.json +108 -0
- tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
- tooluniverse/data/compose_tools.json +202 -0
- tooluniverse/data/dailymed_tools.json +70 -0
- tooluniverse/data/dataset_tools.json +646 -0
- tooluniverse/data/disease_target_score_tools.json +712 -0
- tooluniverse/data/efo_tools.json +17 -0
- tooluniverse/data/embedding_tools.json +319 -0
- tooluniverse/data/enrichr_tools.json +31 -0
- tooluniverse/data/europe_pmc_tools.json +22 -0
- tooluniverse/data/expert_feedback_tools.json +10 -0
- tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
- tooluniverse/data/fda_drug_labeling_tools.json +1 -1
- tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
- tooluniverse/data/finder_tools.json +209 -0
- tooluniverse/data/gene_ontology_tools.json +113 -0
- tooluniverse/data/gwas_tools.json +1082 -0
- tooluniverse/data/hpa_tools.json +333 -0
- tooluniverse/data/humanbase_tools.json +47 -0
- tooluniverse/data/idmap_tools.json +74 -0
- tooluniverse/data/mcp_client_tools_example.json +113 -0
- tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
- tooluniverse/data/medlineplus_tools.json +141 -0
- tooluniverse/data/monarch_tools.json +1 -1
- tooluniverse/data/openalex_tools.json +36 -0
- tooluniverse/data/opentarget_tools.json +1 -1
- tooluniverse/data/output_summarization_tools.json +101 -0
- tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
- tooluniverse/data/packages/categorized_tools.txt +206 -0
- tooluniverse/data/packages/cheminformatics_tools.json +347 -0
- tooluniverse/data/packages/earth_sciences_tools.json +74 -0
- tooluniverse/data/packages/genomics_tools.json +776 -0
- tooluniverse/data/packages/image_processing_tools.json +38 -0
- tooluniverse/data/packages/machine_learning_tools.json +789 -0
- tooluniverse/data/packages/neuroscience_tools.json +62 -0
- tooluniverse/data/packages/original_tools.txt +0 -0
- tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
- tooluniverse/data/packages/scientific_computing_tools.json +560 -0
- tooluniverse/data/packages/single_cell_tools.json +453 -0
- tooluniverse/data/packages/software_tools.json +4954 -0
- tooluniverse/data/packages/structural_biology_tools.json +396 -0
- tooluniverse/data/packages/visualization_tools.json +399 -0
- tooluniverse/data/pubchem_tools.json +215 -0
- tooluniverse/data/pubtator_tools.json +68 -0
- tooluniverse/data/rcsb_pdb_tools.json +1332 -0
- tooluniverse/data/reactome_tools.json +19 -0
- tooluniverse/data/semantic_scholar_tools.json +26 -0
- tooluniverse/data/special_tools.json +2 -25
- tooluniverse/data/tool_composition_tools.json +88 -0
- tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
- tooluniverse/data/txagent_client_tools.json +9 -0
- tooluniverse/data/uniprot_tools.json +211 -0
- tooluniverse/data/url_fetch_tools.json +94 -0
- tooluniverse/data/uspto_downloader_tools.json +9 -0
- tooluniverse/data/uspto_tools.json +811 -0
- tooluniverse/data/xml_tools.json +3275 -0
- tooluniverse/dataset_tool.py +296 -0
- tooluniverse/default_config.py +165 -0
- tooluniverse/efo_tool.py +42 -0
- tooluniverse/embedding_database.py +630 -0
- tooluniverse/embedding_sync.py +396 -0
- tooluniverse/enrichr_tool.py +266 -0
- tooluniverse/europe_pmc_tool.py +52 -0
- tooluniverse/execute_function.py +1775 -95
- tooluniverse/extended_hooks.py +444 -0
- tooluniverse/gene_ontology_tool.py +194 -0
- tooluniverse/graphql_tool.py +158 -36
- tooluniverse/gwas_tool.py +358 -0
- tooluniverse/hpa_tool.py +1645 -0
- tooluniverse/humanbase_tool.py +389 -0
- tooluniverse/logging_config.py +254 -0
- tooluniverse/mcp_client_tool.py +764 -0
- tooluniverse/mcp_integration.py +413 -0
- tooluniverse/mcp_tool_registry.py +925 -0
- tooluniverse/medlineplus_tool.py +337 -0
- tooluniverse/openalex_tool.py +228 -0
- tooluniverse/openfda_adv_tool.py +283 -0
- tooluniverse/openfda_tool.py +393 -160
- tooluniverse/output_hook.py +1122 -0
- tooluniverse/package_tool.py +195 -0
- tooluniverse/pubchem_tool.py +158 -0
- tooluniverse/pubtator_tool.py +168 -0
- tooluniverse/rcsb_pdb_tool.py +38 -0
- tooluniverse/reactome_tool.py +108 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
- tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
- tooluniverse/remote/expert_feedback/simple_test.py +23 -0
- tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
- tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
- tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
- tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
- tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
- tooluniverse/remote/immune_compass/compass_tool.py +327 -0
- tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
- tooluniverse/remote_tool.py +99 -0
- tooluniverse/restful_tool.py +53 -30
- tooluniverse/scripts/generate_tool_graph.py +408 -0
- tooluniverse/scripts/visualize_tool_graph.py +829 -0
- tooluniverse/semantic_scholar_tool.py +62 -0
- tooluniverse/smcp.py +2452 -0
- tooluniverse/smcp_server.py +975 -0
- tooluniverse/test/mcp_server_test.py +0 -0
- tooluniverse/test/test_admetai_tool.py +370 -0
- tooluniverse/test/test_agentic_tool.py +129 -0
- tooluniverse/test/test_alphafold_tool.py +71 -0
- tooluniverse/test/test_chem_tool.py +37 -0
- tooluniverse/test/test_compose_lieraturereview.py +63 -0
- tooluniverse/test/test_compose_tool.py +448 -0
- tooluniverse/test/test_dailymed.py +69 -0
- tooluniverse/test/test_dataset_tool.py +200 -0
- tooluniverse/test/test_disease_target_score.py +56 -0
- tooluniverse/test/test_drugbank_filter_examples.py +179 -0
- tooluniverse/test/test_efo.py +31 -0
- tooluniverse/test/test_enrichr_tool.py +21 -0
- tooluniverse/test/test_europe_pmc_tool.py +20 -0
- tooluniverse/test/test_fda_adv.py +95 -0
- tooluniverse/test/test_fda_drug_labeling.py +91 -0
- tooluniverse/test/test_gene_ontology_tools.py +66 -0
- tooluniverse/test/test_gwas_tool.py +139 -0
- tooluniverse/test/test_hpa.py +625 -0
- tooluniverse/test/test_humanbase_tool.py +20 -0
- tooluniverse/test/test_idmap_tools.py +61 -0
- tooluniverse/test/test_mcp_server.py +211 -0
- tooluniverse/test/test_mcp_tool.py +247 -0
- tooluniverse/test/test_medlineplus.py +220 -0
- tooluniverse/test/test_openalex_tool.py +32 -0
- tooluniverse/test/test_opentargets.py +28 -0
- tooluniverse/test/test_pubchem_tool.py +116 -0
- tooluniverse/test/test_pubtator_tool.py +37 -0
- tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
- tooluniverse/test/test_reactome.py +54 -0
- tooluniverse/test/test_semantic_scholar_tool.py +24 -0
- tooluniverse/test/test_software_tools.py +147 -0
- tooluniverse/test/test_tool_description_optimizer.py +49 -0
- tooluniverse/test/test_tool_finder.py +26 -0
- tooluniverse/test/test_tool_finder_llm.py +252 -0
- tooluniverse/test/test_tools_find.py +195 -0
- tooluniverse/test/test_uniprot_tools.py +74 -0
- tooluniverse/test/test_uspto_tool.py +72 -0
- tooluniverse/test/test_xml_tool.py +113 -0
- tooluniverse/tool_finder_embedding.py +267 -0
- tooluniverse/tool_finder_keyword.py +693 -0
- tooluniverse/tool_finder_llm.py +699 -0
- tooluniverse/tool_graph_web_ui.py +955 -0
- tooluniverse/tool_registry.py +416 -0
- tooluniverse/uniprot_tool.py +155 -0
- tooluniverse/url_tool.py +253 -0
- tooluniverse/uspto_tool.py +240 -0
- tooluniverse/utils.py +369 -41
- tooluniverse/xml_tool.py +369 -0
- tooluniverse-1.0.0.dist-info/METADATA +377 -0
- tooluniverse-1.0.0.dist-info/RECORD +186 -0
- tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
- tooluniverse/generate_mcp_tools.py +0 -113
- tooluniverse/mcp_server.py +0 -3340
- tooluniverse-0.2.0.dist-info/METADATA +0 -139
- tooluniverse-0.2.0.dist-info/RECORD +0 -21
- tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
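The single largest addition is the new Human Protein Atlas wrapper, tooluniverse/hpa_tool.py, reproduced below. Each HPA tool is a class that takes a tool-config dict and exposes run(arguments), returning a plain dict; failures are reported as {"error": ...} rather than raised. A minimal usage sketch, assuming an empty config dict is acceptable (the module itself constructs helper tools that way) and that direct construction outside the ToolUniverse registry is fine; argument keys ("search_query", "gene_name") and result keys ("match_count", "location_summary") are taken from the source shown after this note:

    from tooluniverse.hpa_tool import HPASearchGenesTool, HPAGetSubcellularLocationTool

    # Resolve a gene symbol to Ensembl IDs via HPA's search_download.php endpoint.
    search = HPASearchGenesTool({})
    hits = search.run({"search_query": "EGFR"})
    # Errors come back as {"error": ...}, so use .get() when inspecting results.
    print(hits.get("match_count"), hits.get("error"))

    # Fetch annotated subcellular locations for the same gene.
    locations = HPAGetSubcellularLocationTool({}).run({"gene_name": "EGFR"})
    print(locations.get("location_summary"))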
tooluniverse/hpa_tool.py
ADDED
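The tools in this module wrap the three public HPA endpoints declared at the top of the file. For orientation, the search-based tools assemble GET requests of roughly the following shape; the parameter names come from HPASearchApiTool._make_api_request, the short column codes (g, gs, eg, scml, scal, ppi, rnatsm, ...) are HPA column identifiers used throughout the module, and EGFR is an assumed example gene:

    https://www.proteinatlas.org/api/search_download.php?search=EGFR&format=json&columns=g,gs,eg&compress=no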
|
@@ -0,0 +1,1645 @@
|
|
|
1
|
+
# hpa_tool.py
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
import xml.etree.ElementTree as ET
|
|
5
|
+
from typing import Dict, Any, List
|
|
6
|
+
from .base_tool import BaseTool
|
|
7
|
+
from .tool_registry import register_tool
|
|
8
|
+
|
|
9
|
+
HPA_SEARCH_API = "https://www.proteinatlas.org/api/search_download.php"
|
|
10
|
+
HPA_BASE = "https://www.proteinatlas.org"
|
|
11
|
+
HPA_JSON_API_TEMPLATE = "https://www.proteinatlas.org/{ensembl_id}.json"
|
|
12
|
+
HPA_XML_API_TEMPLATE = "https://www.proteinatlas.org/{ensembl_id}.xml"
|
|
13
|
+
|
|
14
|
+
# --- Base Tool Classes ---
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@register_tool("HPASearchApiTool")
|
|
18
|
+
class HPASearchApiTool(BaseTool):
|
|
19
|
+
"""
|
|
20
|
+
Base class for interacting with HPA's search_download.php API.
|
|
21
|
+
Uses HPA's search and download API to get protein expression data.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def __init__(self, tool_config):
|
|
25
|
+
super().__init__(tool_config)
|
|
26
|
+
self.timeout = 30
|
|
27
|
+
self.base_url = HPA_SEARCH_API
|
|
28
|
+
|
|
29
|
+
def _make_api_request(
|
|
30
|
+
self, search_term: str, columns: str, format_type: str = "json"
|
|
31
|
+
) -> Dict[str, Any]:
|
|
32
|
+
"""Make HPA API request with improved error handling"""
|
|
33
|
+
params = {
|
|
34
|
+
"search": search_term,
|
|
35
|
+
"format": format_type,
|
|
36
|
+
"columns": columns,
|
|
37
|
+
"compress": "no",
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
try:
|
|
41
|
+
resp = requests.get(self.base_url, params=params, timeout=self.timeout)
|
|
42
|
+
if resp.status_code == 404:
|
|
43
|
+
return {"error": f"No data found for gene '{search_term}'"}
|
|
44
|
+
if resp.status_code != 200:
|
|
45
|
+
return {
|
|
46
|
+
"error": f"HPA API request failed, HTTP {resp.status_code}",
|
|
47
|
+
"detail": resp.text,
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
if format_type == "json":
|
|
51
|
+
data = resp.json()
|
|
52
|
+
# Ensure we always return a list for consistency
|
|
53
|
+
if not isinstance(data, list):
|
|
54
|
+
return {"error": "API did not return expected list format"}
|
|
55
|
+
return data
|
|
56
|
+
else:
|
|
57
|
+
return {"tsv_data": resp.text}
|
|
58
|
+
|
|
59
|
+
except requests.RequestException as e:
|
|
60
|
+
return {"error": f"HPA API request failed: {str(e)}"}
|
|
61
|
+
except ValueError as e:
|
|
62
|
+
return {
|
|
63
|
+
"error": f"Failed to parse HPA response data: {str(e)}",
|
|
64
|
+
"content": resp.text,
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@register_tool("HPAJsonApiTool")
|
|
69
|
+
class HPAJsonApiTool(BaseTool):
|
|
70
|
+
"""
|
|
71
|
+
Base class for interacting with HPA's /{ensembl_id}.json API.
|
|
72
|
+
More efficient for getting comprehensive gene data.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
def __init__(self, tool_config):
|
|
76
|
+
super().__init__(tool_config)
|
|
77
|
+
self.timeout = 30
|
|
78
|
+
self.base_url_template = HPA_JSON_API_TEMPLATE
|
|
79
|
+
|
|
80
|
+
def _make_api_request(self, ensembl_id: str) -> Dict[str, Any]:
|
|
81
|
+
"""Make HPA JSON API request for a specific gene"""
|
|
82
|
+
url = self.base_url_template.format(ensembl_id=ensembl_id)
|
|
83
|
+
try:
|
|
84
|
+
resp = requests.get(url, timeout=self.timeout)
|
|
85
|
+
if resp.status_code == 404:
|
|
86
|
+
return {"error": f"No data found for Ensembl ID '{ensembl_id}'"}
|
|
87
|
+
if resp.status_code != 200:
|
|
88
|
+
return {
|
|
89
|
+
"error": f"HPA JSON API request failed, HTTP {resp.status_code}",
|
|
90
|
+
"detail": resp.text,
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
return resp.json()
|
|
94
|
+
|
|
95
|
+
except requests.RequestException as e:
|
|
96
|
+
return {"error": f"HPA JSON API request failed: {str(e)}"}
|
|
97
|
+
except ValueError as e:
|
|
98
|
+
return {
|
|
99
|
+
"error": f"Failed to parse HPA JSON response: {str(e)}",
|
|
100
|
+
"content": resp.text,
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@register_tool("HPAXmlApiTool")
|
|
105
|
+
class HPAXmlApiTool(BaseTool):
|
|
106
|
+
"""
|
|
107
|
+
Base class for interacting with HPA's /{ensembl_id}.xml API.
|
|
108
|
+
Optimized for comprehensive XML data extraction.
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
def __init__(self, tool_config):
|
|
112
|
+
super().__init__(tool_config)
|
|
113
|
+
self.timeout = 45
|
|
114
|
+
self.base_url_template = HPA_XML_API_TEMPLATE
|
|
115
|
+
|
|
116
|
+
def _make_api_request(self, ensembl_id: str) -> ET.Element:
|
|
117
|
+
"""Make HPA XML API request for a specific gene"""
|
|
118
|
+
url = self.base_url_template.format(ensembl_id=ensembl_id)
|
|
119
|
+
try:
|
|
120
|
+
resp = requests.get(url, timeout=self.timeout)
|
|
121
|
+
if resp.status_code == 404:
|
|
122
|
+
raise Exception(f"No XML data found for Ensembl ID '{ensembl_id}'")
|
|
123
|
+
if resp.status_code != 200:
|
|
124
|
+
raise Exception(f"HPA XML API request failed, HTTP {resp.status_code}")
|
|
125
|
+
|
|
126
|
+
return ET.fromstring(resp.content)
|
|
127
|
+
except requests.RequestException as e:
|
|
128
|
+
raise Exception(f"HPA XML API request failed: {str(e)}")
|
|
129
|
+
except ET.ParseError as e:
|
|
130
|
+
raise Exception(f"Failed to parse HPA XML response: {str(e)}")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# --- New Enhanced Tools Based on Your Optimization Plan ---
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@register_tool("HPAGetRnaExpressionBySourceTool")
|
|
137
|
+
class HPAGetRnaExpressionBySourceTool(HPASearchApiTool):
|
|
138
|
+
"""
|
|
139
|
+
Get RNA expression for a gene from specific biological sources using optimized columns parameter.
|
|
140
|
+
This tool directly leverages the comprehensive columns table for efficient queries.
|
|
141
|
+
"""
|
|
142
|
+
|
|
143
|
+
def __init__(self, tool_config):
|
|
144
|
+
super().__init__(tool_config)
|
|
145
|
+
# Use correct HPA API column identifiers
|
|
146
|
+
self.source_column_mappings = {
|
|
147
|
+
"tissue": "rnatsm", # RNA tissue specific nTPM
|
|
148
|
+
"blood": "rnablm", # RNA blood lineage specific nTPM
|
|
149
|
+
"brain": "rnabrm", # RNA brain region specific nTPM
|
|
150
|
+
"single_cell": "rnascm", # RNA single cell type specific nTPM
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
# Map expected API response field names for each source type
|
|
154
|
+
self.api_response_fields = {
|
|
155
|
+
"tissue": "RNA tissue specific nTPM",
|
|
156
|
+
"blood": "RNA blood lineage specific nTPM",
|
|
157
|
+
"brain": "RNA brain region specific nTPM",
|
|
158
|
+
"single_cell": "RNA single cell type specific nTPM",
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
# Map source names to expected keys in API response
|
|
162
|
+
self.source_name_mappings = {
|
|
163
|
+
"tissue": {
|
|
164
|
+
"adipose_tissue": ["adipose tissue", "fat"],
|
|
165
|
+
"adrenal_gland": ["adrenal gland", "adrenal"],
|
|
166
|
+
"appendix": ["appendix"],
|
|
167
|
+
"bone_marrow": ["bone marrow"],
|
|
168
|
+
"brain": ["brain", "cerebral cortex"],
|
|
169
|
+
"breast": ["breast"],
|
|
170
|
+
"bronchus": ["bronchus"],
|
|
171
|
+
"cerebellum": ["cerebellum"],
|
|
172
|
+
"cerebral_cortex": ["cerebral cortex", "brain"],
|
|
173
|
+
"cervix": ["cervix"],
|
|
174
|
+
"choroid_plexus": ["choroid plexus"],
|
|
175
|
+
"colon": ["colon"],
|
|
176
|
+
"duodenum": ["duodenum"],
|
|
177
|
+
"endometrium": ["endometrium"],
|
|
178
|
+
"epididymis": ["epididymis"],
|
|
179
|
+
"esophagus": ["esophagus"],
|
|
180
|
+
"fallopian_tube": ["fallopian tube"],
|
|
181
|
+
"gallbladder": ["gallbladder"],
|
|
182
|
+
"heart_muscle": ["heart muscle", "heart"],
|
|
183
|
+
"hippocampal_formation": ["hippocampus", "hippocampal formation"],
|
|
184
|
+
"hypothalamus": ["hypothalamus"],
|
|
185
|
+
"kidney": ["kidney"],
|
|
186
|
+
"liver": ["liver"],
|
|
187
|
+
"lung": ["lung"],
|
|
188
|
+
"lymph_node": ["lymph node"],
|
|
189
|
+
"nasopharynx": ["nasopharynx"],
|
|
190
|
+
"oral_mucosa": ["oral mucosa"],
|
|
191
|
+
"ovary": ["ovary"],
|
|
192
|
+
"pancreas": ["pancreas"],
|
|
193
|
+
"parathyroid_gland": ["parathyroid gland"],
|
|
194
|
+
"pituitary_gland": ["pituitary gland"],
|
|
195
|
+
"placenta": ["placenta"],
|
|
196
|
+
"prostate": ["prostate"],
|
|
197
|
+
"rectum": ["rectum"],
|
|
198
|
+
"retina": ["retina"],
|
|
199
|
+
"salivary_gland": ["salivary gland"],
|
|
200
|
+
"seminal_vesicle": ["seminal vesicle"],
|
|
201
|
+
"skeletal_muscle": ["skeletal muscle"],
|
|
202
|
+
"skin": ["skin"],
|
|
203
|
+
"small_intestine": ["small intestine"],
|
|
204
|
+
"smooth_muscle": ["smooth muscle"],
|
|
205
|
+
"soft_tissue": ["soft tissue"],
|
|
206
|
+
"spleen": ["spleen"],
|
|
207
|
+
"stomach": ["stomach"],
|
|
208
|
+
"testis": ["testis"],
|
|
209
|
+
"thymus": ["thymus"],
|
|
210
|
+
"thyroid_gland": ["thyroid gland"],
|
|
211
|
+
"tongue": ["tongue"],
|
|
212
|
+
"tonsil": ["tonsil"],
|
|
213
|
+
"urinary_bladder": ["urinary bladder"],
|
|
214
|
+
"vagina": ["vagina"],
|
|
215
|
+
},
|
|
216
|
+
"blood": {
|
|
217
|
+
"t_cell": ["t-cell", "t cell"],
|
|
218
|
+
"b_cell": ["b-cell", "b cell"],
|
|
219
|
+
"nk_cell": ["nk-cell", "nk cell", "natural killer"],
|
|
220
|
+
"monocyte": ["monocyte"],
|
|
221
|
+
"neutrophil": ["neutrophil"],
|
|
222
|
+
"eosinophil": ["eosinophil"],
|
|
223
|
+
"basophil": ["basophil"],
|
|
224
|
+
"dendritic_cell": ["dendritic cell"],
|
|
225
|
+
},
|
|
226
|
+
"brain": {
|
|
227
|
+
"cerebellum": ["cerebellum"],
|
|
228
|
+
"cerebral_cortex": ["cerebral cortex", "cortex"],
|
|
229
|
+
"hippocampus": ["hippocampus", "hippocampal formation"],
|
|
230
|
+
"hypothalamus": ["hypothalamus"],
|
|
231
|
+
"amygdala": ["amygdala"],
|
|
232
|
+
"brainstem": ["brainstem", "brain stem"],
|
|
233
|
+
"thalamus": ["thalamus"],
|
|
234
|
+
},
|
|
235
|
+
"single_cell": {
|
|
236
|
+
"t_cell": ["t-cell", "t cell"],
|
|
237
|
+
"b_cell": ["b-cell", "b cell"],
|
|
238
|
+
"hepatocyte": ["hepatocyte"],
|
|
239
|
+
"neuron": ["neuron"],
|
|
240
|
+
"astrocyte": ["astrocyte"],
|
|
241
|
+
"fibroblast": ["fibroblast"],
|
|
242
|
+
},
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
|
246
|
+
gene_name = arguments.get("gene_name")
|
|
247
|
+
source_type = arguments.get("source_type", "").lower()
|
|
248
|
+
source_name = (
|
|
249
|
+
arguments.get("source_name", "").lower().replace(" ", "_").replace("-", "_")
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
if not gene_name:
|
|
253
|
+
return {"error": "Parameter 'gene_name' is required"}
|
|
254
|
+
if not source_type:
|
|
255
|
+
return {"error": "Parameter 'source_type' is required"}
|
|
256
|
+
if not source_name:
|
|
257
|
+
return {"error": "Parameter 'source_name' is required"}
|
|
258
|
+
|
|
259
|
+
# Validate source type
|
|
260
|
+
if source_type not in self.source_column_mappings:
|
|
261
|
+
available_types = ", ".join(self.source_column_mappings.keys())
|
|
262
|
+
return {
|
|
263
|
+
"error": f"Invalid source_type '{source_type}'. Available types: {available_types}"
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
# Enhanced validation with intelligent recommendations
|
|
267
|
+
if source_name not in self.source_name_mappings[source_type]:
|
|
268
|
+
available_sources = list(self.source_name_mappings[source_type].keys())
|
|
269
|
+
|
|
270
|
+
# Find similar source names (fuzzy matching)
|
|
271
|
+
similar_sources = []
|
|
272
|
+
source_keywords = source_name.replace("_", " ").split()
|
|
273
|
+
|
|
274
|
+
for valid_source in available_sources:
|
|
275
|
+
# Direct substring matching
|
|
276
|
+
if (
|
|
277
|
+
source_name.lower() in valid_source.lower()
|
|
278
|
+
or valid_source.lower() in source_name.lower()
|
|
279
|
+
):
|
|
280
|
+
similar_sources.append(valid_source)
|
|
281
|
+
continue
|
|
282
|
+
|
|
283
|
+
# Check with underscores removed/normalized
|
|
284
|
+
normalized_input = source_name.lower().replace("_", "").replace(" ", "")
|
|
285
|
+
normalized_valid = (
|
|
286
|
+
valid_source.lower().replace("_", "").replace(" ", "")
|
|
287
|
+
)
|
|
288
|
+
if (
|
|
289
|
+
normalized_input in normalized_valid
|
|
290
|
+
or normalized_valid in normalized_input
|
|
291
|
+
):
|
|
292
|
+
similar_sources.append(valid_source)
|
|
293
|
+
continue
|
|
294
|
+
|
|
295
|
+
# Check individual keywords
|
|
296
|
+
for keyword in source_keywords:
|
|
297
|
+
if (
|
|
298
|
+
keyword.lower() in valid_source.lower()
|
|
299
|
+
or valid_source.lower() in keyword.lower()
|
|
300
|
+
):
|
|
301
|
+
similar_sources.append(valid_source)
|
|
302
|
+
break
|
|
303
|
+
|
|
304
|
+
error_msg = (
|
|
305
|
+
f"Invalid source_name '{source_name}' for source_type '{source_type}'. "
|
|
306
|
+
)
|
|
307
|
+
if similar_sources:
|
|
308
|
+
error_msg += f"Similar options: {similar_sources[:3]}. "
|
|
309
|
+
error_msg += (
|
|
310
|
+
f"All available sources for '{source_type}': {available_sources}"
|
|
311
|
+
)
|
|
312
|
+
return {"error": error_msg}
|
|
313
|
+
|
|
314
|
+
try:
|
|
315
|
+
# Get the correct API column
|
|
316
|
+
api_column = self.source_column_mappings[source_type]
|
|
317
|
+
columns = f"g,gs,{api_column}"
|
|
318
|
+
|
|
319
|
+
# Call the search API
|
|
320
|
+
response_data = self._make_api_request(gene_name, columns)
|
|
321
|
+
|
|
322
|
+
if "error" in response_data:
|
|
323
|
+
return response_data
|
|
324
|
+
|
|
325
|
+
if not response_data or len(response_data) == 0:
|
|
326
|
+
return {
|
|
327
|
+
"gene_name": gene_name,
|
|
328
|
+
"source_type": source_type,
|
|
329
|
+
"source_name": source_name,
|
|
330
|
+
"expression_value": "N/A",
|
|
331
|
+
"status": "Gene not found",
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
# Get the first result
|
|
335
|
+
gene_data = response_data[0]
|
|
336
|
+
|
|
337
|
+
# Extract expression data from the API response
|
|
338
|
+
expression_value = "N/A"
|
|
339
|
+
available_sources = []
|
|
340
|
+
|
|
341
|
+
# Get the expression data dictionary for this source type
|
|
342
|
+
api_field_name = self.api_response_fields[source_type]
|
|
343
|
+
expression_data = gene_data.get(api_field_name)
|
|
344
|
+
|
|
345
|
+
if expression_data and isinstance(expression_data, dict):
|
|
346
|
+
available_sources = list(expression_data.keys())
|
|
347
|
+
|
|
348
|
+
# Get possible names for this source
|
|
349
|
+
possible_names = self.source_name_mappings[source_type][source_name]
|
|
350
|
+
|
|
351
|
+
# Try to find a matching source name in the response
|
|
352
|
+
for source_key in expression_data.keys():
|
|
353
|
+
source_key_lower = source_key.lower()
|
|
354
|
+
for possible_name in possible_names:
|
|
355
|
+
if (
|
|
356
|
+
possible_name.lower() in source_key_lower
|
|
357
|
+
or source_key_lower in possible_name.lower()
|
|
358
|
+
):
|
|
359
|
+
expression_value = expression_data[source_key]
|
|
360
|
+
break
|
|
361
|
+
if expression_value != "N/A":
|
|
362
|
+
break
|
|
363
|
+
|
|
364
|
+
# If exact match not found, look for partial matches
|
|
365
|
+
if expression_value == "N/A":
|
|
366
|
+
source_keywords = source_name.replace("_", " ").split()
|
|
367
|
+
for source_key in expression_data.keys():
|
|
368
|
+
source_key_lower = source_key.lower()
|
|
369
|
+
for keyword in source_keywords:
|
|
370
|
+
if keyword in source_key_lower:
|
|
371
|
+
expression_value = expression_data[source_key]
|
|
372
|
+
break
|
|
373
|
+
if expression_value != "N/A":
|
|
374
|
+
break
|
|
375
|
+
|
|
376
|
+
# Categorize expression level
|
|
377
|
+
expression_level = "unknown"
|
|
378
|
+
if expression_value != "N/A":
|
|
379
|
+
try:
|
|
380
|
+
val = float(expression_value)
|
|
381
|
+
if val > 50:
|
|
382
|
+
expression_level = "very high"
|
|
383
|
+
elif val > 10:
|
|
384
|
+
expression_level = "high"
|
|
385
|
+
elif val > 1:
|
|
386
|
+
expression_level = "medium"
|
|
387
|
+
elif val > 0.1:
|
|
388
|
+
expression_level = "low"
|
|
389
|
+
else:
|
|
390
|
+
expression_level = "very low"
|
|
391
|
+
except (ValueError, TypeError):
|
|
392
|
+
expression_level = "unknown"
|
|
393
|
+
|
|
394
|
+
return {
|
|
395
|
+
"gene_name": gene_data.get("Gene", gene_name),
|
|
396
|
+
"gene_synonym": gene_data.get("Gene synonym", ""),
|
|
397
|
+
"source_type": source_type,
|
|
398
|
+
"source_name": source_name,
|
|
399
|
+
"expression_value": expression_value,
|
|
400
|
+
"expression_level": expression_level,
|
|
401
|
+
"expression_unit": "nTPM",
|
|
402
|
+
"column_queried": api_column,
|
|
403
|
+
"available_sources": (
|
|
404
|
+
available_sources[:10]
|
|
405
|
+
if len(available_sources) > 10
|
|
406
|
+
else available_sources
|
|
407
|
+
),
|
|
408
|
+
"total_available_sources": len(available_sources),
|
|
409
|
+
"status": (
|
|
410
|
+
"success"
|
|
411
|
+
if expression_value != "N/A"
|
|
412
|
+
else "no_expression_data_for_source"
|
|
413
|
+
),
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
except Exception as e:
|
|
417
|
+
return {
|
|
418
|
+
"error": f"Failed to retrieve RNA expression data: {str(e)}",
|
|
419
|
+
"gene_name": gene_name,
|
|
420
|
+
"source_type": source_type,
|
|
421
|
+
"source_name": source_name,
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
@register_tool("HPAGetSubcellularLocationTool")
|
|
426
|
+
class HPAGetSubcellularLocationTool(HPASearchApiTool):
|
|
427
|
+
"""
|
|
428
|
+
Get annotated subcellular locations for a protein using optimized columns parameter.
|
|
429
|
+
Uses scml (main location) and scal (additional location) columns for efficient queries.
|
|
430
|
+
"""
|
|
431
|
+
|
|
432
|
+
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
|
433
|
+
gene_name = arguments.get("gene_name")
|
|
434
|
+
if not gene_name:
|
|
435
|
+
return {"error": "Parameter 'gene_name' is required"}
|
|
436
|
+
|
|
437
|
+
# Use specific columns for subcellular location data
|
|
438
|
+
result = self._make_api_request(gene_name, "g,gs,scml,scal")
|
|
439
|
+
|
|
440
|
+
if "error" in result:
|
|
441
|
+
return result
|
|
442
|
+
|
|
443
|
+
if not result:
|
|
444
|
+
return {"error": "No subcellular location data found"}
|
|
445
|
+
|
|
446
|
+
gene_data = result[0]
|
|
447
|
+
|
|
448
|
+
# Parse main and additional locations
|
|
449
|
+
main_location = gene_data.get("Subcellular main location", "")
|
|
450
|
+
additional_location = gene_data.get("Subcellular additional location", "")
|
|
451
|
+
|
|
452
|
+
# Handle different data types (string or list)
|
|
453
|
+
if isinstance(main_location, list):
|
|
454
|
+
main_locations = main_location
|
|
455
|
+
elif isinstance(main_location, str):
|
|
456
|
+
main_locations = (
|
|
457
|
+
[loc.strip() for loc in main_location.split(";") if loc.strip()]
|
|
458
|
+
if main_location
|
|
459
|
+
else []
|
|
460
|
+
)
|
|
461
|
+
else:
|
|
462
|
+
main_locations = []
|
|
463
|
+
|
|
464
|
+
if isinstance(additional_location, list):
|
|
465
|
+
additional_locations = additional_location
|
|
466
|
+
elif isinstance(additional_location, str):
|
|
467
|
+
additional_locations = (
|
|
468
|
+
[loc.strip() for loc in additional_location.split(";") if loc.strip()]
|
|
469
|
+
if additional_location
|
|
470
|
+
else []
|
|
471
|
+
)
|
|
472
|
+
else:
|
|
473
|
+
additional_locations = []
|
|
474
|
+
|
|
475
|
+
return {
|
|
476
|
+
"gene_name": gene_data.get("Gene", gene_name),
|
|
477
|
+
"gene_synonym": gene_data.get("Gene synonym", ""),
|
|
478
|
+
"main_locations": main_locations,
|
|
479
|
+
"additional_locations": additional_locations,
|
|
480
|
+
"total_locations": len(main_locations) + len(additional_locations),
|
|
481
|
+
"location_summary": self._generate_location_summary(
|
|
482
|
+
main_locations, additional_locations
|
|
483
|
+
),
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
def _generate_location_summary(
|
|
487
|
+
self, main_locs: List[str], add_locs: List[str]
|
|
488
|
+
) -> str:
|
|
489
|
+
"""Generate a summary of subcellular locations"""
|
|
490
|
+
if not main_locs and not add_locs:
|
|
491
|
+
return "No subcellular location data available"
|
|
492
|
+
|
|
493
|
+
summary_parts = []
|
|
494
|
+
if main_locs:
|
|
495
|
+
summary_parts.append(f"Primary: {', '.join(main_locs)}")
|
|
496
|
+
if add_locs:
|
|
497
|
+
summary_parts.append(f"Additional: {', '.join(add_locs)}")
|
|
498
|
+
|
|
499
|
+
return "; ".join(summary_parts)
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
# --- Existing Tools (Updated with improvements) ---
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
@register_tool("HPASearchGenesTool")
|
|
506
|
+
class HPASearchGenesTool(HPASearchApiTool):
|
|
507
|
+
"""
|
|
508
|
+
Search for matching genes by gene name, keywords, or cell line names and return Ensembl ID list.
|
|
509
|
+
This is the entry tool for many query workflows.
|
|
510
|
+
"""
|
|
511
|
+
|
|
512
|
+
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
|
513
|
+
search_query = arguments.get("search_query")
|
|
514
|
+
if not search_query:
|
|
515
|
+
return {"error": "Parameter 'search_query' is required"}
|
|
516
|
+
|
|
517
|
+
# 'g' for Gene name, 'gs' for Gene synonym, 'eg' for Ensembl ID
|
|
518
|
+
columns = "g,gs,eg"
|
|
519
|
+
result = self._make_api_request(search_query, columns)
|
|
520
|
+
|
|
521
|
+
if "error" in result:
|
|
522
|
+
return result
|
|
523
|
+
|
|
524
|
+
if not result or not isinstance(result, list):
|
|
525
|
+
return {"error": f"No matching genes found for query '{search_query}'"}
|
|
526
|
+
|
|
527
|
+
formatted_results = []
|
|
528
|
+
for gene in result:
|
|
529
|
+
gene_synonym = gene.get("Gene synonym", "")
|
|
530
|
+
if isinstance(gene_synonym, str):
|
|
531
|
+
synonyms = gene_synonym.split(", ") if gene_synonym else []
|
|
532
|
+
elif isinstance(gene_synonym, list):
|
|
533
|
+
synonyms = gene_synonym
|
|
534
|
+
else:
|
|
535
|
+
synonyms = []
|
|
536
|
+
|
|
537
|
+
formatted_results.append(
|
|
538
|
+
{
|
|
539
|
+
"gene_name": gene.get("Gene"),
|
|
540
|
+
"ensembl_id": gene.get("Ensembl"),
|
|
541
|
+
"gene_synonyms": synonyms,
|
|
542
|
+
}
|
|
543
|
+
)
|
|
544
|
+
|
|
545
|
+
return {
|
|
546
|
+
"search_query": search_query,
|
|
547
|
+
"match_count": len(formatted_results),
|
|
548
|
+
"genes": formatted_results,
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
@register_tool("HPAGetComparativeExpressionTool")
|
|
553
|
+
class HPAGetComparativeExpressionTool(HPASearchApiTool):
|
|
554
|
+
"""
|
|
555
|
+
Compare gene expression levels in specific cell lines and healthy tissues.
|
|
556
|
+
Get expression data for comparison by gene name and cell line name.
|
|
557
|
+
"""
|
|
558
|
+
|
|
559
|
+
def __init__(self, tool_config):
|
|
560
|
+
super().__init__(tool_config)
|
|
561
|
+
# Mapping of common cell lines to their column identifiers
|
|
562
|
+
self.cell_line_columns = {
|
|
563
|
+
"ishikawa": "cell_RNA_ishikawa_heraklio",
|
|
564
|
+
"hela": "cell_RNA_hela",
|
|
565
|
+
"mcf7": "cell_RNA_mcf7",
|
|
566
|
+
"a549": "cell_RNA_a549",
|
|
567
|
+
"hepg2": "cell_RNA_hepg2",
|
|
568
|
+
"jurkat": "cell_RNA_jurkat",
|
|
569
|
+
"pc3": "cell_RNA_pc3",
|
|
570
|
+
"rh30": "cell_RNA_rh30",
|
|
571
|
+
"siha": "cell_RNA_siha",
|
|
572
|
+
"u251": "cell_RNA_u251",
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
|
576
|
+
gene_name = arguments.get("gene_name")
|
|
577
|
+
cell_line = arguments.get("cell_line", "").lower()
|
|
578
|
+
|
|
579
|
+
if not gene_name:
|
|
580
|
+
return {"error": "Parameter 'gene_name' is required"}
|
|
581
|
+
if not cell_line:
|
|
582
|
+
return {"error": "Parameter 'cell_line' is required"}
|
|
583
|
+
|
|
584
|
+
# Enhanced validation with intelligent recommendations
|
|
585
|
+
cell_column = self.cell_line_columns.get(cell_line)
|
|
586
|
+
if not cell_column:
|
|
587
|
+
available_lines = list(self.cell_line_columns.keys())
|
|
588
|
+
|
|
589
|
+
# Find similar cell line names
|
|
590
|
+
similar_lines = []
|
|
591
|
+
for valid_line in available_lines:
|
|
592
|
+
if cell_line in valid_line or valid_line in cell_line:
|
|
593
|
+
similar_lines.append(valid_line)
|
|
594
|
+
|
|
595
|
+
error_msg = f"Unsupported cell_line '{cell_line}'. "
|
|
596
|
+
if similar_lines:
|
|
597
|
+
error_msg += f"Similar options: {similar_lines}. "
|
|
598
|
+
error_msg += f"All supported cell lines: {available_lines}"
|
|
599
|
+
return {"error": error_msg}
|
|
600
|
+
|
|
601
|
+
# Request expression data for the cell line
|
|
602
|
+
cell_columns = f"g,gs,{cell_column}"
|
|
603
|
+
cell_result = self._make_api_request(gene_name, cell_columns)
|
|
604
|
+
if "error" in cell_result:
|
|
605
|
+
return cell_result
|
|
606
|
+
|
|
607
|
+
# Request expression data for healthy tissues
|
|
608
|
+
tissue_columns = "g,gs,rnatsm"
|
|
609
|
+
tissue_result = self._make_api_request(gene_name, tissue_columns)
|
|
610
|
+
if "error" in tissue_result:
|
|
611
|
+
return tissue_result
|
|
612
|
+
|
|
613
|
+
# Format the result
|
|
614
|
+
if not cell_result or not tissue_result:
|
|
615
|
+
return {"error": "No expression data found"}
|
|
616
|
+
|
|
617
|
+
# Extract the first matching gene data
|
|
618
|
+
cell_data = (
|
|
619
|
+
cell_result[0] if isinstance(cell_result, list) and cell_result else {}
|
|
620
|
+
)
|
|
621
|
+
tissue_data = (
|
|
622
|
+
tissue_result[0]
|
|
623
|
+
if isinstance(tissue_result, list) and tissue_result
|
|
624
|
+
else {}
|
|
625
|
+
)
|
|
626
|
+
|
|
627
|
+
return {
|
|
628
|
+
"gene_name": gene_name,
|
|
629
|
+
"gene_symbol": cell_data.get("Gene", gene_name),
|
|
630
|
+
"gene_synonym": cell_data.get("Gene synonym", ""),
|
|
631
|
+
"cell_line": cell_line,
|
|
632
|
+
"cell_line_expression": cell_data.get(cell_column, "N/A"),
|
|
633
|
+
"healthy_tissue_expression": tissue_data.get(
|
|
634
|
+
"RNA tissue specific nTPM", "N/A"
|
|
635
|
+
),
|
|
636
|
+
"expression_unit": "nTPM (normalized Transcripts Per Million)",
|
|
637
|
+
"comparison_summary": self._generate_comparison_summary(
|
|
638
|
+
cell_data.get(cell_column), tissue_data.get("RNA tissue specific nTPM")
|
|
639
|
+
),
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
def _generate_comparison_summary(self, cell_expr, tissue_expr) -> str:
|
|
643
|
+
"""Generate expression level comparison summary"""
|
|
644
|
+
try:
|
|
645
|
+
cell_val = float(cell_expr) if cell_expr and cell_expr != "N/A" else None
|
|
646
|
+
tissue_val = (
|
|
647
|
+
float(tissue_expr) if tissue_expr and tissue_expr != "N/A" else None
|
|
648
|
+
)
|
|
649
|
+
|
|
650
|
+
if cell_val is None or tissue_val is None:
|
|
651
|
+
return "Insufficient data for comparison"
|
|
652
|
+
|
|
653
|
+
if cell_val > tissue_val * 2:
|
|
654
|
+
return f"Expression significantly higher in cell line ({cell_val:.2f} vs {tissue_val:.2f})"
|
|
655
|
+
elif tissue_val > cell_val * 2:
|
|
656
|
+
return f"Expression significantly higher in healthy tissues ({tissue_val:.2f} vs {cell_val:.2f})"
|
|
657
|
+
else:
|
|
658
|
+
return f"Expression levels similar (cell line: {cell_val:.2f}, healthy tissues: {tissue_val:.2f})"
|
|
659
|
+
except Exception:
|
|
660
|
+
return "Failed to calculate expression level comparison"
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
@register_tool("HPAGetDiseaseExpressionTool")
|
|
664
|
+
class HPAGetDiseaseExpressionTool(HPASearchApiTool):
|
|
665
|
+
"""
|
|
666
|
+
Get expression data for a gene in specific diseases and tissues.
|
|
667
|
+
Get related expression information by gene name, tissue type, and disease name.
|
|
668
|
+
"""
|
|
669
|
+
|
|
670
|
+
def __init__(self, tool_config):
|
|
671
|
+
super().__init__(tool_config)
|
|
672
|
+
# Mapping of common cancer types to their column identifiers
|
|
673
|
+
self.cancer_columns = {
|
|
674
|
+
"brain_cancer": "cancer_RNA_brain_cancer",
|
|
675
|
+
"breast_cancer": "cancer_RNA_breast_cancer",
|
|
676
|
+
"colon_cancer": "cancer_RNA_colon_cancer",
|
|
677
|
+
"lung_cancer": "cancer_RNA_lung_cancer",
|
|
678
|
+
"liver_cancer": "cancer_RNA_liver_cancer",
|
|
679
|
+
"prostate_cancer": "cancer_RNA_prostate_cancer",
|
|
680
|
+
"kidney_cancer": "cancer_RNA_kidney_cancer",
|
|
681
|
+
"pancreatic_cancer": "cancer_RNA_pancreatic_cancer",
|
|
682
|
+
"stomach_cancer": "cancer_RNA_stomach_cancer",
|
|
683
|
+
"ovarian_cancer": "cancer_RNA_ovarian_cancer",
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
|
687
|
+
gene_name = arguments.get("gene_name")
|
|
688
|
+
tissue_type = arguments.get("tissue_type", "").lower()
|
|
689
|
+
disease_name = arguments.get("disease_name", "").lower()
|
|
690
|
+
|
|
691
|
+
if not gene_name:
|
|
692
|
+
return {"error": "Parameter 'gene_name' is required"}
|
|
693
|
+
if not disease_name:
|
|
694
|
+
return {"error": "Parameter 'disease_name' is required"}
|
|
695
|
+
|
|
696
|
+
# Enhanced validation with intelligent recommendations
|
|
697
|
+
disease_key = f"{tissue_type}_{disease_name}" if tissue_type else disease_name
|
|
698
|
+
cancer_column = None
|
|
699
|
+
|
|
700
|
+
# Match cancer type
|
|
701
|
+
for key, column in self.cancer_columns.items():
|
|
702
|
+
if disease_key in key or disease_name in key:
|
|
703
|
+
cancer_column = column
|
|
704
|
+
break
|
|
705
|
+
|
|
706
|
+
if not cancer_column:
|
|
707
|
+
available_diseases = [
|
|
708
|
+
k.replace("_", " ") for k in self.cancer_columns.keys()
|
|
709
|
+
]
|
|
710
|
+
|
|
711
|
+
# Find similar disease names
|
|
712
|
+
similar_diseases = []
|
|
713
|
+
disease_keywords = disease_name.replace("_", " ").split()
|
|
714
|
+
|
|
715
|
+
for valid_disease in available_diseases:
|
|
716
|
+
for keyword in disease_keywords:
|
|
717
|
+
if (
|
|
718
|
+
keyword in valid_disease.lower()
|
|
719
|
+
or valid_disease.lower() in keyword
|
|
720
|
+
):
|
|
721
|
+
similar_diseases.append(valid_disease)
|
|
722
|
+
break
|
|
723
|
+
|
|
724
|
+
error_msg = f"Unsupported disease_name '{disease_name}'. "
|
|
725
|
+
if similar_diseases:
|
|
726
|
+
error_msg += f"Similar options: {similar_diseases[:3]}. "
|
|
727
|
+
error_msg += f"All supported diseases: {available_diseases}"
|
|
728
|
+
return {"error": error_msg}
|
|
729
|
+
|
|
730
|
+
# Build request columns
|
|
731
|
+
columns = f"g,gs,{cancer_column},rnatsm"
|
|
732
|
+
result = self._make_api_request(gene_name, columns)
|
|
733
|
+
|
|
734
|
+
if "error" in result:
|
|
735
|
+
return result
|
|
736
|
+
|
|
737
|
+
if not result:
|
|
738
|
+
return {"error": "No expression data found"}
|
|
739
|
+
|
|
740
|
+
# Extract the first matching gene data
|
|
741
|
+
gene_data = result[0] if isinstance(result, list) and result else {}
|
|
742
|
+
|
|
743
|
+
return {
|
|
744
|
+
"gene_name": gene_name,
|
|
745
|
+
"gene_symbol": gene_data.get("Gene", gene_name),
|
|
746
|
+
"gene_synonym": gene_data.get("Gene synonym", ""),
|
|
747
|
+
"tissue_type": tissue_type or "Not specified",
|
|
748
|
+
"disease_name": disease_name,
|
|
749
|
+
"disease_expression": gene_data.get(cancer_column, "N/A"),
|
|
750
|
+
"healthy_expression": gene_data.get("RNA tissue specific nTPM", "N/A"),
|
|
751
|
+
"expression_unit": "nTPM (normalized Transcripts Per Million)",
|
|
752
|
+
"disease_vs_healthy": self._compare_disease_healthy(
|
|
753
|
+
gene_data.get(cancer_column), gene_data.get("RNA tissue specific nTPM")
|
|
754
|
+
),
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
def _compare_disease_healthy(self, disease_expr, healthy_expr) -> str:
|
|
758
|
+
"""Compare expression difference between disease and healthy state"""
|
|
759
|
+
try:
|
|
760
|
+
disease_val = (
|
|
761
|
+
float(disease_expr) if disease_expr and disease_expr != "N/A" else None
|
|
762
|
+
)
|
|
763
|
+
healthy_val = (
|
|
764
|
+
float(healthy_expr) if healthy_expr and healthy_expr != "N/A" else None
|
|
765
|
+
)
|
|
766
|
+
|
|
767
|
+
if disease_val is None or healthy_val is None:
|
|
768
|
+
return "Insufficient data for comparison"
|
|
769
|
+
|
|
770
|
+
fold_change = disease_val / healthy_val if healthy_val > 0 else float("inf")
|
|
771
|
+
|
|
772
|
+
if fold_change > 2:
|
|
773
|
+
return f"Disease state expression upregulated {fold_change:.2f} fold"
|
|
774
|
+
elif fold_change < 0.5:
|
|
775
|
+
return (
|
|
776
|
+
f"Disease state expression downregulated {1/fold_change:.2f} fold"
|
|
777
|
+
)
|
|
778
|
+
else:
|
|
779
|
+
return f"Expression level relatively stable (fold change: {fold_change:.2f})"
|
|
780
|
+
except Exception:
|
|
781
|
+
return "Failed to calculate expression difference"
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
@register_tool("HPAGetBiologicalProcessTool")
|
|
785
|
+
class HPAGetBiologicalProcessTool(HPASearchApiTool):
|
|
786
|
+
"""
|
|
787
|
+
Get biological process information related to a gene.
|
|
788
|
+
Get specific biological processes a gene is involved in by gene name.
|
|
789
|
+
"""
|
|
790
|
+
|
|
791
|
+
def __init__(self, tool_config):
|
|
792
|
+
super().__init__(tool_config)
|
|
793
|
+
# Predefined biological process list
|
|
794
|
+
self.target_processes = [
|
|
795
|
+
"Apoptosis",
|
|
796
|
+
"Biological rhythms",
|
|
797
|
+
"Cell cycle",
|
|
798
|
+
"Host-virus interaction",
|
|
799
|
+
"Necrosis",
|
|
800
|
+
"Transcription",
|
|
801
|
+
"Transcription regulation",
|
|
802
|
+
]
|
|
803
|
+
|
|
804
|
+
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
|
805
|
+
gene_name = arguments.get("gene_name")
|
|
806
|
+
filter_processes = arguments.get("filter_processes", True)
|
|
807
|
+
|
|
808
|
+
if not gene_name:
|
|
809
|
+
return {"error": "Parameter 'gene_name' is required"}
|
|
810
|
+
|
|
811
|
+
# Request biological process data for the gene
|
|
812
|
+
columns = "g,gs,upbp"
|
|
813
|
+
result = self._make_api_request(gene_name, columns)
|
|
814
|
+
|
|
815
|
+
if "error" in result:
|
|
816
|
+
return result
|
|
817
|
+
|
|
818
|
+
if not result:
|
|
819
|
+
return {"error": "No gene data found"}
|
|
820
|
+
|
|
821
|
+
# Extract the first matching gene data
|
|
822
|
+
gene_data = result[0] if isinstance(result, list) and result else {}
|
|
823
|
+
|
|
824
|
+
# Parse biological processes
|
|
825
|
+
biological_processes = gene_data.get("Biological process", "")
|
|
826
|
+
if not biological_processes or biological_processes == "N/A":
|
|
827
|
+
return {
|
|
828
|
+
"gene_name": gene_name,
|
|
829
|
+
"gene_symbol": gene_data.get("Gene", gene_name),
|
|
830
|
+
"gene_synonym": gene_data.get("Gene synonym", ""),
|
|
831
|
+
"biological_processes": [],
|
|
832
|
+
"target_processes_found": [],
|
|
833
|
+
"target_process_names": [],
|
|
834
|
+
"total_processes": 0,
|
|
835
|
+
"target_processes_count": 0,
|
|
836
|
+
}
|
|
837
|
+
|
|
838
|
+
# Split and clean process list - handle both string and list formats
|
|
839
|
+
processes_list = []
|
|
840
|
+
if isinstance(biological_processes, list):
|
|
841
|
+
processes_list = biological_processes
|
|
842
|
+
elif isinstance(biological_processes, str):
|
|
843
|
+
# Usually separated by semicolon or comma
|
|
844
|
+
processes_list = [
|
|
845
|
+
p.strip()
|
|
846
|
+
for p in biological_processes.replace(";", ",").split(",")
|
|
847
|
+
if p.strip()
|
|
848
|
+
]
|
|
849
|
+
|
|
850
|
+
# Filter target processes
|
|
851
|
+
target_found = []
|
|
852
|
+
if filter_processes:
|
|
853
|
+
for process in processes_list:
|
|
854
|
+
for target in self.target_processes:
|
|
855
|
+
if target.lower() in process.lower():
|
|
856
|
+
target_found.append(
|
|
857
|
+
{"target_process": target, "full_description": process}
|
|
858
|
+
)
|
|
859
|
+
|
|
860
|
+
return {
|
|
861
|
+
"gene_name": gene_name,
|
|
862
|
+
"gene_symbol": gene_data.get("Gene", gene_name),
|
|
863
|
+
"gene_synonym": gene_data.get("Gene synonym", ""),
|
|
864
|
+
"biological_processes": processes_list,
|
|
865
|
+
"target_processes_found": target_found,
|
|
866
|
+
"target_process_names": [tp["target_process"] for tp in target_found],
|
|
867
|
+
"total_processes": len(processes_list),
|
|
868
|
+
"target_processes_count": len(target_found),
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
|
|
872
|
+
@register_tool("HPAGetCancerPrognosticsTool")
|
|
873
|
+
class HPAGetCancerPrognosticsTool(HPAJsonApiTool):
|
|
874
|
+
"""
|
|
875
|
+
Get prognostic value of a gene across various cancers.
|
|
876
|
+
Uses the efficient JSON API to retrieve cancer prognostic data.
|
|
877
|
+
"""
|
|
878
|
+
|
|
879
|
+
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
|
880
|
+
ensembl_id = arguments.get("ensembl_id")
|
|
881
|
+
if not ensembl_id:
|
|
882
|
+
return {"error": "Parameter 'ensembl_id' is required"}
|
|
883
|
+
|
|
884
|
+
data = self._make_api_request(ensembl_id)
|
|
885
|
+
if "error" in data:
|
|
886
|
+
return data
|
|
887
|
+
|
|
888
|
+
prognostics = []
|
|
889
|
+
for key, value in data.items():
|
|
890
|
+
if key.startswith("Cancer prognostics") and isinstance(value, dict):
|
|
891
|
+
cancer_type = key.replace("Cancer prognostics - ", "").strip()
|
|
892
|
+
if value and value.get("is_prognostic"):
|
|
893
|
+
prognostics.append(
|
|
894
|
+
{
|
|
895
|
+
"cancer_type": cancer_type,
|
|
896
|
+
"prognostic_type": value.get("prognostic type", "Unknown"),
|
|
897
|
+
"p_value": value.get("p_val", "N/A"),
|
|
898
|
+
"is_prognostic": value.get("is_prognostic", False),
|
|
899
|
+
}
|
|
900
|
+
)
|
|
901
|
+
|
|
902
|
+
return {
|
|
903
|
+
"ensembl_id": ensembl_id,
|
|
904
|
+
"gene": data.get("Gene", "Unknown"),
|
|
905
|
+
"gene_synonym": data.get("Gene synonym", ""),
|
|
906
|
+
"prognostic_cancers_count": len(prognostics),
|
|
907
|
+
"prognostic_summary": (
|
|
908
|
+
prognostics
|
|
909
|
+
if prognostics
|
|
910
|
+
else "No significant prognostic value found in the analyzed cancers."
|
|
911
|
+
),
|
|
912
|
+
"note": "Prognostic value indicates whether high/low expression of this gene correlates with patient survival in specific cancer types.",
|
|
913
|
+
}
|
|
914
|
+
|
|
915
|
+
|
|
916
|
+
@register_tool("HPAGetProteinInteractionsTool")
|
|
917
|
+
class HPAGetProteinInteractionsTool(HPASearchApiTool):
|
|
918
|
+
"""
|
|
919
|
+
Get protein-protein interaction partners for a gene.
|
|
920
|
+
Uses search API to retrieve interaction data.
|
|
921
|
+
"""
|
|
922
|
+
|
|
923
|
+
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
|
924
|
+
gene_name = arguments.get("gene_name")
|
|
925
|
+
if not gene_name:
|
|
926
|
+
return {"error": "Parameter 'gene_name' is required"}
|
|
927
|
+
|
|
928
|
+
# Use 'ppi' column to retrieve protein-protein interactions
|
|
929
|
+
columns = "g,gs,ppi"
|
|
930
|
+
result = self._make_api_request(gene_name, columns)
|
|
931
|
+
|
|
932
|
+
if "error" in result:
|
|
933
|
+
return result
|
|
934
|
+
|
|
935
|
+
if not result or not isinstance(result, list):
|
|
936
|
+
return {"error": f"No interaction data found for gene '{gene_name}'"}
|
|
937
|
+
|
|
938
|
+
gene_data = result[0]
|
|
939
|
+
interactions_str = gene_data.get("Protein-protein interaction", "")
|
|
940
|
+
|
|
941
|
+
if not interactions_str or interactions_str == "N/A":
|
|
942
|
+
return {
|
|
943
|
+
"gene": gene_data.get("Gene", gene_name),
|
|
944
|
+
"gene_synonym": gene_data.get("Gene synonym", ""),
|
|
945
|
+
"interactions": "No interaction data found.",
|
|
946
|
+
"interactor_count": 0,
|
|
947
|
+
"interactors": [],
|
|
948
|
+
}
|
|
949
|
+
|
|
950
|
+
# Parse interaction string (usually semicolon or comma separated)
|
|
951
|
+
interactors = [
|
|
952
|
+
i.strip()
|
|
953
|
+
for i in interactions_str.replace(";", ",").split(",")
|
|
954
|
+
if i.strip()
|
|
955
|
+
]
|
|
956
|
+
|
|
957
|
+
return {
|
|
958
|
+
"gene": gene_data.get("Gene", gene_name),
|
|
959
|
+
"gene_synonym": gene_data.get("Gene synonym", ""),
|
|
960
|
+
"interactor_count": len(interactors),
|
|
961
|
+
"interactors": interactors,
|
|
962
|
+
"interaction_summary": f"Found {len(interactors)} protein interaction partners",
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
|
|
966
|
+
@register_tool("HPAGetRnaExpressionByTissueTool")
|
|
967
|
+
class HPAGetRnaExpressionByTissueTool(HPAJsonApiTool):
|
|
968
|
+
"""
|
|
969
|
+
Query RNA expression levels for a gene in specific tissues.
|
|
970
|
+
More precise than general tissue expression queries.
|
|
971
|
+
"""
|
|
972
|
+
|
|
973
|
+
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
|
|
974
|
+
ensembl_id = arguments.get("ensembl_id")
|
|
975
|
+
tissue_names = arguments.get("tissue_names", [])
|
|
976
|
+
|
|
977
|
+
if not ensembl_id:
|
|
978
|
+
return {"error": "Parameter 'ensembl_id' is required"}
|
|
979
|
+
if not tissue_names or not isinstance(tissue_names, list):
|
|
980
|
+
# Provide helpful tissue name examples
|
|
981
|
+
example_tissues = [
|
|
982
|
+
"brain",
|
|
983
|
+
"liver",
|
|
984
|
+
"heart",
|
|
985
|
+
"kidney",
|
|
986
|
+
"lung",
|
|
987
|
+
"pancreas",
|
|
988
|
+
"skin",
|
|
989
|
+
"muscle",
|
|
990
|
+
]
|
|
991
|
+
return {
|
|
992
|
+
"error": f"Parameter 'tissue_names' is required and must be a list. Example: {example_tissues}"
|
|
993
|
+
}
|
|
994
|
+
|
|
995
|
+
data = self._make_api_request(ensembl_id)
|
|
996
|
+
if "error" in data:
|
|
997
|
+
return data
|
|
998
|
+
|
|
999
|
+
# Get RNA tissue expression data
|
|
1000
|
+
rna_data = data.get("RNA tissue specific nTPM", {})
|
|
1001
|
+
if not isinstance(rna_data, dict):
|
|
1002
|
+
return {"error": "No RNA tissue expression data available for this gene"}
|
|
1003
|
+
|
|
1004
|
+
expression_results = {}
|
|
1005
|
+
available_tissues = list(rna_data.keys())
|
|
1006
|
+
|
|
1007
|
+
for tissue in tissue_names:
|
|
1008
|
+
# Case-insensitive matching
|
|
1009
|
+
found_tissue = None
|
|
1010
|
+
for available_tissue in available_tissues:
|
|
1011
|
+
if (
|
|
1012
|
+
tissue.lower() in available_tissue.lower()
|
|
1013
|
+
or available_tissue.lower() in tissue.lower()
|
|
1014
|
+
):
|
|
1015
|
+
found_tissue = available_tissue
|
|
1016
|
+
break
|
|
1017
|
+
|
|
1018
|
+
if found_tissue:
|
|
1019
|
+
expression_results[tissue] = {
|
|
1020
|
+
"matched_tissue": found_tissue,
|
|
1021
|
+
"expression_value": rna_data[found_tissue],
|
|
1022
|
+
"expression_level": self._categorize_expression(
|
|
1023
|
+
rna_data[found_tissue]
|
|
1024
|
+
),
|
|
1025
|
+
}
|
|
1026
|
+
else:
|
|
1027
|
+
expression_results[tissue] = {
|
|
1028
|
+
"matched_tissue": "Not found",
|
|
1029
|
+
"expression_value": "N/A",
|
|
1030
|
+
"expression_level": "No data",
|
|
1031
|
+
}
|
|
1032
|
+
|
|
1033
|
+
return {
|
|
1034
|
+
"ensembl_id": ensembl_id,
|
|
1035
|
+
"gene": data.get("Gene", "Unknown"),
|
|
1036
|
+
"gene_synonym": data.get("Gene synonym", ""),
|
|
1037
|
+
"expression_unit": "nTPM (normalized Transcripts Per Million)",
|
|
1038
|
+
"queried_tissues": tissue_names,
|
|
1039
|
+
"tissue_expression": expression_results,
|
|
1040
|
+
"available_tissues_sample": (
|
|
1041
|
+
available_tissues[:10]
|
|
1042
|
+
if len(available_tissues) > 10
|
|
1043
|
+
else available_tissues
|
|
1044
|
+
),
|
|
1045
|
+
"total_available_tissues": len(available_tissues),
|
|
1046
|
+
}
|
|
1047
|
+
|
|
1048
|
+
def _categorize_expression(self, expr_value) -> str:
|
|
1049
|
+
"""Categorize expression level"""
|
|
1050
|
+
try:
|
|
1051
|
+
val = float(expr_value)
|
|
1052
|
+
if val > 50:
|
|
1053
|
+
return "Very high"
|
|
1054
|
+
elif val > 10:
|
|
1055
|
+
return "High"
|
|
1056
|
+
elif val > 1:
|
|
1057
|
+
return "Medium"
|
|
1058
|
+
elif val > 0.1:
|
|
1059
|
+
return "Low"
|
|
1060
|
+
else:
|
|
1061
|
+
return "Very low"
|
|
1062
|
+
except (ValueError, TypeError):
|
|
1063
|
+
return "Unknown"
|
|
1064
|
+
|
|
1065
|
+
|
|
1066
|
+
@register_tool("HPAGetContextualBiologicalProcessTool")
|
|
1067
|
+
class HPAGetContextualBiologicalProcessTool(BaseTool):
|
|
1068
|
+
"""
|
|
1069
|
+
Analyze a gene's biological processes in the context of specific tissue or cell line.
|
|
1070
|
+
Enhanced with intelligent context validation and recommendation.
|
|
1071
|
+
"""
|
|
1072
|
+
|
|
1073
|
+
def __init__(self, tool_config):
|
|
1074
|
+
super().__init__(tool_config)
|
|
1075
|
+
# Define all valid context options
|
|
1076
|
+
self.valid_contexts = {
|
|
1077
|
+
"tissues": [
|
|
1078
|
+
"adipose_tissue",
|
|
1079
|
+
"adrenal_gland",
|
|
1080
|
+
"appendix",
|
|
1081
|
+
"bone_marrow",
|
|
1082
|
+
"brain",
|
|
1083
|
+
"breast",
|
|
1084
|
+
"bronchus",
|
|
1085
|
+
"cerebellum",
|
|
1086
|
+
"cerebral_cortex",
|
|
1087
|
+
"cervix",
|
|
1088
|
+
"colon",
|
|
1089
|
+
"duodenum",
|
|
1090
|
+
"endometrium",
|
|
1091
|
+
"esophagus",
|
|
1092
|
+
"gallbladder",
|
|
1093
|
+
"heart_muscle",
|
|
1094
|
+
"kidney",
|
|
1095
|
+
"liver",
|
|
1096
|
+
"lung",
|
|
1097
|
+
"lymph_node",
|
|
1098
|
+
"ovary",
|
|
1099
|
+
"pancreas",
|
|
1100
|
+
"placenta",
|
|
1101
|
+
"prostate",
|
|
1102
|
+
"rectum",
|
|
1103
|
+
"salivary_gland",
|
|
1104
|
+
"skeletal_muscle",
|
|
1105
|
+
"skin",
|
|
1106
|
+
"small_intestine",
|
|
1107
|
+
"spleen",
|
|
1108
|
+
"stomach",
|
|
1109
|
+
"testis",
|
|
1110
|
+
"thymus",
|
|
1111
|
+
"thyroid_gland",
|
|
1112
|
+
"urinary_bladder",
|
|
1113
|
+
"vagina",
|
|
1114
|
+
],
|
|
1115
|
+
"cell_lines": [
|
|
1116
|
+
"hela",
|
|
1117
|
+
"mcf7",
|
|
1118
|
+
"a549",
|
|
1119
|
+
"hepg2",
|
|
1120
|
+
"jurkat",
|
|
1121
|
+
"pc3",
|
|
1122
|
+
"rh30",
|
|
1123
|
+
"siha",
|
|
1124
|
+
"u251",
|
|
1125
|
+
],
|
|
1126
|
+
"blood_cells": [
|
|
1127
|
+
"t_cell",
|
|
1128
|
+
"b_cell",
|
|
1129
|
+
"nk_cell",
|
|
1130
|
+
"monocyte",
|
|
1131
|
+
"neutrophil",
|
|
1132
|
+
"eosinophil",
|
|
1133
|
+
],
|
|
1134
|
+
"brain_regions": [
|
|
1135
|
+
"cerebellum",
|
|
1136
|
+
"cerebral_cortex",
|
|
1137
|
+
"hippocampus",
|
|
1138
|
+
"hypothalamus",
|
|
1139
|
+
"amygdala",
|
|
1140
|
+
],
|
|
1141
|
+
}
|
|
1142
|
+
|
|
1143
|
+
def _validate_context(self, context_name: str) -> Dict[str, Any]:
|
|
1144
|
+
"""Validate context_name and provide intelligent recommendations"""
|
|
1145
|
+
context_lower = context_name.lower().replace(" ", "_").replace("-", "_")
|
|
1146
|
+
|
|
1147
|
+
# Check all valid contexts
|
|
1148
|
+
all_valid = []
|
|
1149
|
+
for category, contexts in self.valid_contexts.items():
|
|
1150
|
+
all_valid.extend(contexts)
|
|
1151
|
+
if context_lower in contexts:
|
|
1152
|
+
return {"valid": True, "category": category}
|
|
1153
|
+
|
|
1154
|
+
# Find similar contexts (fuzzy matching)
|
|
1155
|
+
similar_contexts = []
|
|
1156
|
+
context_keywords = context_lower.split("_")
|
|
1157
|
+
|
|
1158
|
+
for valid_context in all_valid:
|
|
1159
|
+
for keyword in context_keywords:
|
|
1160
|
+
if keyword in valid_context.lower() or valid_context.lower() in keyword:
|
|
1161
|
+
similar_contexts.append(valid_context)
|
|
1162
|
+
break
|
|
1163
|
+
|
|
1164
|
+
return {
|
|
1165
|
+
"valid": False,
|
|
1166
|
+
"input": context_name,
|
|
1167
|
+
"similar_suggestions": similar_contexts[:5], # Top 5 suggestions
|
|
1168
|
+
"all_tissues": self.valid_contexts["tissues"][:10], # First 10 tissues
|
|
1169
|
+
"all_cell_lines": self.valid_contexts["cell_lines"],
|
|
1170
|
+
"total_available": len(all_valid),
|
|
1171
|
+
}
|
|
1172
|
+
|
|
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        gene_name = arguments.get("gene_name")
+        context_name = arguments.get("context_name")
+
+        if not gene_name:
+            return {"error": "Parameter 'gene_name' is required"}
+        if not context_name:
+            return {"error": "Parameter 'context_name' is required"}
+
+        # Validate context_name and provide recommendations if invalid
+        validation = self._validate_context(context_name)
+        if not validation["valid"]:
+            error_msg = f"Invalid context_name '{validation['input']}'. "
+            if validation["similar_suggestions"]:
+                error_msg += f"Similar options: {validation['similar_suggestions']}. "
+            error_msg += f"Available tissues: {validation['all_tissues']}... "
+            error_msg += f"Available cell lines: {validation['all_cell_lines']}. "
+            error_msg += f"Total {validation['total_available']} contexts available."
+            return {"error": error_msg}
+
+        try:
+            # Step 1: Get gene basic info and Ensembl ID
+            search_api = HPASearchApiTool({})
+            search_result = search_api._make_api_request(gene_name, "g,gs,eg,upbp")
+
+            if "error" in search_result or not search_result:
+                return {"error": f"Could not find gene information for '{gene_name}'"}
+
+            gene_data = (
+                search_result[0] if isinstance(search_result, list) else search_result
+            )
+            ensembl_id = gene_data.get("Ensembl", "")
+
+            if not ensembl_id:
+                return {"error": f"Could not find Ensembl ID for gene '{gene_name}'"}
+
+            # Step 2: Get biological processes
+            biological_processes = gene_data.get("Biological process", "")
+            processes_list = []
+            if biological_processes and biological_processes != "N/A":
+                if isinstance(biological_processes, list):
+                    processes_list = biological_processes
+                elif isinstance(biological_processes, str):
+                    processes_list = [
+                        p.strip()
+                        for p in biological_processes.replace(";", ",").split(",")
+                        if p.strip()
+                    ]
+
+            # Step 3: Get expression in context with improved error handling
+            json_api = HPAJsonApiTool({})
+            json_data = json_api._make_api_request(ensembl_id)
+
+            expression_value = "N/A"
+            expression_level = "not expressed"
+            context_type = (
+                validation["category"].replace("_", " ").rstrip("s")
+            )  # "tissues" -> "tissue"
+
+            if "error" not in json_data and json_data:
+                # FIXED: Check if rna_data is not None before calling .keys()
+                rna_data = json_data.get("RNA tissue specific nTPM")
+                if rna_data and isinstance(rna_data, dict):
+                    # Try to find matching tissue
+                    for tissue_key in rna_data.keys():
+                        if (
+                            context_name.lower() in tissue_key.lower()
+                            or tissue_key.lower() in context_name.lower()
+                        ):
+                            expression_value = rna_data[tissue_key]
+                            break
+
+            # If not found in tissues and it's a cell line, try cell line data
+            if expression_value == "N/A" and validation["category"] == "cell_lines":
+                context_type = "cell line"
+                cell_line_columns = {
+                    "hela": "cell_RNA_hela",
+                    "mcf7": "cell_RNA_mcf7",
+                    "a549": "cell_RNA_a549",
+                    "hepg2": "cell_RNA_hepg2",
+                }
+
+                cell_column = cell_line_columns.get(context_name.lower())
+                if cell_column:
+                    cell_result = search_api._make_api_request(
+                        gene_name, f"g,{cell_column}"
+                    )
+                    if "error" not in cell_result and cell_result:
+                        expression_value = cell_result[0].get(cell_column, "N/A")
+
+            # Categorize expression level
+            try:
+                expr_val = float(expression_value) if expression_value != "N/A" else 0
+                if expr_val > 10:
+                    expression_level = "highly expressed"
+                elif expr_val > 1:
+                    expression_level = "moderately expressed"
+                elif expr_val > 0.1:
+                    expression_level = "expressed at low level"
+                else:
+                    expression_level = "not expressed or very low"
+            except (ValueError, TypeError):
+                expression_level = "expression level unclear"
+
+            # Generate contextual conclusion
+            relevance = (
+                "may be functionally relevant"
+                if "expressed" in expression_level and "not" not in expression_level
+                else "is likely not functionally relevant"
+            )
+
+            conclusion = f"Gene {gene_name} is involved in {len(processes_list)} biological processes. It is {expression_level} in {context_name} ({expression_value} nTPM), suggesting its functional roles {relevance} in this {context_type} context."
+
+            return {
+                "gene": gene_data.get("Gene", gene_name),
+                "gene_synonym": gene_data.get("Gene synonym", ""),
+                "ensembl_id": ensembl_id,
+                "context": context_name,
+                "context_type": context_type,
+                "context_category": validation["category"],
+                "expression_in_context": f"{expression_value} nTPM",
+                "expression_level": expression_level,
+                "total_biological_processes": len(processes_list),
+                "biological_processes": (
+                    processes_list[:10] if len(processes_list) > 10 else processes_list
+                ),
+                "contextual_conclusion": conclusion,
+                "functional_relevance": relevance,
+            }
+
+        except Exception as e:
+            return {"error": f"Failed to perform contextual analysis: {str(e)}"}
+
+
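Step 3 above turns the numeric nTPM value into wording via fixed cut-offs (above 10 highly expressed, above 1 moderately, above 0.1 low). A standalone restatement of that categorization under the same thresholds (`categorize_ntpm` is illustrative, not a package function):

    # Illustrative restatement of the nTPM cut-offs used in Step 3; not part of tooluniverse.
    def categorize_ntpm(value):
        """Map an nTPM value onto the expression-level wording used above."""
        try:
            ntpm = float(value) if value != "N/A" else 0.0
        except (ValueError, TypeError):
            return "expression level unclear"
        if ntpm > 10:
            return "highly expressed"
        if ntpm > 1:
            return "moderately expressed"
        if ntpm > 0.1:
            return "expressed at low level"
        return "not expressed or very low"

    print(categorize_ntpm(25.4), "|", categorize_ntpm("N/A"))
    # highly expressed | not expressed or very low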
+# --- Keep existing comprehensive gene details tool for images ---
+
+
+@register_tool("HPAGetGenePageDetailsTool")
+class HPAGetGenePageDetailsTool(HPAXmlApiTool):
+    """
+    Get detailed information about a gene page, including images, protein expression, antibody data, etc.
+    Get the most comprehensive data by parsing HPA's single gene XML endpoint.
+    Enhanced version with improved image extraction and comprehensive data parsing based on optimization plan.
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        ensembl_id = arguments.get("ensembl_id")
+        include_images = arguments.get("include_images", True)
+        include_antibodies = arguments.get("include_antibodies", True)
+        include_expression = arguments.get("include_expression", True)
+
+        if not ensembl_id:
+            return {"error": "Parameter 'ensembl_id' is required"}
+
+        try:
+            root = self._make_api_request(ensembl_id)
+            return self._parse_gene_xml(
+                root, ensembl_id, include_images, include_antibodies, include_expression
+            )
+
+        except Exception as e:
+            return {"error": str(e)}
+
+    def _parse_gene_xml(
+        self,
+        root: ET.Element,
+        ensembl_id: str,
+        include_images: bool,
+        include_antibodies: bool,
+        include_expression: bool,
+    ) -> Dict[str, Any]:
+        """Parse gene XML data comprehensively based on actual HPA XML schema"""
+        result = {
+            "ensembl_id": ensembl_id,
+            "gene_name": "",
+            "gene_description": "",
+            "chromosome_location": "",
+            "uniprot_ids": [],
+            "summary": {},
+        }
+
+        # Extract basic gene information from entry element
+        entry_elem = root.find(".//entry")
+        if entry_elem is not None:
+            # Gene name
+            name_elem = entry_elem.find("name")
+            if name_elem is not None:
+                result["gene_name"] = name_elem.text or ""
+
+            # Gene synonyms
+            synonyms = []
+            for synonym_elem in entry_elem.findall("synonym"):
+                if synonym_elem.text:
+                    synonyms.append(synonym_elem.text)
+            result["gene_synonyms"] = synonyms
+
+            # Extract Uniprot IDs from identifier/xref elements
+            identifier_elem = entry_elem.find("identifier")
+            if identifier_elem is not None:
+                for xref in identifier_elem.findall("xref"):
+                    if xref.get("db") == "Uniprot/SWISSPROT":
+                        result["uniprot_ids"].append(xref.get("id", ""))
+
+            # Extract protein classes
+            protein_classes = []
+            protein_classes_elem = entry_elem.find("proteinClasses")
+            if protein_classes_elem is not None:
+                for pc in protein_classes_elem.findall("proteinClass"):
+                    class_name = pc.get("name", "")
+                    if class_name:
+                        protein_classes.append(class_name)
+            result["protein_classes"] = protein_classes
+
+        # Extract image information with enhanced parsing
+        if include_images:
+            result["ihc_images"] = self._extract_ihc_images(root)
+            result["if_images"] = self._extract_if_images(root)
+
+        # Extract antibody information
+        if include_antibodies:
+            result["antibodies"] = self._extract_antibodies(root)
+
+        # Extract expression information
+        if include_expression:
+            result["expression_summary"] = self._extract_expression_summary(root)
+            result["tissue_expression"] = self._extract_tissue_expression(root)
+            result["cell_line_expression"] = self._extract_cell_line_expression(root)
+
+        # Extract summary statistics
+        result["summary"] = {
+            "total_antibodies": len(result.get("antibodies", [])),
+            "total_ihc_images": len(result.get("ihc_images", [])),
+            "total_if_images": len(result.get("if_images", [])),
+            "tissues_with_expression": len(result.get("tissue_expression", [])),
+            "cell_lines_with_expression": len(result.get("cell_line_expression", [])),
+        }
+
+        return result
+
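The `_extract_*` helpers that follow all walk `tissueExpression` and `subcellularExpression` elements for nested `image`, `data`, `tissue`, `level`, and `imageUrl` children. The snippet below is a hand-written, abbreviated stand-in for that layout (not a verbatim HPA document), showing how the same ElementTree calls traverse it:

    import xml.etree.ElementTree as ET

    # Abbreviated, hand-written stand-in for the HPA gene XML layout assumed by the parser.
    sample = """
    <proteinAtlas>
      <entry>
        <name>EXAMPLE</name>
        <tissueExpression>
          <summary>Illustrative tissue expression summary.</summary>
          <image imageType="selected">
            <tissue organ="Liver">liver</tissue>
            <imageUrl>https://example.org/placeholder_ihc.jpg</imageUrl>
          </image>
          <data>
            <tissue organ="Liver">liver</tissue>
            <level type="expression">Medium</level>
          </data>
        </tissueExpression>
      </entry>
    </proteinAtlas>
    """

    root = ET.fromstring(sample)
    for tissue_expr in root.findall(".//tissueExpression"):
        for image_elem in tissue_expr.findall(".//image"):
            if image_elem.get("imageType") == "selected":
                print(image_elem.find("tissue").text, image_elem.find("imageUrl").text)
    # liver https://example.org/placeholder_ihc.jpg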
+    def _extract_ihc_images(self, root: ET.Element) -> List[Dict[str, Any]]:
+        """Extract tissue immunohistochemistry (IHC) images based on actual HPA XML structure"""
+        images = []
+
+        # Find tissueExpression elements which contain IHC images
+        for tissue_expr in root.findall(".//tissueExpression"):
+            # Extract selected images from tissueExpression
+            for image_elem in tissue_expr.findall(".//image"):
+                image_type = image_elem.get("imageType", "")
+                if image_type == "selected":
+                    tissue_elem = image_elem.find("tissue")
+                    image_url_elem = image_elem.find("imageUrl")
+
+                    if tissue_elem is not None and image_url_elem is not None:
+                        tissue_name = tissue_elem.text or ""
+                        organ = tissue_elem.get("organ", "")
+                        ontology_terms = tissue_elem.get("ontologyTerms", "")
+                        image_url = image_url_elem.text or ""
+
+                        images.append(
+                            {
+                                "image_type": "Immunohistochemistry",
+                                "tissue_name": tissue_name,
+                                "organ": organ,
+                                "ontology_terms": ontology_terms,
+                                "image_url": image_url,
+                                "selected": True,
+                            }
+                        )
+
+        return images
+
+    def _extract_if_images(self, root: ET.Element) -> List[Dict[str, Any]]:
+        """Extract subcellular immunofluorescence (IF) images based on actual HPA XML structure"""
+        images = []
+
+        # Look for subcellular expression data (IF images are typically in subcellular sections)
+        for subcell_expr in root.findall(".//subcellularExpression"):
+            # Extract subcellular location images
+            for image_elem in subcell_expr.findall(".//image"):
+                image_type = image_elem.get("imageType", "")
+                if image_type == "selected":
+                    location_elem = image_elem.find("location")
+                    image_url_elem = image_elem.find("imageUrl")
+
+                    if location_elem is not None and image_url_elem is not None:
+                        location_name = location_elem.text or ""
+                        image_url = image_url_elem.text or ""
+
+                        images.append(
+                            {
+                                "image_type": "Immunofluorescence",
+                                "subcellular_location": location_name,
+                                "image_url": image_url,
+                                "selected": True,
+                            }
+                        )
+
+        return images
+
+    def _extract_antibodies(self, root: ET.Element) -> List[Dict[str, Any]]:
+        """Extract antibody information from actual HPA XML structure"""
+        antibodies_data = []
+
+        # Look for antibody references in various expression sections
+        antibody_ids = set()
+
+        # Look for antibody references in tissue expression
+        for tissue_expr in root.findall(".//tissueExpression"):
+            for elem in tissue_expr.iter():
+                if "antibody" in elem.tag.lower() or elem.get("antibody"):
+                    antibody_id = elem.get("antibody") or elem.text
+                    if antibody_id:
+                        antibody_ids.add(antibody_id)
+
+        # Create basic antibody info for found IDs
+        for antibody_id in antibody_ids:
+            antibodies_data.append(
+                {
+                    "antibody_id": antibody_id,
+                    "source": "HPA",
+                    "applications": ["IHC", "IF"],
+                    "validation_status": "Available",
+                }
+            )
+
+        # If no specific antibody IDs found, create a placeholder
+        if not antibodies_data:
+            antibodies_data.append(
+                {
+                    "antibody_id": "HPA_antibody",
+                    "source": "HPA",
+                    "applications": ["IHC", "IF"],
+                    "validation_status": "Available",
+                }
+            )
+
+        return antibodies_data
+
+    def _extract_expression_summary(self, root: ET.Element) -> Dict[str, Any]:
+        """Extract expression summary information from actual HPA XML structure"""
+        summary = {
+            "tissue_specificity": "",
+            "subcellular_location": [],
+            "protein_class": [],
+            "predicted_location": "",
+            "tissue_expression_summary": "",
+            "subcellular_expression_summary": "",
+        }
+
+        # Extract predicted location
+        predicted_location_elem = root.find(".//predictedLocation")
+        if predicted_location_elem is not None:
+            summary["predicted_location"] = predicted_location_elem.text or ""
+
+        # Extract tissue expression summary
+        tissue_expr_elem = root.find(".//tissueExpression")
+        if tissue_expr_elem is not None:
+            tissue_summary_elem = tissue_expr_elem.find("summary")
+            if tissue_summary_elem is not None:
+                summary["tissue_expression_summary"] = tissue_summary_elem.text or ""
+
+        # Extract subcellular expression summary
+        subcell_expr_elem = root.find(".//subcellularExpression")
+        if subcell_expr_elem is not None:
+            subcell_summary_elem = subcell_expr_elem.find("summary")
+            if subcell_summary_elem is not None:
+                summary["subcellular_expression_summary"] = (
+                    subcell_summary_elem.text or ""
+                )
+
+        return summary
+
+    def _extract_tissue_expression(self, root: ET.Element) -> List[Dict[str, Any]]:
+        """Extract detailed tissue expression data from actual HPA XML structure"""
+        tissue_data = []
+
+        # Extract from tissueExpression data elements
+        for tissue_expr in root.findall(".//tissueExpression"):
+            for data_elem in tissue_expr.findall(".//data"):
+                tissue_elem = data_elem.find("tissue")
+                level_elem = data_elem.find("level")
+
+                if tissue_elem is not None:
+                    tissue_info = {
+                        "tissue_name": tissue_elem.text or "",
+                        "organ": tissue_elem.get("organ", ""),
+                        "expression_level": "",
+                    }
+
+                    if level_elem is not None:
+                        tissue_info["expression_level"] = (
+                            level_elem.get("type", "") + ": " + (level_elem.text or "")
+                        )
+
+                    tissue_data.append(tissue_info)
+
+        return tissue_data
+
+    def _extract_cell_line_expression(self, root: ET.Element) -> List[Dict[str, Any]]:
+        """Extract cell line expression data from actual HPA XML structure"""
+        cell_line_data = []
+
+        # Look for cell line expression in subcellular expression
+        for subcell_expr in root.findall(".//subcellularExpression"):
+            for data_elem in subcell_expr.findall(".//data"):
+                cell_line_elem = data_elem.find("cellLine")
+                if cell_line_elem is not None:
+                    cell_info = {
+                        "cell_line_name": cell_line_elem.get("name", "")
+                        or (cell_line_elem.text or ""),
+                        "expression_data": [],
+                    }
+
+                    if cell_info["expression_data"]:
+                        cell_line_data.append(cell_info)
+
+        return cell_line_data
+
+
+# --- Legacy/Compatibility Tools ---
+
+
+@register_tool("HPAGetGeneJSONTool")
+class HPAGetGeneJSONTool(HPAJsonApiTool):
+    """
+    Enhanced legacy tool - Get basic gene information using Ensembl Gene ID.
+    Now uses the efficient JSON API instead of search API.
+    """
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        ensembl_id = arguments.get("ensembl_id")
+        if not ensembl_id:
+            return {"error": "Parameter 'ensembl_id' is required"}
+
+        # Use JSON API to get comprehensive information
+        data = self._make_api_request(ensembl_id)
+
+        if "error" in data:
+            return data
+
+        # Convert to response similar to original JSON format for compatibility
+        return {
+            "Ensembl": ensembl_id,
+            "Gene": data.get("Gene", ""),
+            "Gene synonym": data.get("Gene synonym", ""),
+            "Uniprot": data.get("Uniprot", ""),
+            "Biological process": data.get("Biological process", ""),
+            "RNA tissue specific nTPM": data.get("RNA tissue specific nTPM", ""),
+        }
+
+
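For orientation, the document that `HPAJsonApiTool._make_api_request` (defined earlier in this file) returns appears to be HPA's per-gene JSON export. A hedged fetch sketch, assuming the public `https://www.proteinatlas.org/<ensembl_id>.json` endpoint; the package's own request code may differ:

    import json
    import urllib.request

    # Hedged sketch: assumes the public per-gene JSON endpoint; not the package's own code.
    def fetch_hpa_gene_json(ensembl_id):
        url = f"https://www.proteinatlas.org/{ensembl_id}.json"
        with urllib.request.urlopen(url, timeout=30) as resp:
            return json.loads(resp.read().decode("utf-8"))

    # data = fetch_hpa_gene_json("ENSG00000134057")
    # data.get("Gene"), data.get("RNA tissue specific nTPM")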
+@register_tool("HPAGetGeneXMLTool")
+class HPAGetGeneXMLTool(HPASearchApiTool):
+    """
+    Legacy tool - Get gene TSV format data (alternative to XML).
+    """
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        ensembl_id = arguments.get("ensembl_id")
+        if not ensembl_id:
+            return {"error": "Parameter 'ensembl_id' is required"}
+
+        # Use TSV format to get detailed data
+        columns = "g,gs,up,upbp,rnatsm,cell_RNA_a549,cell_RNA_hela"
+        result = self._make_api_request(ensembl_id, columns, format_type="tsv")
+
+        if "error" in result:
+            return result
+
+        return {"tsv_data": result.get("tsv_data", "")}
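Despite its name, HPAGetGeneXMLTool requests TSV through the search API with an explicit column list. A hedged sketch of such a request, assuming HPA's documented `search_download.php` endpoint; the actual parameters used by `HPASearchApiTool._make_api_request` (defined earlier in the file) may differ:

    import urllib.parse
    import urllib.request

    # Hedged sketch: assumes HPA's search_download.php API; the package's request code may differ.
    def hpa_search_download(query, columns, fmt="tsv"):
        params = urllib.parse.urlencode(
            {"search": query, "format": fmt, "columns": columns, "compress": "no"}
        )
        url = f"https://www.proteinatlas.org/api/search_download.php?{params}"
        with urllib.request.urlopen(url, timeout=30) as resp:
            return resp.read().decode("utf-8")

    # tsv = hpa_search_download("ENSG00000134057", "g,gs,up,upbp,rnatsm,cell_RNA_a549,cell_RNA_hela")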