tooluniverse 0.2.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic. Click here for more details.
- tooluniverse/__init__.py +340 -4
- tooluniverse/admetai_tool.py +84 -0
- tooluniverse/agentic_tool.py +563 -0
- tooluniverse/alphafold_tool.py +96 -0
- tooluniverse/base_tool.py +129 -6
- tooluniverse/boltz_tool.py +207 -0
- tooluniverse/chem_tool.py +192 -0
- tooluniverse/compose_scripts/__init__.py +1 -0
- tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
- tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
- tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
- tooluniverse/compose_scripts/literature_tool.py +34 -0
- tooluniverse/compose_scripts/output_summarizer.py +279 -0
- tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
- tooluniverse/compose_scripts/tool_discover.py +705 -0
- tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
- tooluniverse/compose_tool.py +371 -0
- tooluniverse/ctg_tool.py +1002 -0
- tooluniverse/custom_tool.py +81 -0
- tooluniverse/dailymed_tool.py +108 -0
- tooluniverse/data/admetai_tools.json +155 -0
- tooluniverse/data/agentic_tools.json +1156 -0
- tooluniverse/data/alphafold_tools.json +87 -0
- tooluniverse/data/boltz_tools.json +9 -0
- tooluniverse/data/chembl_tools.json +16 -0
- tooluniverse/data/clait_tools.json +108 -0
- tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
- tooluniverse/data/compose_tools.json +202 -0
- tooluniverse/data/dailymed_tools.json +70 -0
- tooluniverse/data/dataset_tools.json +646 -0
- tooluniverse/data/disease_target_score_tools.json +712 -0
- tooluniverse/data/efo_tools.json +17 -0
- tooluniverse/data/embedding_tools.json +319 -0
- tooluniverse/data/enrichr_tools.json +31 -0
- tooluniverse/data/europe_pmc_tools.json +22 -0
- tooluniverse/data/expert_feedback_tools.json +10 -0
- tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
- tooluniverse/data/fda_drug_labeling_tools.json +1 -1
- tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
- tooluniverse/data/finder_tools.json +209 -0
- tooluniverse/data/gene_ontology_tools.json +113 -0
- tooluniverse/data/gwas_tools.json +1082 -0
- tooluniverse/data/hpa_tools.json +333 -0
- tooluniverse/data/humanbase_tools.json +47 -0
- tooluniverse/data/idmap_tools.json +74 -0
- tooluniverse/data/mcp_client_tools_example.json +113 -0
- tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
- tooluniverse/data/medlineplus_tools.json +141 -0
- tooluniverse/data/monarch_tools.json +1 -1
- tooluniverse/data/openalex_tools.json +36 -0
- tooluniverse/data/opentarget_tools.json +1 -1
- tooluniverse/data/output_summarization_tools.json +101 -0
- tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
- tooluniverse/data/packages/categorized_tools.txt +206 -0
- tooluniverse/data/packages/cheminformatics_tools.json +347 -0
- tooluniverse/data/packages/earth_sciences_tools.json +74 -0
- tooluniverse/data/packages/genomics_tools.json +776 -0
- tooluniverse/data/packages/image_processing_tools.json +38 -0
- tooluniverse/data/packages/machine_learning_tools.json +789 -0
- tooluniverse/data/packages/neuroscience_tools.json +62 -0
- tooluniverse/data/packages/original_tools.txt +0 -0
- tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
- tooluniverse/data/packages/scientific_computing_tools.json +560 -0
- tooluniverse/data/packages/single_cell_tools.json +453 -0
- tooluniverse/data/packages/software_tools.json +4954 -0
- tooluniverse/data/packages/structural_biology_tools.json +396 -0
- tooluniverse/data/packages/visualization_tools.json +399 -0
- tooluniverse/data/pubchem_tools.json +215 -0
- tooluniverse/data/pubtator_tools.json +68 -0
- tooluniverse/data/rcsb_pdb_tools.json +1332 -0
- tooluniverse/data/reactome_tools.json +19 -0
- tooluniverse/data/semantic_scholar_tools.json +26 -0
- tooluniverse/data/special_tools.json +2 -25
- tooluniverse/data/tool_composition_tools.json +88 -0
- tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
- tooluniverse/data/txagent_client_tools.json +9 -0
- tooluniverse/data/uniprot_tools.json +211 -0
- tooluniverse/data/url_fetch_tools.json +94 -0
- tooluniverse/data/uspto_downloader_tools.json +9 -0
- tooluniverse/data/uspto_tools.json +811 -0
- tooluniverse/data/xml_tools.json +3275 -0
- tooluniverse/dataset_tool.py +296 -0
- tooluniverse/default_config.py +165 -0
- tooluniverse/efo_tool.py +42 -0
- tooluniverse/embedding_database.py +630 -0
- tooluniverse/embedding_sync.py +396 -0
- tooluniverse/enrichr_tool.py +266 -0
- tooluniverse/europe_pmc_tool.py +52 -0
- tooluniverse/execute_function.py +1775 -95
- tooluniverse/extended_hooks.py +444 -0
- tooluniverse/gene_ontology_tool.py +194 -0
- tooluniverse/graphql_tool.py +158 -36
- tooluniverse/gwas_tool.py +358 -0
- tooluniverse/hpa_tool.py +1645 -0
- tooluniverse/humanbase_tool.py +389 -0
- tooluniverse/logging_config.py +254 -0
- tooluniverse/mcp_client_tool.py +764 -0
- tooluniverse/mcp_integration.py +413 -0
- tooluniverse/mcp_tool_registry.py +925 -0
- tooluniverse/medlineplus_tool.py +337 -0
- tooluniverse/openalex_tool.py +228 -0
- tooluniverse/openfda_adv_tool.py +283 -0
- tooluniverse/openfda_tool.py +393 -160
- tooluniverse/output_hook.py +1122 -0
- tooluniverse/package_tool.py +195 -0
- tooluniverse/pubchem_tool.py +158 -0
- tooluniverse/pubtator_tool.py +168 -0
- tooluniverse/rcsb_pdb_tool.py +38 -0
- tooluniverse/reactome_tool.py +108 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
- tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
- tooluniverse/remote/expert_feedback/simple_test.py +23 -0
- tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
- tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
- tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
- tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
- tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
- tooluniverse/remote/immune_compass/compass_tool.py +327 -0
- tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
- tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
- tooluniverse/remote_tool.py +99 -0
- tooluniverse/restful_tool.py +53 -30
- tooluniverse/scripts/generate_tool_graph.py +408 -0
- tooluniverse/scripts/visualize_tool_graph.py +829 -0
- tooluniverse/semantic_scholar_tool.py +62 -0
- tooluniverse/smcp.py +2452 -0
- tooluniverse/smcp_server.py +975 -0
- tooluniverse/test/mcp_server_test.py +0 -0
- tooluniverse/test/test_admetai_tool.py +370 -0
- tooluniverse/test/test_agentic_tool.py +129 -0
- tooluniverse/test/test_alphafold_tool.py +71 -0
- tooluniverse/test/test_chem_tool.py +37 -0
- tooluniverse/test/test_compose_lieraturereview.py +63 -0
- tooluniverse/test/test_compose_tool.py +448 -0
- tooluniverse/test/test_dailymed.py +69 -0
- tooluniverse/test/test_dataset_tool.py +200 -0
- tooluniverse/test/test_disease_target_score.py +56 -0
- tooluniverse/test/test_drugbank_filter_examples.py +179 -0
- tooluniverse/test/test_efo.py +31 -0
- tooluniverse/test/test_enrichr_tool.py +21 -0
- tooluniverse/test/test_europe_pmc_tool.py +20 -0
- tooluniverse/test/test_fda_adv.py +95 -0
- tooluniverse/test/test_fda_drug_labeling.py +91 -0
- tooluniverse/test/test_gene_ontology_tools.py +66 -0
- tooluniverse/test/test_gwas_tool.py +139 -0
- tooluniverse/test/test_hpa.py +625 -0
- tooluniverse/test/test_humanbase_tool.py +20 -0
- tooluniverse/test/test_idmap_tools.py +61 -0
- tooluniverse/test/test_mcp_server.py +211 -0
- tooluniverse/test/test_mcp_tool.py +247 -0
- tooluniverse/test/test_medlineplus.py +220 -0
- tooluniverse/test/test_openalex_tool.py +32 -0
- tooluniverse/test/test_opentargets.py +28 -0
- tooluniverse/test/test_pubchem_tool.py +116 -0
- tooluniverse/test/test_pubtator_tool.py +37 -0
- tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
- tooluniverse/test/test_reactome.py +54 -0
- tooluniverse/test/test_semantic_scholar_tool.py +24 -0
- tooluniverse/test/test_software_tools.py +147 -0
- tooluniverse/test/test_tool_description_optimizer.py +49 -0
- tooluniverse/test/test_tool_finder.py +26 -0
- tooluniverse/test/test_tool_finder_llm.py +252 -0
- tooluniverse/test/test_tools_find.py +195 -0
- tooluniverse/test/test_uniprot_tools.py +74 -0
- tooluniverse/test/test_uspto_tool.py +72 -0
- tooluniverse/test/test_xml_tool.py +113 -0
- tooluniverse/tool_finder_embedding.py +267 -0
- tooluniverse/tool_finder_keyword.py +693 -0
- tooluniverse/tool_finder_llm.py +699 -0
- tooluniverse/tool_graph_web_ui.py +955 -0
- tooluniverse/tool_registry.py +416 -0
- tooluniverse/uniprot_tool.py +155 -0
- tooluniverse/url_tool.py +253 -0
- tooluniverse/uspto_tool.py +240 -0
- tooluniverse/utils.py +369 -41
- tooluniverse/xml_tool.py +369 -0
- tooluniverse-1.0.0.dist-info/METADATA +377 -0
- tooluniverse-1.0.0.dist-info/RECORD +186 -0
- tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
- tooluniverse/generate_mcp_tools.py +0 -113
- tooluniverse/mcp_server.py +0 -3340
- tooluniverse-0.2.0.dist-info/METADATA +0 -139
- tooluniverse-0.2.0.dist-info/RECORD +0 -21
- tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
tooluniverse/ctg_tool.py
ADDED
|
@@ -0,0 +1,1002 @@
|
|
|
1
|
+
from copy import deepcopy
|
|
2
|
+
from urllib.parse import urljoin
|
|
3
|
+
from .restful_tool import RESTfulTool, execute_RESTful_query
|
|
4
|
+
from .tool_registry import register_tool
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@register_tool("ClinicalTrialsTool")
class ClinicalTrialsTool(RESTfulTool):
    """Shared base for tools that query the ClinicalTrials.gov API v2.

    Supplies the helpers used by concrete tools: translating friendly
    argument names into API parameter names, preparing query-string
    values, and filling path placeholders in the endpoint URL.
    Subclasses must implement ``run``.
    """

    def __init__(self, tool_config):
        """Build the full endpoint URL and set up parameter handling tables.

        Args:
            tool_config (dict): Tool configuration. ``tool_config["tool_url"]``
                is the endpoint path relative to the API base URL.
        """
        base_url = "https://clinicaltrials.gov/api/v2"  # Base URL for CTG API v2
        # urljoin replaces the last path segment of a base without a trailing
        # slash and restarts from the root for an absolute path, so add the
        # slash to the base and strip it from the endpoint path.
        full_url = urljoin(base_url + "/", tool_config["tool_url"].lstrip("/"))
        super().__init__(tool_config, full_url)

        # Parameters whose list values are sent as one comma-separated string.
        self.list_params_to_join = [
            "filter.ids",
            "filter.overallStatus",
            "fields",
            "sort",
        ]

        # Friendly argument name -> parameter name expected by the API.
        self.param_name_mapper = {
            "condition": "query.cond",
            "title": "query.titles",
            "intervention": "query.intr",
            "outcome": "query.outc",
            "overall_status": "filter.overallStatus",
            "query_term": "query.term",
        }

    def _map_param_names(self, arguments):
        """Return a copy of ``arguments`` with keys renamed to API parameter names.

        Keys listed in ``self.param_name_mapper`` are replaced by their mapped
        name; every other key is kept unchanged. Values are not modified.

        Args:
            arguments (dict): Mapping keyed by friendly parameter names.

        Returns:
            dict: A new dictionary with mapped parameter names.
        """
        return {
            self.param_name_mapper.get(key, key): value
            for key, value in arguments.items()
        }

    def _prepare_api_params(self, arguments):
        """Prepare the query-string parameters for an API request.

        Entries whose value is ``None`` are dropped. List values of the
        parameters named in ``self.list_params_to_join`` are joined into a
        comma-separated string; all other values pass through unchanged.

        Args:
            arguments (dict): Parameter names mapped to their values.

        Returns:
            dict: A dictionary of parameters ready for the API request.
        """
        api_params = {}
        for param_name, value in arguments.items():
            if value is None:
                continue
            if param_name in self.list_params_to_join and isinstance(value, list):
                api_params[param_name] = ",".join(map(str, value))
            else:
                api_params[param_name] = value
        return api_params

    def _format_endpoint_url(self, arguments):
        """Substitute path placeholders (such as ``{nctId}``) in the endpoint URL.

        Only arguments whose name appears as a ``{name}`` placeholder in
        ``self.endpoint_url`` are used. Their values are percent-encoded so
        that characters such as ``/``, ``?`` or ``#`` in caller-supplied input
        cannot change the path or query of the request.

        Args:
            arguments (dict): Candidate placeholder values keyed by name.

        Returns:
            str: The formatted endpoint URL, or the unformatted template when
            a placeholder has no matching argument.
        """
        # Imported locally so this method does not depend on the module's
        # import block exposing ``quote``.
        from urllib.parse import quote

        # NOTE(review): endpoint_url is presumably set by RESTfulTool.__init__
        # from the URL passed to super().__init__ - confirm.
        url_to_format = self.endpoint_url
        path_params = {
            k: quote(str(v), safe="")
            for k, v in arguments.items()
            if f"{{{k}}}" in url_to_format
        }
        try:
            return url_to_format.format(**path_params)
        except KeyError as e:
            # A placeholder exists but the corresponding key is missing.
            print(
                f"Warning: Missing key {e} in arguments for URL formatting: {url_to_format}"
            )
            # Return the original URL; the API call will likely fail, but
            # this avoids crashing here.
            return url_to_format

    def run(self, arguments):
        """Execute the tool; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("The run method should be implemented in subclasses.")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@register_tool("ClinicalTrialsSearchTool")
class ClinicalTrialsSearchTool(ClinicalTrialsTool):
    """Search studies on ClinicalTrials.gov (``/studies``)."""

    def __init__(self, tool_config):
        """Initialise the tool and the request defaults hidden from the schema.

        Args:
            tool_config (dict): Tool configuration forwarded to the base class.
        """
        super().__init__(tool_config)
        # Defaults added to every request unless the parameter is already
        # present in the query parameters.
        self.default_params_not_shown = {
            "format": "json",  # Default format for the response
            "sort": "@relevance",  # Default sort order
            # NOTE: Can change this one. Other fields that have been
            # considered here: OfficialTitle, StartDate,
            # PrimaryCompletionDate, PrimaryOutcomeMeasure,
            # DescriptionModule, Intervention, InterventionName,
            # InterventionArmGroupLabel, InterventionOtherName, WhyStopped,
            # HasResults.
            "fields": [
                "NCTId",
                "BriefTitle",
                "OverallStatus",
                "BriefSummary",
                "Condition",
                "Phase",
            ],
            "countTotal": True,  # NOTE: Can change this one
            "filter.advanced": "AREA[HasResults]true AND (AREA[Phase]PHASE2 OR AREA[Phase]PHASE3 OR AREA[Phase]PHASE4)",
            # TODO: Consider adding a YEAR filter for the query to remove trials that are too early? E.g., "AREA[LastUpdatePostDate]RANGE[2000-01-01,MAX]"
        }
        # NOTE: schema entries were also drafted for the parameters "title",
        # "outcome", "query.locn", "overall_status", "filter.ids", "sort" and
        # "fields"; they are not defined in this class.

    def run(self, arguments):
        """Execute the search query for clinical trials.

        Args:
            arguments (dict): Parameters provided by the user/LLM, keyed by
                friendly names.

        Returns:
            dict or str: The simplified search result (see
            ``_simplify_output``), or an explanatory message string when the
            response contains no studies or is not a JSON object.
        """
        arguments = self._map_param_names(arguments)
        # NOTE(review): query_schema and parameters are presumably set by a
        # base class from the tool configuration - not visible here.
        query_params = deepcopy(self.query_schema)
        # NOTE: Workaround for not having an aligned schema in the JSON config
        expected_param_names = self._map_param_names(self.parameters).keys()

        # Copy over the caller-supplied values for the parameters this tool declares.
        for name in expected_param_names:
            if arguments.get(name) is not None:
                query_params[name] = arguments[name]

        # Add default parameters that are not shown in the schema.
        for name, value in self.default_params_not_shown.items():
            query_params.setdefault(name, value)

        # Process list parameters that need to be joined.
        api_params = self._prepare_api_params(query_params)

        # 'countTotal' is held as a boolean but is sent to the API as a
        # lowercase string.
        if isinstance(api_params.get("countTotal"), bool):
            api_params["countTotal"] = str(api_params["countTotal"]).lower()

        response = execute_RESTful_query(
            endpoint_url=self.endpoint_url, variables=api_params
        )

        # Only a JSON object with a non-empty "studies" list can be
        # simplified. Raw-text or otherwise unexpected responses fall through
        # to the message below instead of raising on ``.keys()``.
        if isinstance(response, dict) and response.get("studies"):
            return self._simplify_output(response)

        return "No studies found for the given query parameters. Please examine your input and try different parameters."

    def _simplify_output(self, response):
        """Reduce the raw search response to a few fields per study.

        Modules or fields missing from a study are skipped rather than
        raising, and keys whose value is ``None`` are omitted.

        Args:
            response (dict): Raw API response containing a ``"studies"`` list.

        Returns:
            dict: ``{"studies": [...]}`` plus ``"nextPageToken"`` and
            ``"total_count"`` when the raw response provides them.
        """
        # (output key, module inside protocolSection, field inside the module)
        extracted_fields = (
            ("NCT ID", "identificationModule", "nctId"),
            ("brief_title", "identificationModule", "briefTitle"),
            ("brief_summary", "descriptionModule", "briefSummary"),
            ("overall_status", "statusModule", "overallStatus"),
            ("condition", "conditionsModule", "conditions"),
            ("phase", "designModule", "phases"),
        )

        simplified_studies = []
        for study in response["studies"]:
            protocol = study.get("protocolSection") or {}
            new_study = {}
            for out_key, module_name, field_name in extracted_fields:
                value = (protocol.get(module_name) or {}).get(field_name)
                if value is not None:  # Remove None values
                    new_study[out_key] = value
            simplified_studies.append(new_study)

        new_response = {"studies": simplified_studies}
        if "nextPageToken" in response:
            new_response["nextPageToken"] = response["nextPageToken"]
        if "totalCount" in response:
            new_response["total_count"] = response["totalCount"]

        return new_response
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
@register_tool("ClinicalTrialsDetailsTool")
|
|
286
|
+
class ClinicalTrialsDetailsTool(ClinicalTrialsTool):
|
|
287
|
+
def __init__(self, tool_config):
    """Initialise the details tool and its hidden request defaults.

    Args:
        tool_config (dict): Tool configuration forwarded to the base class.
    """
    super().__init__(tool_config)
    # Defaults added to each request when not already present.
    hidden_defaults = {}
    hidden_defaults["format"] = "json"
    self.default_params_not_shown = hidden_defaults
|
|
292
|
+
|
|
293
|
+
def run(self, arguments):
    """Fetch one category of details for each of the given NCT IDs.

    The category is chosen from the parameter names this tool declares
    (``self.parameters``): the first recognised name, in the order checked
    below, decides which fields are requested and how each response is
    post-processed.

    Args:
        arguments (dict): Parameters provided by the caller. Must contain
            ``nct_ids`` as a non-empty list.

    Returns:
        list, dict or str: A list with one processed result per successful
        response; an ``{"error": ...}`` dictionary when ``nct_ids`` is
        missing or invalid, or when the tool declares no recognised detail
        parameter; or a message string when nothing relevant was found.
    """
    arguments = self._map_param_names(arguments)
    expected_param_names = self._map_param_names(self.parameters).keys()
    # NOTE(review): query_schema and parameters are presumably set by a base
    # class from the tool configuration - not visible here.
    query_params = deepcopy(self.query_schema)

    nct_ids_list = arguments.get("nct_ids")
    if not nct_ids_list or not isinstance(nct_ids_list, list):
        return {
            "error": "Missing or invalid required parameter: nct_ids (must be a non-empty list)"
        }
    # 'nct_ids' is not a valid API parameter: the IDs go into the URL path,
    # so drop it from the arguments and from any schema-provided entry.
    del arguments["nct_ids"]
    query_params.pop("nct_ids", None)

    # Prepare API parameters from arguments.
    for name in expected_param_names:
        if arguments.get(name) is not None:
            query_params[name] = arguments[name]

    # Add default parameters that are not shown in the schema.
    for name, value in self.default_params_not_shown.items():
        query_params.setdefault(name, value)

    # Select the requested fields. Selector parameters are removed from
    # query_params (tolerating their absence) so they are not sent to the API.
    query_type = None
    outcome_measure = None
    organs = []
    adverse_event_type = "serious"

    if "description_type" in expected_param_names:
        query_type = "description"
        description_type = query_params.pop("description_type", None)
        if str(description_type or "").lower() == "full":
            query_params["fields"] = [
                "NCTId",
                "BriefTitle",
                "OfficialTitle",
                "BriefSummary",
                "DetailedDescription",
                "Phase",
            ]
        else:
            query_params["fields"] = [
                "NCTId",
                "BriefTitle",
                "BriefSummary",
                "Phase",
            ]
    elif "status_and_date" in expected_param_names:
        query_type = "status_and_date"
        query_params.pop("status_and_date", None)
        query_params["fields"] = [
            "NCTId",
            "OverallStatus",
            "LastKnownStatus",
            "WhyStopped",
            "StartDate",
            "PrimaryCompletionDate",
            "CompletionDate",
        ]
    elif "condition_and_intervention" in expected_param_names:
        query_type = "condition_and_intervention"
        query_params.pop("condition_and_intervention", None)
        query_params["fields"] = [
            "NCTId",
            "Condition",
            "ArmGroupLabel",
            "ArmGroupType",
            "ArmGroupDescription",
            "ArmGroupInterventionName",
            "InterventionType",
            "InterventionName",
            "InterventionOtherName",
            "InterventionDescription",
            # "InterventionArmGroupLabel",
        ]
    elif "eligibility_criteria" in expected_param_names:
        query_type = "eligibility_criteria"
        query_params.pop("eligibility_criteria", None)
        query_params["fields"] = [
            "NCTId",
            "HealthyVolunteers",
            "Sex",
            "GenderBased",
            "GenderDescription",
            "MinimumAge",
            "MaximumAge",
            "StudyPopulation",
            "EligibilityCriteria",
            # "SamplingMethod",
        ]
    elif "location" in expected_param_names:
        query_type = "location"
        query_params.pop("location", None)
        query_params["fields"] = [
            "NCTId",
            "LocationFacility",
            "LocationStatus",
            "LocationCity",
            "LocationState",
            "LocationCountry",
        ]
    elif "outcome_measures" in expected_param_names:
        query_type = "outcome_measures"
        outcome_kind = str(query_params.pop("outcome_measures", None) or "").lower()
        if outcome_kind == "primary":
            query_params["fields"] = [
                "NCTId",
                "PrimaryOutcome",
            ]
        elif outcome_kind == "secondary":
            query_params["fields"] = [
                "NCTId",
                "SecondaryOutcome",
            ]
        else:
            query_params["fields"] = [
                "NCTId",
                "PrimaryOutcome",
                "SecondaryOutcome",
                # "OtherOutcome",
            ]
    elif "references" in expected_param_names:
        query_type = "references"
        query_params.pop("references", None)
        query_params["fields"] = [
            "NCTId",
            "Reference",
            "SeeAlsoLink",
        ]
    # More difficult extractions here.
    elif "baseline_characteristics" in expected_param_names:
        query_type = "baseline_characteristics"
        query_params.pop("baseline_characteristics", None)
        query_params["fields"] = [
            "NCTId",
            "BaselineCharacteristicsModule",
        ]
        # TODO: Add this to the schema
    elif "outcome_measure" in expected_param_names:
        query_type = "outcome"
        # No default is visible for this parameter, so a missing value still
        # raises KeyError here, before any request is made.
        outcome_measure = query_params.pop("outcome_measure")
        query_params["fields"] = [
            "NCTId",
            "OutcomeMeasure",
        ]
    elif "adverse_event_type" in expected_param_names:
        query_type = "safety"
        organs = query_params.pop("organ_systems", [])
        adverse_event_type = query_params.pop("adverse_event_type", "serious")
        query_params["fields"] = [
            "NCTId",
            "AdverseEventsModule",
        ]

    if query_type is None:
        # Without a recognised selector there is no field list to request and
        # no way to post-process the responses; report it before calling the API.
        return {
            "error": "Unable to determine which study details to retrieve: the tool configuration declares no supported detail parameter"
        }

    api_params = self._prepare_api_params(query_params)

    # One request per NCT ID; falsy responses are skipped.
    responses = []
    for nct_id in nct_ids_list:
        formatted_endpoint_url = self._format_endpoint_url({"nctId": nct_id})
        response = execute_RESTful_query(
            endpoint_url=formatted_endpoint_url, variables=api_params
        )
        if response:
            responses.append(response)

    if query_type == "outcome":
        responses = [
            self._extract_outcomes_from_output(response, outcome_measure)
            for response in responses
        ]
    elif query_type == "safety":
        responses = [
            self._extract_safety_from_output(response, organs, adverse_event_type)
            for response in responses
        ]
    else:
        responses = [
            self._simplify_output(response, query_type) for response in responses
        ]

    # A result with at most one entry is treated as carrying no information.
    if not any(len(response) > 1 for response in responses):
        return "No relevant information found for the given NCT IDs."

    return responses
|
|
490
|
+
|
|
491
|
+
def _simplify_output(self, study, query_type):
|
|
492
|
+
"""Manually extract generally most useful information"""
|
|
493
|
+
new_study = {
|
|
494
|
+
"NCT ID": study["protocolSection"]["identificationModule"].get("nctId"),
|
|
495
|
+
}
|
|
496
|
+
if "identificationModule" in study["protocolSection"]:
|
|
497
|
+
if "briefTitle" in study["protocolSection"]["identificationModule"]:
|
|
498
|
+
new_study["brief_title"] = study["protocolSection"][
|
|
499
|
+
"identificationModule"
|
|
500
|
+
].get("briefTitle")
|
|
501
|
+
if "officialTitle" in study["protocolSection"]["identificationModule"]:
|
|
502
|
+
new_study["official_title"] = study["protocolSection"][
|
|
503
|
+
"identificationModule"
|
|
504
|
+
].get("officialTitle")
|
|
505
|
+
if "statusModule" in study["protocolSection"]:
|
|
506
|
+
if "overallStatus" in study["protocolSection"]["statusModule"]:
|
|
507
|
+
new_study["overall_status"] = study["protocolSection"][
|
|
508
|
+
"statusModule"
|
|
509
|
+
].get("overallStatus")
|
|
510
|
+
if "lastKnownStatus" in study["protocolSection"]["statusModule"]:
|
|
511
|
+
new_study["last_known_status"] = study["protocolSection"][
|
|
512
|
+
"statusModule"
|
|
513
|
+
].get("lastKnownStatus")
|
|
514
|
+
if "whyStopped" in study["protocolSection"]["statusModule"]:
|
|
515
|
+
new_study["why_stopped"] = study["protocolSection"]["statusModule"].get(
|
|
516
|
+
"whyStopped"
|
|
517
|
+
)
|
|
518
|
+
if "startDateStruct" in study["protocolSection"]["statusModule"]:
|
|
519
|
+
new_study["start_date"] = study["protocolSection"]["statusModule"][
|
|
520
|
+
"startDateStruct"
|
|
521
|
+
].get("date")
|
|
522
|
+
if (
|
|
523
|
+
"primaryCompletionDateStruct"
|
|
524
|
+
in study["protocolSection"]["statusModule"]
|
|
525
|
+
):
|
|
526
|
+
new_study["primary_completion_date"] = study["protocolSection"][
|
|
527
|
+
"statusModule"
|
|
528
|
+
]["primaryCompletionDateStruct"].get("date")
|
|
529
|
+
if "completionDateStruct" in study["protocolSection"]["statusModule"]:
|
|
530
|
+
new_study["completion_date"] = study["protocolSection"]["statusModule"][
|
|
531
|
+
"completionDateStruct"
|
|
532
|
+
].get("date")
|
|
533
|
+
if "descriptionModule" in study["protocolSection"]:
|
|
534
|
+
if "briefSummary" in study["protocolSection"]["descriptionModule"]:
|
|
535
|
+
new_study["brief_summary"] = study["protocolSection"][
|
|
536
|
+
"descriptionModule"
|
|
537
|
+
].get("briefSummary")
|
|
538
|
+
if "detailedDescription" in study["protocolSection"]["descriptionModule"]:
|
|
539
|
+
new_study["detailed_description"] = study["protocolSection"][
|
|
540
|
+
"descriptionModule"
|
|
541
|
+
].get("detailedDescription")
|
|
542
|
+
if "conditionsModule" in study["protocolSection"]:
|
|
543
|
+
if "conditions" in study["protocolSection"]["conditionsModule"]:
|
|
544
|
+
new_study["condition"] = study["protocolSection"][
|
|
545
|
+
"conditionsModule"
|
|
546
|
+
].get("conditions")
|
|
547
|
+
if "designModule" in study["protocolSection"]:
|
|
548
|
+
if "phases" in study["protocolSection"]["designModule"]:
|
|
549
|
+
new_study["phase"] = study["protocolSection"]["designModule"].get(
|
|
550
|
+
"phases"
|
|
551
|
+
)
|
|
552
|
+
if "patientRegistry" in study["protocolSection"]["designModule"]:
|
|
553
|
+
new_study["patient_registry"] = study["protocolSection"][
|
|
554
|
+
"designModule"
|
|
555
|
+
].get("patientRegistry")
|
|
556
|
+
if "enrollmentInfo" in study["protocolSection"]["designModule"]:
|
|
557
|
+
new_study["enrollment_info"] = study["protocolSection"][
|
|
558
|
+
"designModule"
|
|
559
|
+
].get("enrollmentInfo")
|
|
560
|
+
if "armsInterventionsModule" in study["protocolSection"]:
|
|
561
|
+
if "armGroups" in study["protocolSection"]["armsInterventionsModule"]:
|
|
562
|
+
new_study["arm_groups"] = study["protocolSection"][
|
|
563
|
+
"armsInterventionsModule"
|
|
564
|
+
].get("armGroups")
|
|
565
|
+
if "interventions" in study["protocolSection"]["armsInterventionsModule"]:
|
|
566
|
+
new_study["interventions"] = study["protocolSection"][
|
|
567
|
+
"armsInterventionsModule"
|
|
568
|
+
].get("interventions")
|
|
569
|
+
if "outcomesModule" in study["protocolSection"]:
|
|
570
|
+
if "primaryOutcomes" in study["protocolSection"]["outcomesModule"]:
|
|
571
|
+
new_study["primary_outcomes"] = study["protocolSection"][
|
|
572
|
+
"outcomesModule"
|
|
573
|
+
].get("primaryOutcomes")
|
|
574
|
+
if "secondaryOutcomes" in study["protocolSection"]["outcomesModule"]:
|
|
575
|
+
new_study["secondary_outcomes"] = study["protocolSection"][
|
|
576
|
+
"outcomesModule"
|
|
577
|
+
].get("secondaryOutcomes")
|
|
578
|
+
# if "otherOutcomes" in study["protocolSection"]["outcomesModule"]:
|
|
579
|
+
# new_study["other_outcomes"] = study["protocolSection"]["outcomesModule"].get("otherOutcomes")
|
|
580
|
+
if "eligibilityModule" in study["protocolSection"]:
|
|
581
|
+
if "eligibilityCriteria" in study["protocolSection"]["eligibilityModule"]:
|
|
582
|
+
new_study["eligibility_criteria"] = study["protocolSection"][
|
|
583
|
+
"eligibilityModule"
|
|
584
|
+
].get("eligibilityCriteria")
|
|
585
|
+
if "healthyVolunteers" in study["protocolSection"]["eligibilityModule"]:
|
|
586
|
+
new_study["healthy_volunteers"] = study["protocolSection"][
|
|
587
|
+
"eligibilityModule"
|
|
588
|
+
].get("healthyVolunteers")
|
|
589
|
+
if "sex" in study["protocolSection"]["eligibilityModule"]:
|
|
590
|
+
new_study["sex"] = study["protocolSection"]["eligibilityModule"].get(
|
|
591
|
+
"sex"
|
|
592
|
+
)
|
|
593
|
+
if "genderBased" in study["protocolSection"]["eligibilityModule"]:
|
|
594
|
+
new_study["gender_based"] = study["protocolSection"][
|
|
595
|
+
"eligibilityModule"
|
|
596
|
+
].get("genderBased")
|
|
597
|
+
if "genderDescription" in study["protocolSection"]["eligibilityModule"]:
|
|
598
|
+
new_study["gender_description"] = study["protocolSection"][
|
|
599
|
+
"eligibilityModule"
|
|
600
|
+
].get("genderDescription")
|
|
601
|
+
if "minimumAge" in study["protocolSection"]["eligibilityModule"]:
|
|
602
|
+
new_study["minimum_age"] = study["protocolSection"][
|
|
603
|
+
"eligibilityModule"
|
|
604
|
+
].get("minimumAge")
|
|
605
|
+
if "maximumAge" in study["protocolSection"]["eligibilityModule"]:
|
|
606
|
+
new_study["maximum_age"] = study["protocolSection"][
|
|
607
|
+
"eligibilityModule"
|
|
608
|
+
].get("maximumAge")
|
|
609
|
+
if "studyPopulation" in study["protocolSection"]["eligibilityModule"]:
|
|
610
|
+
new_study["study_population"] = study["protocolSection"][
|
|
611
|
+
"eligibilityModule"
|
|
612
|
+
].get("studyPopulation")
|
|
613
|
+
# if "samplingMethod" in study["protocolSection"]["eligibilityModule"]:
|
|
614
|
+
# new_study["sampling_method"] = study["protocolSection"]["eligibilityModule"].get("samplingMethod")
|
|
615
|
+
if "contactsLocationsModule" in study["protocolSection"]:
|
|
616
|
+
if "locations" in study["protocolSection"]["contactsLocationsModule"]:
|
|
617
|
+
new_study["locations"] = study["protocolSection"][
|
|
618
|
+
"contactsLocationsModule"
|
|
619
|
+
].get("locations")
|
|
620
|
+
if "referencesModule" in study["protocolSection"]:
|
|
621
|
+
if "references" in study["protocolSection"]["referencesModule"]:
|
|
622
|
+
new_study["references"] = study["protocolSection"][
|
|
623
|
+
"referencesModule"
|
|
624
|
+
].get("references")
|
|
625
|
+
if "seeAlsoLinks" in study["protocolSection"]["referencesModule"]:
|
|
626
|
+
new_study["see_also_links"] = study["protocolSection"][
|
|
627
|
+
"referencesModule"
|
|
628
|
+
].get("seeAlsoLinks")
|
|
629
|
+
|
|
630
|
+
new_study = self._remove_empty_values(new_study)
|
|
631
|
+
|
|
632
|
+
return new_study
|
|
633
|
+
|
|
634
|
+
def _extract_outcomes_from_output(self, study, outcome_measure):
|
|
635
|
+
new_study = {}
|
|
636
|
+
outcome_measure = outcome_measure.lower()
|
|
637
|
+
new_study["NCT ID"] = study["protocolSection"]["identificationModule"].get(
|
|
638
|
+
"nctId"
|
|
639
|
+
)
|
|
640
|
+
|
|
641
|
+
if (
|
|
642
|
+
"resultsSection" in study
|
|
643
|
+
and "outcomeMeasuresModule" in study["resultsSection"]
|
|
644
|
+
and "outcomeMeasures" in study["resultsSection"]["outcomeMeasuresModule"]
|
|
645
|
+
):
|
|
646
|
+
raw_outcomes = study["resultsSection"]["outcomeMeasuresModule"][
|
|
647
|
+
"outcomeMeasures"
|
|
648
|
+
]
|
|
649
|
+
outcomes = []
|
|
650
|
+
for outcome in raw_outcomes:
|
|
651
|
+
new_outcome = {}
|
|
652
|
+
|
|
653
|
+
if (outcome_measure == "primary") and outcome.get("type") != "PRIMARY":
|
|
654
|
+
continue
|
|
655
|
+
if (outcome_measure == "secondary") and outcome.get(
|
|
656
|
+
"type"
|
|
657
|
+
) != "SECONDARY":
|
|
658
|
+
continue
|
|
659
|
+
if (outcome_measure == "all") and outcome.get("type") not in [
|
|
660
|
+
"PRIMARY",
|
|
661
|
+
"SECONDARY",
|
|
662
|
+
]:
|
|
663
|
+
continue
|
|
664
|
+
if outcome_measure not in ["primary", "secondary", "all"]:
|
|
665
|
+
outcome_measure_variants = [outcome_measure]
|
|
666
|
+
# TODO: Add more rules here
|
|
667
|
+
outcome_measure_variants.append(outcome_measure.replace("-", " "))
|
|
668
|
+
outcome_measure_variants.append(outcome_measure.replace(" ", "-"))
|
|
669
|
+
outcome_measure_variants.append(
|
|
670
|
+
outcome_measure.replace("progression", "progress")
|
|
671
|
+
)
|
|
672
|
+
outcome_measure_variants.append(
|
|
673
|
+
outcome_measure.replace("progress ", "progression ")
|
|
674
|
+
)
|
|
675
|
+
outcome_measure_variants.append(
|
|
676
|
+
outcome_measure.replace("progress-", "progression-")
|
|
677
|
+
)
|
|
678
|
+
outcome_measure_variants.append(
|
|
679
|
+
outcome_measure.replace("patient", "participant")
|
|
680
|
+
)
|
|
681
|
+
outcome_measure_variants.append(
|
|
682
|
+
outcome_measure.replace("participant", "patient")
|
|
683
|
+
)
|
|
684
|
+
outcome_measure_variants.append(outcome_measure.replace("_", " "))
|
|
685
|
+
outcome_measure_variants.append(
|
|
686
|
+
outcome_measure.replace("percentage", "percent")
|
|
687
|
+
)
|
|
688
|
+
outcome_measure_variants.append(
|
|
689
|
+
outcome_measure.replace("percent ", "percentage ")
|
|
690
|
+
)
|
|
691
|
+
outcome_measure_variants.append(
|
|
692
|
+
outcome_measure.replace("percent-", "percentage-")
|
|
693
|
+
)
|
|
694
|
+
outcome_measure_variants.append(
|
|
695
|
+
outcome_measure.replace("proportion", "percentage")
|
|
696
|
+
)
|
|
697
|
+
outcome_measure_variants.append(
|
|
698
|
+
outcome_measure.replace("percentage", "proportion")
|
|
699
|
+
)
|
|
700
|
+
outcome_measure_variants.append(
|
|
701
|
+
outcome_measure.replace("proportion", "percent")
|
|
702
|
+
)
|
|
703
|
+
outcome_measure_variants.append(
|
|
704
|
+
outcome_measure.replace("percent", "proportion")
|
|
705
|
+
)
|
|
706
|
+
outcome_measure_variants.append(
|
|
707
|
+
outcome_measure.replace("time to event", "time-to-event")
|
|
708
|
+
)
|
|
709
|
+
outcome_measure_variants.append(
|
|
710
|
+
outcome_measure.replace("time-to-event", "time to event")
|
|
711
|
+
)
|
|
712
|
+
outcome_measure_variants = list(set(outcome_measure_variants))
|
|
713
|
+
found_match = False
|
|
714
|
+
for o in outcome_measure_variants:
|
|
715
|
+
if (
|
|
716
|
+
o in outcome.get("title", "").lower()
|
|
717
|
+
or o in outcome.get("description", "").lower()
|
|
718
|
+
):
|
|
719
|
+
found_match = True
|
|
720
|
+
break
|
|
721
|
+
if not found_match:
|
|
722
|
+
continue
|
|
723
|
+
|
|
724
|
+
new_outcome["title"] = outcome.get("title")
|
|
725
|
+
new_outcome["description"] = outcome.get("description")
|
|
726
|
+
new_outcome["population"] = outcome.get("populationDescription")
|
|
727
|
+
new_outcome["time_frame"] = outcome.get("timeFrame")
|
|
728
|
+
new_outcome["unit_analyzed"] = outcome.get("typeUnitsAnalyzed")
|
|
729
|
+
|
|
730
|
+
measurement_type = outcome.get("paramType")
|
|
731
|
+
if measurement_type:
|
|
732
|
+
measurement_type = measurement_type.lower()
|
|
733
|
+
# GEOMETRIC_MEAN - Geometric Mean
|
|
734
|
+
# GEOMETRIC_LEAST_SQUARES_MEAN - Geometric Least Squares Mean
|
|
735
|
+
# LEAST_SQUARES_MEAN - Least Squares Mean
|
|
736
|
+
# LOG_MEAN - Log Mean
|
|
737
|
+
# MEAN - Mean
|
|
738
|
+
# MEDIAN - Median
|
|
739
|
+
# NUMBER - Number
|
|
740
|
+
# COUNT_OF_PARTICIPANTS - Count of Participants
|
|
741
|
+
# COUNT_OF_UNITS - Count of Units
|
|
742
|
+
|
|
743
|
+
unit = outcome.get("unitOfMeasure")
|
|
744
|
+
|
|
745
|
+
new_outcome["groups"] = outcome.get("groups")
|
|
746
|
+
|
|
747
|
+
denoms = outcome.get("denoms")
|
|
748
|
+
if denoms is not None:
|
|
749
|
+
if len(denoms) > 1:
|
|
750
|
+
# TODO: Investigate such trials
|
|
751
|
+
return f"Warning: Multiple denoms found for outcome {new_outcome['title']} in study {new_study['NCT ID']}."
|
|
752
|
+
denoms = denoms[0]["counts"]
|
|
753
|
+
new_outcome["denominators"] = denoms
|
|
754
|
+
|
|
755
|
+
classes = outcome.get("classes")
|
|
756
|
+
if classes is not None:
|
|
757
|
+
if len(classes) > 1:
|
|
758
|
+
# TODO: Investigate such trials
|
|
759
|
+
return f"Warning: Multiple classes found for outcome {new_outcome['title']} in study {new_study['NCT ID']}."
|
|
760
|
+
if "title" in classes[0] or "denoms" in classes[0]:
|
|
761
|
+
# TODO: Investigate such trials
|
|
762
|
+
return f"Warning: Unexpected structure in classes for outcome {new_outcome['title']} in study {new_study['NCT ID']}."
|
|
763
|
+
classes = classes[0]
|
|
764
|
+
elif "categories" in classes[0]:
|
|
765
|
+
classes = classes[0]["categories"]
|
|
766
|
+
if len(classes) > 1:
|
|
767
|
+
# TODO: Investigate such trials
|
|
768
|
+
return f"Warning: Multiple classes-categories found for outcome {new_outcome['title']} in study {new_study['NCT ID']}."
|
|
769
|
+
if "title" in classes[0]:
|
|
770
|
+
# TODO: Investigate such trials
|
|
771
|
+
return f"Warning: Unexpected structure in classes-categories for outcome {new_outcome['title']} in study {new_study['NCT ID']}."
|
|
772
|
+
classes = classes[0]
|
|
773
|
+
elif "measurements" in classes[0]:
|
|
774
|
+
classes = classes[0]["measurements"]
|
|
775
|
+
else:
|
|
776
|
+
# TODO: Investigate such trials
|
|
777
|
+
return f"Warning: Unexpected structure in classes-categories for outcome {new_outcome['title']} in study {new_study['NCT ID']}."
|
|
778
|
+
else:
|
|
779
|
+
# TODO: Investigate such trials
|
|
780
|
+
return f"Warning: Unexpected structure in classes for outcome {new_outcome['title']} in study {new_study['NCT ID']}."
|
|
781
|
+
|
|
782
|
+
if measurement_type and unit:
|
|
783
|
+
new_outcome[measurement_type + " (" + unit + ")"] = classes
|
|
784
|
+
else:
|
|
785
|
+
# TODO: Investigate such trials
|
|
786
|
+
return f"Warning: Missing paramType or unitOfMeasure for outcome {new_outcome['title']} in study {new_study['NCT ID']}."
|
|
787
|
+
|
|
788
|
+
analyses = outcome.get("analyses")
|
|
789
|
+
if analyses is not None:
|
|
790
|
+
if len(analyses) > 1:
|
|
791
|
+
# TODO: Investigate such trials
|
|
792
|
+
return f"Warning: Multiple analyses found for outcome {new_outcome['title']} in study {new_study['NCT ID']}."
|
|
793
|
+
analyses = analyses[0]
|
|
794
|
+
pvalue = analyses.get("pValue")
|
|
795
|
+
pvalue_comment = analyses.get("pValueComment")
|
|
796
|
+
statistic_test = analyses.get("statisticalMethod")
|
|
797
|
+
statistic_comment = analyses.get("statisticalComment")
|
|
798
|
+
|
|
799
|
+
statistic_name = analyses.get("paramType")
|
|
800
|
+
statistic = analyses.get("paramValue")
|
|
801
|
+
if statistic_name and statistic_test and statistic and pvalue:
|
|
802
|
+
new_outcome["p-value (" + statistic_test + ")"] = pvalue
|
|
803
|
+
new_outcome[statistic_name] = statistic
|
|
804
|
+
else:
|
|
805
|
+
# TODO: Investigate such trials
|
|
806
|
+
return f"Warning: Missing paramType, paramValue, statisticalMethod or pvalue for outcome {new_outcome['title']} in study {new_study['NCT ID']}."
|
|
807
|
+
if statistic_comment:
|
|
808
|
+
new_outcome["statistic_comment"] = statistic_comment
|
|
809
|
+
if pvalue_comment:
|
|
810
|
+
new_outcome["pvalue_comment"] = pvalue_comment
|
|
811
|
+
|
|
812
|
+
statistic_test_type = analyses.get("nonInferiorityType")
|
|
813
|
+
statistic_test_type_comment = analyses.get(
|
|
814
|
+
"nonInferiorityTypeComment"
|
|
815
|
+
)
|
|
816
|
+
if statistic_test_type and statistic_test_type_comment:
|
|
817
|
+
new_outcome["statistic_test_type"] = statistic_test_type
|
|
818
|
+
new_outcome["statistic_test_type_comment"] = (
|
|
819
|
+
statistic_test_type_comment
|
|
820
|
+
)
|
|
821
|
+
|
|
822
|
+
outcomes.append(new_outcome)
|
|
823
|
+
|
|
824
|
+
new_study["outcomes"] = outcomes
|
|
825
|
+
new_study = self._remove_empty_values(new_study)
|
|
826
|
+
|
|
827
|
+
return new_study
|
|
828
|
+
|
|
829
|
+
def _extract_safety_from_output(self, study, organs, adverse_event_type):
|
|
830
|
+
new_study = {}
|
|
831
|
+
adverse_event_type = adverse_event_type.lower()
|
|
832
|
+
organs = [org.lower() for org in organs]
|
|
833
|
+
new_study["NCT ID"] = study["protocolSection"]["identificationModule"].get(
|
|
834
|
+
"nctId"
|
|
835
|
+
)
|
|
836
|
+
|
|
837
|
+
if (
|
|
838
|
+
"resultsSection" in study
|
|
839
|
+
and "adverseEventsModule" in study["resultsSection"]
|
|
840
|
+
):
|
|
841
|
+
ae_data = study["resultsSection"]["adverseEventsModule"]
|
|
842
|
+
new_study["freq_threshold"] = (
|
|
843
|
+
ae_data["frequencyThreshold"] + "%"
|
|
844
|
+
if "frequencyThreshold" in ae_data
|
|
845
|
+
else None
|
|
846
|
+
)
|
|
847
|
+
groups = ae_data["eventGroups"]
|
|
848
|
+
for group in groups:
|
|
849
|
+
if "deathsNumAffected" in group:
|
|
850
|
+
del group["deathsNumAffected"]
|
|
851
|
+
if "deathsNumAtRisk" in group:
|
|
852
|
+
del group["deathsNumAtRisk"]
|
|
853
|
+
# if "seriousNumAffected" in group:
|
|
854
|
+
# del group["seriousNumAffected"]
|
|
855
|
+
# if "seriousNumAtRisk" in group:
|
|
856
|
+
# del group["seriousNumAtRisk"]
|
|
857
|
+
# if "otherNumAffected" in group:
|
|
858
|
+
# del group["otherNumAffected"]
|
|
859
|
+
# if "otherNumAtRisk" in group:
|
|
860
|
+
# del group["otherNumAtRisk"]
|
|
861
|
+
|
|
862
|
+
new_study["groups"] = groups
|
|
863
|
+
|
|
864
|
+
if "seriousEvents" in ae_data and adverse_event_type != "other":
|
|
865
|
+
raw_aes = ae_data["seriousEvents"]
|
|
866
|
+
serious_aes = []
|
|
867
|
+
for ae in raw_aes:
|
|
868
|
+
if adverse_event_type not in {"serious", "all"}:
|
|
869
|
+
ae_name = ae.get("term", "").lower()
|
|
870
|
+
if adverse_event_type not in ae_name:
|
|
871
|
+
continue
|
|
872
|
+
if len(organs) > 0:
|
|
873
|
+
organ_system = ae.get("organSystem", "").lower()
|
|
874
|
+
if organ_system not in organs:
|
|
875
|
+
continue
|
|
876
|
+
|
|
877
|
+
if "sourceVocabulary" in ae:
|
|
878
|
+
del ae["sourceVocabulary"]
|
|
879
|
+
if "assessmentType" in ae:
|
|
880
|
+
del ae["assessmentType"]
|
|
881
|
+
|
|
882
|
+
if "stats" in ae and len(ae["stats"]) > 0:
|
|
883
|
+
for group_stats in ae["stats"]:
|
|
884
|
+
if (
|
|
885
|
+
group_stats.get("numAffected") is not None
|
|
886
|
+
and group_stats.get("numAtRisk") is not None
|
|
887
|
+
and group_stats.get("numAtRisk", 0) > 0
|
|
888
|
+
):
|
|
889
|
+
group_stats["percentage"] = (
|
|
890
|
+
str(
|
|
891
|
+
round(
|
|
892
|
+
group_stats.get("numAffected", 0)
|
|
893
|
+
/ group_stats.get("numAtRisk", 1)
|
|
894
|
+
* 100,
|
|
895
|
+
2,
|
|
896
|
+
)
|
|
897
|
+
)
|
|
898
|
+
+ "%"
|
|
899
|
+
)
|
|
900
|
+
elif (
|
|
901
|
+
group_stats.get("numEvents") is not None
|
|
902
|
+
and group_stats.get("numAtRisk") is not None
|
|
903
|
+
and group_stats.get("numAtRisk", 0) > 0
|
|
904
|
+
):
|
|
905
|
+
group_stats["percentage"] = (
|
|
906
|
+
str(
|
|
907
|
+
round(
|
|
908
|
+
group_stats.get("numEvents", 0)
|
|
909
|
+
/ group_stats.get("numAtRisk", 1)
|
|
910
|
+
* 100,
|
|
911
|
+
2,
|
|
912
|
+
)
|
|
913
|
+
)
|
|
914
|
+
+ "%"
|
|
915
|
+
)
|
|
916
|
+
else:
|
|
917
|
+
group_stats["percentage"] = None
|
|
918
|
+
|
|
919
|
+
if "numEvents" in group_stats:
|
|
920
|
+
del group_stats["numEvents"]
|
|
921
|
+
|
|
922
|
+
serious_aes.append(ae)
|
|
923
|
+
|
|
924
|
+
new_study["serious_adverse_events"] = serious_aes
|
|
925
|
+
|
|
926
|
+
if "otherEvents" in ae_data and adverse_event_type != "serious":
|
|
927
|
+
raw_aes = ae_data["otherEvents"]
|
|
928
|
+
other_aes = []
|
|
929
|
+
for ae in raw_aes:
|
|
930
|
+
if adverse_event_type not in {"other", "all"}:
|
|
931
|
+
ae_name = ae.get("term", "").lower()
|
|
932
|
+
if adverse_event_type not in ae_name:
|
|
933
|
+
continue
|
|
934
|
+
if len(organs) > 0:
|
|
935
|
+
organ_system = ae.get("organSystem", "").lower()
|
|
936
|
+
if organ_system not in organs:
|
|
937
|
+
continue
|
|
938
|
+
|
|
939
|
+
if "sourceVocabulary" in ae:
|
|
940
|
+
del ae["sourceVocabulary"]
|
|
941
|
+
if "assessmentType" in ae:
|
|
942
|
+
del ae["assessmentType"]
|
|
943
|
+
|
|
944
|
+
if "stats" in ae and len(ae["stats"]) > 0:
|
|
945
|
+
for group_stats in ae["stats"]:
|
|
946
|
+
if (
|
|
947
|
+
group_stats.get("numAffected") is not None
|
|
948
|
+
and group_stats.get("numAtRisk") is not None
|
|
949
|
+
and group_stats.get("numAtRisk", 0) > 0
|
|
950
|
+
):
|
|
951
|
+
group_stats["percentage"] = (
|
|
952
|
+
str(
|
|
953
|
+
round(
|
|
954
|
+
group_stats.get("numAffected", 0)
|
|
955
|
+
/ group_stats.get("numAtRisk", 1)
|
|
956
|
+
* 100,
|
|
957
|
+
2,
|
|
958
|
+
)
|
|
959
|
+
)
|
|
960
|
+
+ "%"
|
|
961
|
+
)
|
|
962
|
+
elif (
|
|
963
|
+
group_stats.get("numEvents") is not None
|
|
964
|
+
and group_stats.get("numAtRisk") is not None
|
|
965
|
+
and group_stats.get("numAtRisk", 0) > 0
|
|
966
|
+
):
|
|
967
|
+
group_stats["percentage"] = (
|
|
968
|
+
str(
|
|
969
|
+
round(
|
|
970
|
+
group_stats.get("numeEvents", 0)
|
|
971
|
+
/ group_stats.get("numAtRisk", 1)
|
|
972
|
+
* 100,
|
|
973
|
+
2,
|
|
974
|
+
)
|
|
975
|
+
)
|
|
976
|
+
+ "%"
|
|
977
|
+
)
|
|
978
|
+
else:
|
|
979
|
+
group_stats["percentage"] = None
|
|
980
|
+
|
|
981
|
+
if "numEvents" in group_stats:
|
|
982
|
+
del group_stats["numEvents"]
|
|
983
|
+
|
|
984
|
+
other_aes.append(ae)
|
|
985
|
+
|
|
986
|
+
new_study["other_adverse_events"] = other_aes
|
|
987
|
+
|
|
988
|
+
new_study = self._remove_empty_values(new_study)
|
|
989
|
+
|
|
990
|
+
return new_study
|
|
991
|
+
|
|
992
|
+
def _remove_empty_values(self, obj):
|
|
993
|
+
if isinstance(obj, dict):
|
|
994
|
+
return {
|
|
995
|
+
k: self._remove_empty_values(v)
|
|
996
|
+
for k, v in obj.items()
|
|
997
|
+
if v not in [[], None]
|
|
998
|
+
}
|
|
999
|
+
elif isinstance(obj, list):
|
|
1000
|
+
return [self._remove_empty_values(v) for v in obj if v not in [[], None]]
|
|
1001
|
+
else:
|
|
1002
|
+
return obj
|