vpop-calibration 2.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vpop_calibration/__init__.py +22 -0
- vpop_calibration/data_generation.py +186 -0
- vpop_calibration/diagnostics.py +162 -0
- vpop_calibration/model/__init__.py +3 -0
- vpop_calibration/model/data.py +420 -0
- vpop_calibration/model/gp.py +517 -0
- vpop_calibration/model/plot.py +243 -0
- vpop_calibration/nlme.py +840 -0
- vpop_calibration/ode.py +203 -0
- vpop_calibration/saem.py +945 -0
- vpop_calibration/structural_model.py +200 -0
- vpop_calibration/test/__init__.py +11 -0
- vpop_calibration/test/test_data.py +21 -0
- vpop_calibration/test/test_gp_flavors.py +89 -0
- vpop_calibration/test/test_gp_saem.py +175 -0
- vpop_calibration/test/test_ode_saem.py +121 -0
- vpop_calibration/utils.py +9 -0
- vpop_calibration/vpop.py +50 -0
- vpop_calibration-2.2.8.dist-info/METADATA +78 -0
- vpop_calibration-2.2.8.dist-info/RECORD +22 -0
- vpop_calibration-2.2.8.dist-info/WHEEL +4 -0
- vpop_calibration-2.2.8.dist-info/licenses/LICENSE +21 -0
vpop_calibration/ode.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
from scipy.integrate import solve_ivp
|
|
4
|
+
import multiprocessing as mp
|
|
5
|
+
from typing import Any, Callable, Optional
|
|
6
|
+
|
|
7
|
+
from .utils import smoke_test
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class OdeModel:
    """ODE-based computational model that can be simulated patient by patient."""

    def __init__(
        self,
        equations: Callable,
        variable_names: list[str],
        param_names: list[str],
        tol: Optional[float] = 1e-6,
        multithreaded: Optional[bool] = True,
    ):
        """OdeModel

        Create a computational model given a set of equations.

        Args:
            equations (callable): A function describing the right hand side of the ODE system.
                It is forwarded to `scipy.integrate.solve_ivp`, with the patient parameters
                supplied as extra positional arguments.
            variable_names (list[str]): The names of the outputs of the system
            param_names (list[str]): The name of the parameters of the system
            tol (float, optional): Solver tolerance, used as both relative and absolute tolerance.
            multithreaded (bool, optional): When truthy, patients are simulated in parallel with a
                `multiprocessing.Pool` (processes, not threads). Ignored when the package-level
                `smoke_test` flag is truthy, in which case simulations always run sequentially.
        """
        self.equations = equations
        self.variable_names = variable_names
        self.nb_outputs = len(variable_names)

        self.param_names = param_names
        self.nb_parameters = len(param_names)
        # Define the name of initial conditions as `<variable>_0`
        self.initial_cond_names = [v + "_0" for v in self.variable_names]

        self.tol = tol
        if smoke_test:
            self.use_multiprocessing = False
        else:
            self.use_multiprocessing = multithreaded

    def simulate_model(
        self,
        input_data: pd.DataFrame,
    ) -> pd.DataFrame:
        """Solve the ODE model using formatted input data

        Args:
            input_data (pd.DataFrame): a DataFrame containing 'id', 'protocol_arm', 'output_name',
                'time', and columns for all individual parameters and initial conditions
                ('var_0', for var in variable_names) for each patient

        Returns:
            pd.DataFrame: the 'id', 'protocol_arm', 'output_name' and 'time' columns of the input,
                with a new 'predicted_value' column. Empty (but with these columns) when the
                input yields no (patient, protocol arm) pair to simulate.

        Notes:
            Parameters and initial conditions are read from the first row of each
            (id, protocol_arm) group. When multiprocessing is enabled, the tasks (including
            `equations`) are sent to worker processes and must therefore be picklable.
        """
        key_columns = ["id", "protocol_arm", "output_name", "time"]

        # group the data by individual and protocol arm to create one task per simulation
        tasks: list[Any] = []
        for _, ind_df in input_data.groupby("id"):
            for _, filtered_df in ind_df.groupby("protocol_arm"):
                tasks.append(
                    {
                        "patient_inputs": filtered_df[key_columns],
                        "initial_conditions": filtered_df[self.initial_cond_names]
                        .iloc[0]
                        .to_numpy(),
                        "params": filtered_df[self.param_names].iloc[0].to_numpy(),
                        "equations": self.equations,
                        "output_names": self.variable_names,
                        "tol": self.tol,
                    }
                )

        if not tasks:
            # `pd.concat` raises on an empty list: return an empty, correctly shaped frame instead
            return input_data[key_columns].iloc[0:0].assign(
                predicted_value=pd.Series(dtype="float64")
            )

        all_solutions: list[pd.DataFrame]
        if self.use_multiprocessing:
            with mp.Pool() as pool:
                all_solutions = pool.map(_simulate_patient, tasks)
        else:
            all_solutions = [_simulate_patient(task) for task in tasks]
        return pd.concat(all_solutions)

    def run_trial(
        self,
        vpop: pd.DataFrame,
        initial_conditions: np.ndarray,
        protocol_design: Optional[pd.DataFrame],
        time_steps: np.ndarray,
    ) -> pd.DataFrame:
        """Run a trial given a vpop, protocol and solving times

        Args:
            vpop (pd.DataFrame): The patient descriptors. Should contain the following columns
                - `id`
                - `protocol_arm`
                - `output_name`
                - one column per patient descriptor
            initial_conditions (np.ndarray): one set of initial conditions (same for all patients),
                ordered like `variable_names`
            protocol_design (Optional[pd.DataFrame]): Protocol design linking `protocol_arm` to
                actual parameter overrides. When None, a single-row design containing only the
                `identity` arm is used, which adds no column.
            time_steps (np.ndarray): The requested observation times. Same for all outputs

        Returns:
            pd.DataFrame: A merged output containing the following columns
                - `id`
                - one column per patient descriptor
                - `protocol_arm`
                - `output_name`
                - `time`
                - one `<variable>_0` column per initial condition
                - `predicted_value`: the simulated value

        Notes:
            Every row of `vpop` is crossed with every requested time step.
        """

        # list the requested time steps for each output (here we use same solving times for all outputs)
        time_steps_df = pd.DataFrame({"time": time_steps})
        # Assemble the initial conditions in a one-row dataframe
        init_cond_df = pd.DataFrame(
            data=[initial_conditions], columns=self.initial_cond_names
        )
        if protocol_design is None:
            # The value must be list-like: a dict of scalars without an index raises ValueError
            protocol_design_to_use = pd.DataFrame({"protocol_arm": ["identity"]})
        else:
            protocol_design_to_use = protocol_design

        # Merge the data frames together
        # Add time steps for all patients
        full_input_data = vpop.merge(time_steps_df, how="cross")
        # Add initial conditions for all patients
        full_input_data = full_input_data.merge(init_cond_df, how="cross")
        # Add protocol arm info by merging the protocol design
        full_input_data = full_input_data.merge(
            protocol_design_to_use, how="left", on="protocol_arm"
        )
        # Run the model
        output = self.simulate_model(full_input_data)

        # Attach the predictions back onto the full input table
        merged_df = pd.merge(
            full_input_data,
            output,
            on=["id", "output_name", "time", "protocol_arm"],
            how="left",
        )
        return merged_df
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _simulate_patient(args: dict) -> pd.DataFrame:
    """Worker function to simulate a model on a single patient

    Args:
        args (dict): describes the simulation to be performed. Requires the following
            patient_inputs (pd.DataFrame): a data frame describing the patient to be simulated. The output data frame will be identical, with an additional `predicted_value` column. The inputs expect the following columns
                `id`
                `protocol_arm`
                `output_name`
                `time`
            initial_conditions (np.ndarray): the initial value of each variable, in the same order as `output_names`. They apply at the earliest requested time
            params (np.ndarray): the patient descriptors, passed to `equations` as extra positional arguments
            equations (Callable): system right-hand side
            output_names (list[str]): the model output names, in the same order as in the equations
            tol (float): solver tolerance (used for both `rtol` and `atol`)

    Returns:
        pd.DataFrame: `patient_inputs` with an added `predicted_value` column

    Raises:
        ValueError: if more than one patient id is present, if no time step is provided,
            or if the ODE integration fails
    """

    # extract args
    input_df: pd.DataFrame = args["patient_inputs"]
    ind_id: pd.Series = input_df["id"].drop_duplicates()
    if ind_id.shape[0] > 1:
        raise ValueError("More than 1 patient was provided to `simulate_patient`")

    # Outputs may be observed on different time grids: the unique times must be sorted,
    # both to define the integration span and because the solver requires an ordered `t_eval`
    time_steps: list[float] = sorted(input_df["time"].drop_duplicates().to_list())
    if not time_steps:
        raise ValueError("No time step was provided to `simulate_patient`")

    initial_conditions: np.ndarray = np.asarray(args["initial_conditions"], dtype=float)
    params = args["params"]
    equations: Callable = args["equations"]
    output_names: list[str] = args["output_names"]
    tol: float = args["tol"]

    if len(time_steps) == 1:
        # Zero-length integration span: the state at the only requested time is the
        # initial condition itself, so there is nothing to integrate
        states = initial_conditions.reshape(1, -1)
    else:
        sol = solve_ivp(
            equations,
            (time_steps[0], time_steps[-1]),
            initial_conditions,
            method="LSODA",
            t_eval=time_steps,
            rtol=tol,
            atol=tol,
            # `solve_ivp` documents `args` as a tuple
            args=None if params is None else tuple(params),
        )
        if not sol.success:
            raise ValueError(f"ODE integration failed: {sol.message}")
        # sol.y is (n_outputs, n_times): transpose to get one row per time step
        states = sol.y.transpose()

    # Reshape the solver output to long format: one row per (time, output_name)
    simulation_outputs_df = pd.DataFrame(data=states, columns=output_names)
    simulation_outputs_df["time"] = time_steps
    simulation_outputs_df = simulation_outputs_df.melt(
        id_vars=["time"],
        value_vars=list(output_names),
        var_name="output_name",
        value_name="predicted_value",
    )
    # Keep only the (output, time) pairs that were actually requested, in input order
    full_output = input_df.merge(
        simulation_outputs_df, how="left", on=["output_name", "time"]
    )
    return full_output
|