vpop-calibration 2.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,203 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from scipy.integrate import solve_ivp
4
+ import multiprocessing as mp
5
+ from typing import Any, Callable, Optional
6
+
7
+ from .utils import smoke_test
8
+
9
+
10
class OdeModel:
    """An ODE system that can be simulated for a set of virtual patients."""

    def __init__(
        self,
        equations: Callable,
        variable_names: list[str],
        param_names: list[str],
        tol: Optional[float] = 1e-6,
        multithreaded: Optional[bool] = True,
    ):
        """OdeModel

        Create a computational model given a set of equations.

        Args:
            equations (callable): A function describing the right hand side of the ODE system
            variable_names (list[str]): The names of the outputs of the system
            param_names (list[str]): The name of the parameters of the system
            tol (float, optional): Tolerance handed to the solver, used as both the
                relative and the absolute tolerance. Defaults to 1e-6.
            multithreaded (bool, optional): When true, patients are simulated in a
                `multiprocessing.Pool` (worker processes, despite the parameter name).
                Ignored when `smoke_test` is truthy. Defaults to True.
        """
        self.equations = equations
        self.variable_names = variable_names
        self.nb_outputs = len(variable_names)

        self.param_names = param_names
        self.nb_parameters = len(param_names)
        # Define the name of initial conditions as `<variable>_0`
        self.initial_cond_names = [v + "_0" for v in self.variable_names]

        self.tol = tol
        # `smoke_test` is imported from `.utils`; a truthy value forces the
        # sequential code path regardless of `multithreaded`.
        if smoke_test:
            self.use_multiprocessing = False
        else:
            self.use_multiprocessing = multithreaded

    def simulate_model(
        self,
        input_data: pd.DataFrame,
    ) -> pd.DataFrame:
        """Solve the ODE model using formatted input data

        Args:
            input_data (pd.DataFrame): a DataFrame containing 'id', 'protocol_arm',
                'output_name', 'time', and columns for all individual parameters and
                initial conditions ('var_0', for var in variable_names) for each patient

        Returns:
            pd.DataFrame: the 'id', 'protocol_arm', 'output_name' and 'time' columns of
            the input, with a new 'predicted_value' column. The frame is empty when the
            input contains no (id, protocol_arm) group to simulate.
        """
        key_columns = ["id", "protocol_arm", "output_name", "time"]

        # One task per (patient, protocol arm) pair. Parameters and initial
        # conditions are read from the first row of each group.
        tasks: list[dict[str, Any]] = []
        for _, ind_df in input_data.groupby("id"):
            for _, filtered_df in ind_df.groupby("protocol_arm"):
                params = filtered_df[self.param_names].iloc[0].values
                initial_conditions = filtered_df[self.initial_cond_names].iloc[0].values
                indiv_task = {
                    "patient_inputs": filtered_df[key_columns],
                    "initial_conditions": initial_conditions,
                    "params": params,
                    "equations": self.equations,
                    "output_names": self.variable_names,
                    "tol": self.tol,
                }
                tasks.append(indiv_task)

        if not tasks:
            # `pd.concat` refuses an empty list; return an empty frame that
            # still has the columns callers merge on.
            return input_data.iloc[:0][key_columns].assign(
                predicted_value=pd.Series(dtype=float)
            )

        all_solutions: list[pd.DataFrame]
        if self.use_multiprocessing:
            with mp.Pool() as pool:
                all_solutions = pool.map(_simulate_patient, tasks)
        else:
            all_solutions = [_simulate_patient(task) for task in tasks]
        return pd.concat(all_solutions)

    def run_trial(
        self,
        vpop: pd.DataFrame,
        initial_conditions: np.ndarray,
        protocol_design: Optional[pd.DataFrame],
        time_steps: np.ndarray,
    ) -> pd.DataFrame:
        """Run a trial given a vpop, protocol and solving times

        Args:
            vpop (pd.DataFrame): The patient descriptors. Should contain the following columns
                - `id`
                - `protocol_arm`
                - `output_name`
                - one column per patient descriptor
            initial_conditions (np.ndarray): one set of initial conditions (same for all patients)
            protocol_design (Optional[pd.DataFrame]): Protocol design linking `protocol_arm` to
                actual parameter overrides. When None, a single `identity` arm carrying no
                override column is used.
            time_steps (np.ndarray): The requested observation times. Same for all outputs

        Returns:
            pd.DataFrame: A merged output containing the following columns
                - `id`
                - one column per patient descriptor
                - `protocol_arm`
                - `output_name`
                - `predicted_value`: the simulated value

        Notes:
            Every row of `vpop` is crossed with every requested time step.
        """
        # list the requested time steps for each output (here we use same solving times for all outputs)
        time_steps_df = pd.DataFrame({"time": time_steps})
        # Assemble the initial conditions in a dataframe
        init_cond_df = pd.DataFrame(
            data=[initial_conditions], columns=self.initial_cond_names
        )
        if protocol_design is None:
            # The value must be list-like: an all-scalar dict makes the
            # DataFrame constructor raise because no index can be inferred.
            protocol_design_to_use = pd.DataFrame({"protocol_arm": ["identity"]})
        else:
            protocol_design_to_use = protocol_design

        # Merge the data frames together
        # Add time steps for all patients
        full_input_data = vpop.merge(time_steps_df, how="cross")
        # Add initial conditions for all patients
        full_input_data = full_input_data.merge(init_cond_df, how="cross")
        # Add protocol arm info by merging the protocol design
        full_input_data = full_input_data.merge(
            protocol_design_to_use, how="left", on="protocol_arm"
        )
        # Run the model
        output = self.simulate_model(full_input_data)

        # Attach the predictions back onto the full input table
        merged_df = pd.merge(
            full_input_data,
            output,
            on=["id", "output_name", "time", "protocol_arm"],
            how="left",
        )
        return merged_df
141
+
142
+
143
def _simulate_patient(args: dict) -> pd.DataFrame:
    """Worker function to simulate a model on a single patient

    Args:
        args (dict): describes the simulation to be performed. Requires the following
            patient_inputs (pd.DataFrame): a data frame describing the patient to be simulated. The output data frame will be identical, with an additional `predicted_value` column. The inputs expect the following columns
                `id`
                `protocol_arm`
                `output_name`
                `time`
            initial_conditions (np.ndarray): the initial state, one value per variable, in the same order as `output_names`
            params (np.ndarray): the patient descriptors, passed to `equations` as extra positional arguments
            equations (Callable): system right-hand side, called as `equations(t, y, *params)`
            output_names (list[str]): the model output names, in the same order as in the equations
            tol (float): solver tolerance, used as both `rtol` and `atol`

    Returns:
        pd.DataFrame: `patient_inputs` with an added `predicted_value` column. Rows
        keep their input order; a row whose `output_name` is not listed in
        `output_names` gets a missing value.

    Raises:
        ValueError: if more than one patient id is present, or if the integration fails.
    """

    # extract args
    input_df: pd.DataFrame = args["patient_inputs"]
    ind_id: pd.Series = input_df["id"].drop_duplicates()
    if ind_id.shape[0] > 1:
        raise ValueError("More than 1 patient was provided to `simulate_patient`")

    # The solver needs ascending evaluation times that lie inside the
    # integration span, so sort here instead of relying on input row order.
    time_steps: list[float] = sorted(input_df["time"].drop_duplicates().to_list())
    initial_conditions: np.ndarray = args["initial_conditions"]
    params: np.ndarray = args["params"]
    equations: Callable = args["equations"]
    output_names: list[str] = args["output_names"]
    tol: float = args["tol"]

    # Integration starts at the earliest requested time, which is where the
    # initial conditions are applied.
    time_span = (time_steps[0], time_steps[-1])

    sol = solve_ivp(
        equations,
        time_span,
        initial_conditions,
        method="LSODA",
        t_eval=time_steps,
        rtol=tol,
        atol=tol,
        # `args` is documented as a tuple of extra arguments
        args=tuple(params),
    )
    if not sol.success:
        raise ValueError(f"ODE integration failed: {sol.message}")

    # `sol.y` is (n_variables, n_times): transpose to one row per time step
    simulation_outputs_df = pd.DataFrame(data=sol.y.transpose(), columns=output_names)
    simulation_outputs_df["time"] = time_steps
    # Reshape to long format: one row per (time, output_name)
    simulation_outputs_df = simulation_outputs_df.melt(
        id_vars=["time"],
        value_vars=list(output_names),
        var_name="output_name",
        value_name="predicted_value",
    )
    # Left merge keeps only the requested (output, time) pairs, in input order
    full_output = input_df.merge(
        simulation_outputs_df, how="left", on=["output_name", "time"]
    )
    return full_output