PyPI - vpop-calibration - Versions diffs - 2.2.8__py3-none-any.whl - Mend

vpop-calibration 2.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

vpop_calibration/__init__.py +22 -0
vpop_calibration/data_generation.py +186 -0
vpop_calibration/diagnostics.py +162 -0
vpop_calibration/model/__init__.py +3 -0
vpop_calibration/model/data.py +420 -0
vpop_calibration/model/gp.py +517 -0
vpop_calibration/model/plot.py +243 -0
vpop_calibration/nlme.py +840 -0
vpop_calibration/ode.py +203 -0
vpop_calibration/saem.py +945 -0
vpop_calibration/structural_model.py +200 -0
vpop_calibration/test/__init__.py +11 -0
vpop_calibration/test/test_data.py +21 -0
vpop_calibration/test/test_gp_flavors.py +89 -0
vpop_calibration/test/test_gp_saem.py +175 -0
vpop_calibration/test/test_ode_saem.py +121 -0
vpop_calibration/utils.py +9 -0
vpop_calibration/vpop.py +50 -0
vpop_calibration-2.2.8.dist-info/METADATA +78 -0
vpop_calibration-2.2.8.dist-info/RECORD +22 -0
vpop_calibration-2.2.8.dist-info/WHEEL +4 -0
vpop_calibration-2.2.8.dist-info/licenses/LICENSE +21 -0

vpop_calibration/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+from .nlme import NlmeModel
+from .saem import PySaem
+from .structural_model import StructuralGp, StructuralOdeModel
+from .model import *
+from .ode import OdeModel
+from .vpop import generate_vpop_from_ranges
+from .data_generation import simulate_dataset_from_omega, simulate_dataset_from_ranges
+from .diagnostics import check_surrogate_validity_gp, plot_map_estimates
+__all__ = [
+    "GP",
+    "OdeModel",
+    "StructuralGp",
+    "StructuralOdeModel",
+    "NlmeModel",
+    "PySaem",
+    "simulate_dataset_from_omega",
+    "simulate_dataset_from_ranges",
+    "generate_vpop_from_ranges",
+    "check_surrogate_validity_gp",
+    "plot_map_estimates",
+]

vpop_calibration/data_generation.py ADDED Viewed

@@ -0,0 +1,186 @@
+import numpy as np
+import pandas as pd
+from typing import Optional
+from .ode import OdeModel
+from .vpop import generate_vpop_from_ranges
+from .structural_model import StructuralOdeModel
+from .nlme import NlmeModel
+def simulate_dataset_from_ranges(
+    ode_model: OdeModel,
+    log_nb_individuals: int,
+    param_ranges: dict[str, dict[str, float | bool]],
+    initial_conditions: np.ndarray,
+    protocol_design: Optional[pd.DataFrame],
+    residual_error_variance: Optional[np.ndarray],
+    error_model: Optional[str],  # "additive" or "proportional"
+    time_steps: np.ndarray,
+) -> pd.DataFrame:
+    """Generate a simulated data set with an ODE model
+    Simulates a dataset for training a surrogate model. Timesteps can be different for each output.
+    The parameter space is explored with Sobol sequences.
+    Args:
+        log_nb_individuals (int): The number of simulated patients will be 2^this parameter
+        param_ranges (list[dict]): For each parameter in the model, a dict describing the search space 'low': low bound, 'high': high bound, and 'log': True if the search space is log-scaled
+        initial_conditions (array): set of initial conditions, one for each variable
+        protocol_design (optional): a DataFrame with a `protocol_arm` column, and one column per parameter override
+        residual_error_variance (np.array): A 1D array of residual error variances for each output.
+        error_model (str): the type of error model ("additive" or "proportional").
+        time_steps (np.array): an array with the time points
+    Returns:
+        pd.DataFrame: A DataFrame with columns 'id', parameter names, 'time', 'output_name', and 'value'.
+    Notes:
+        If a parameter appears both in the ranges and in the protocol design, the ranges take precedence.
+    """
+    # Validate input data
+    params_to_explore = list(param_ranges.keys())
+    if protocol_design is None:
+        print("No protocol")
+        params = params_to_explore
+        params_in_protocol = []
+        protocol_design_filt = pd.DataFrame({"protocol_arm": ["identity"]})
+    else:
+        params_in_protocol = protocol_design.drop(
+            "protocol_arm", axis=1
+        ).columns.tolist()
+        # Find the paramaters that appear both in the ranges and the protocol
+        overlap = set(params_to_explore) & set(params_in_protocol)
+        if overlap != set():
+            protocol_design_filt = protocol_design.drop(list(overlap), axis=1)
+            print(
+                f"Warning: ignoring entries {overlap} from the protocol design (already defined in the ranges)."
+            )
+        else:
+            protocol_design_filt = protocol_design
+        params = params_to_explore + params_in_protocol
+    if set(params) != set(ode_model.param_names):
+        raise ValueError(
+            f"Under-defined system: missing {set(ode_model.param_names) - set(params)}"
+        )
+    # Generate the vpop using sobol sequences
+    patients_df = generate_vpop_from_ranges(log_nb_individuals, param_ranges)
+    # Add a choice of protocol arm for each patient
+    protocol_arms = pd.DataFrame(protocol_design_filt["protocol_arm"].drop_duplicates())
+    patients_df = patients_df.merge(protocol_arms, how="cross")
+    # Add the outputs for each patient
+    outputs = pd.DataFrame({"output_name": ode_model.variable_names})
+    patients_df = patients_df.merge(outputs, how="cross")
+    # Simulate the ODE model
+    output_df = ode_model.run_trial(
+        patients_df, initial_conditions, protocol_design_filt, time_steps
+    )
+    # Pivot to wide to add noise per model output
+    wide_output = output_df.pivot_table(
+        index=["id", *ode_model.param_names, "time", "protocol_arm"],
+        columns="output_name",
+        values="predicted_value",
+    ).reset_index()
+    if error_model is None:
+        pass
+    else:
+        if residual_error_variance is None:
+            raise ValueError("Undefined residual error variance.")
+        else:
+            # Add noise to the data
+            noise = np.random.normal(
+                np.zeros_like(residual_error_variance),
+                np.sqrt(residual_error_variance),
+                (wide_output.shape[0], ode_model.nb_outputs),
+            )
+            if error_model == "additive":
+                wide_output[ode_model.variable_names] += noise
+            elif error_model == "proportional":
+                wide_output[ode_model.variable_names] += (
+                    noise * wide_output[ode_model.variable_names]
+                )
+            else:
+                raise ValueError(f"Incorrect error_model choice: {error_model}")
+    # Pivot back to long format
+    long_output = wide_output.melt(
+        id_vars=[
+            "id",
+            "protocol_arm",
+            "time",
+            *ode_model.param_names,
+        ],
+        value_vars=ode_model.variable_names,
+        var_name="output_name",
+        value_name="value",
+    )
+    # Remove the protocol arm overrides from the data set, they described by the protocol_arm column now
+    long_output = long_output.drop(params_in_protocol, axis=1)
+    return long_output
+def simulate_dataset_from_omega(
+    ode_model: OdeModel,
+    protocol_design: pd.DataFrame,
+    time_steps: np.ndarray,
+    init_conditions: np.ndarray,
+    log_mi: dict[str, float],
+    log_pdu: dict[str, dict[str, float]],
+    error_model: str,
+    res_var: list[float],
+    covariate_map: dict[str, dict[str, dict[str, str | float]]],
+    patient_covariates: pd.DataFrame,
+) -> pd.DataFrame:
+    """Generate synthetic data set using an ODE model and population distributions of parameters
+    Args:
+        ode_model (OdeModel): The equations to be simulated
+        protocol_design (pd.DataFrame): _description_
+        time_steps (np.ndarray): _description_
+        init_conditions (np.ndarray): _description_
+        log_mi (dict[str, float]): _description_
+        log_pdu (dict[str, dict[str, float]]): _description_
+        error_model (str): _description_
+        res_var (list[float]): _description_
+        covariate_map (dict[str, dict[str, dict[str, str  |  float]]]): _description_
+        patient_covariates (pd.DataFrame): _description_
+    Returns:
+        pd.DataFrame: _description_
+    """
+    structural_model = StructuralOdeModel(ode_model, protocol_design, init_conditions)
+    nlme_model = NlmeModel(
+        structural_model,
+        patient_covariates,
+        log_mi,
+        log_pdu,
+        res_var,
+        covariate_map,
+        error_model,
+    )
+    etas = nlme_model.sample_individual_etas()
+    theta = nlme_model.individual_parameters(etas)
+    vpop = pd.DataFrame(data=theta.cpu().numpy(), columns=nlme_model.descriptors)
+    vpop["id"] = nlme_model.patients
+    protocol_arms = patient_covariates[["id", "protocol_arm"]]
+    vpop = vpop.merge(protocol_arms, on=["id"], how="left")
+    vpop = vpop.merge(
+        pd.DataFrame(data=nlme_model.outputs_names, columns=["output_name"]),
+        how="cross",
+    )
+    time_df = pd.DataFrame(data=time_steps, columns=["time"])
+    vpop = vpop.merge(time_df, how="cross")
+    # add a dummy observation value
+    vpop["value"] = 1.0
+    nlme_model.add_observations(vpop)
+    out_tensor, _ = nlme_model.predict_outputs_from_theta(theta)
+    out_with_noise = nlme_model.add_residual_error(out_tensor)
+    out_df = nlme_model.outputs_to_df(out_with_noise)
+    out_df = out_df.rename(columns={"predicted_value": "value"})
+    return out_df

vpop_calibration/diagnostics.py ADDED Viewed

@@ -0,0 +1,162 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from .nlme import NlmeModel
+from .saem import PySaem
+from .model.gp import GP
+from .structural_model import StructuralGp
+from .utils import smoke_test
+def check_surrogate_validity_gp(nlme_model: NlmeModel) -> tuple[dict, dict]:
+    pdus = nlme_model.descriptors
+    gp_model_struct = nlme_model.structural_model
+    assert isinstance(
+        gp_model_struct, StructuralGp
+    ), "Posterior surrogate validity check only implemented for GP structural model."
+    gp_model: GP = gp_model_struct.gp_model
+    train_data = gp_model.data.full_df_raw[pdus].drop_duplicates()
+    map_data = nlme_model.map_estimates_descriptors()
+    patients = nlme_model.patients
+    n_plots = len(pdus)
+    n_cols = 3
+    n_rows = int(np.ceil(n_plots / n_cols))
+    scaling_indiv_plots = 3
+    _, axes1 = plt.subplots(
+        n_rows,
+        n_cols,
+        squeeze=False,
+        figsize=[scaling_indiv_plots * n_cols, scaling_indiv_plots * n_rows],
+    )
+    diagnostics = {}
+    recommended_ranges = {}
+    for k, param in enumerate(pdus):
+        i, j = k // n_cols, k % n_cols
+        train_samples = np.log(train_data[param])
+        train_min, train_max = train_samples.min(axis=0), train_samples.max(axis=0)
+        map_samples = np.log(map_data[param])
+        flag_high = np.where(map_samples > train_max)[0]
+        flag_low = np.where(map_samples < train_min)[0]
+        recommend_low, recommend_high = train_min, train_max
+        param_diagnostic = {}
+        if flag_high.shape[0] > 0:
+            param_diagnostic.update({"above": [patients[p] for p in flag_high]})
+            recommend_high = map_samples.max()
+        else:
+            param_diagnostic.update({"above": None})
+        if flag_low.shape[0] > 0:
+            param_diagnostic.update({"below": [patients[p] for p in flag_low]})
+            recommend_low = map_samples.min()
+        else:
+            param_diagnostic.update({"below": None})
+        diagnostics.update({param: param_diagnostic})
+        recommended_ranges.update(
+            {
+                param: {
+                    "low": f"{recommend_low:.2f}",
+                    "high": f"{recommend_high:.2f}",
+                    "log": True,
+                }
+            }
+        )
+        ax = axes1[i, j]
+        ax.hist([train_samples, map_samples], density=True)
+        ax.axvline(train_min, linestyle="dashed", color="black")
+        ax.axvline(train_max, linestyle="dashed", color="black")
+        ax.set_title(f"{param}")
+    scaling_2by2_plots = 2
+    _, axes2 = plt.subplots(
+        n_plots,
+        n_plots,
+        squeeze=False,
+        figsize=[scaling_2by2_plots * n_plots, scaling_2by2_plots * n_plots],
+        sharex="col",
+        sharey="row",
+    )
+    for k1, param1 in enumerate(pdus):
+        train_samples_1 = np.log(train_data[param1])
+        map_samples_1 = np.log(map_data[param1])
+        for k2, param2 in enumerate(pdus):
+            train_samples_2 = np.log(train_data[param2])
+            map_samples_2 = np.log(map_data[param2])
+            ax = axes2[k1, k2]
+            if k1 != k2:
+                # param 1 is the row -> y axis
+                # param 2 is the column -> x axis
+                ax.scatter(train_samples_2, train_samples_1, alpha=0.5, s=1.0)
+                ax.scatter(map_samples_2, map_samples_1, s=5)
+            if k2 == 0:
+                ax.set_ylabel(param1)
+            if k1 == len(pdus) - 1:
+                ax.set_xlabel(param2)
+    return diagnostics, recommended_ranges
+def plot_map_estimates(nlme_model: NlmeModel) -> None:
+    observed = nlme_model.observations_df
+    simulated_df = nlme_model.map_estimates_predictions()
+    n_cols = nlme_model.nb_outputs
+    n_rows = nlme_model.structural_model.nb_protocols
+    _, axes = plt.subplots(
+        n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows), squeeze=False
+    )
+    cmap = plt.get_cmap("Spectral")
+    colors = cmap(np.linspace(0, 1, nlme_model.nb_patients))
+    for output_index, output_name in enumerate(nlme_model.outputs_names):
+        for protocol_index, protocol_arm in enumerate(
+            nlme_model.structural_model.protocols
+        ):
+            obs_loop = observed.loc[
+                (observed["output_name"] == output_name)
+                & (observed["protocol_arm"] == protocol_arm)
+            ]
+            pred_loop = simulated_df.loc[
+                (simulated_df["output_name"] == output_name)
+                & (simulated_df["protocol_arm"] == protocol_arm)
+            ]
+            ax = axes[protocol_index, output_index]
+            ax.set_xlabel("Time")
+            patients_protocol = obs_loop["id"].drop_duplicates().to_list()
+            for patient_ind in patients_protocol:
+                patient_num = nlme_model.patients.index(patient_ind)
+                patient_obs = obs_loop.loc[obs_loop["id"] == patient_ind]
+                patient_pred = pred_loop.loc[pred_loop["id"] == patient_ind]
+                time_vec = patient_obs["time"].values
+                sorted_indices = np.argsort(time_vec)
+                sorted_times = time_vec[sorted_indices]
+                obs_vec = patient_obs["value"].values[sorted_indices]
+                ax.plot(
+                    sorted_times,
+                    obs_vec,
+                    "+",
+                    color=colors[patient_num],
+                    linewidth=2,
+                    alpha=0.6,
+                )
+                if patient_pred.shape[0] > 0:
+                    pred_vec = patient_pred["predicted_value"].values[sorted_indices]
+                    ax.plot(
+                        sorted_times,
+                        pred_vec,
+                        "-",
+                        color=colors[patient_num],
+                        linewidth=2,
+                        alpha=0.5,
+                    )
+            title = f"{output_name} in {protocol_arm}"  # More descriptive title
+            ax.set_title(title)
+    if not smoke_test:
+        plt.tight_layout()
+        plt.show()

vpop_calibration/model/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+# Re-export `GP` from the `gp` submodule as the only public name of this subpackage.
+from .gp import GP
+__all__ = ["GP"]