PyPI - vpop-calibration - Versions diffs - 2.2.8__py3-none-any.whl - Mend

vpop-calibration 2.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

vpop_calibration/__init__.py +22 -0
vpop_calibration/data_generation.py +186 -0
vpop_calibration/diagnostics.py +162 -0
vpop_calibration/model/__init__.py +3 -0
vpop_calibration/model/data.py +420 -0
vpop_calibration/model/gp.py +517 -0
vpop_calibration/model/plot.py +243 -0
vpop_calibration/nlme.py +840 -0
vpop_calibration/ode.py +203 -0
vpop_calibration/saem.py +945 -0
vpop_calibration/structural_model.py +200 -0
vpop_calibration/test/__init__.py +11 -0
vpop_calibration/test/test_data.py +21 -0
vpop_calibration/test/test_gp_flavors.py +89 -0
vpop_calibration/test/test_gp_saem.py +175 -0
vpop_calibration/test/test_ode_saem.py +121 -0
vpop_calibration/utils.py +9 -0
vpop_calibration/vpop.py +50 -0
vpop_calibration-2.2.8.dist-info/METADATA +78 -0
vpop_calibration-2.2.8.dist-info/RECORD +22 -0
vpop_calibration-2.2.8.dist-info/WHEEL +4 -0
vpop_calibration-2.2.8.dist-info/licenses/LICENSE +21 -0

vpop_calibration/structural_model.py ADDED Viewed

@@ -0,0 +1,200 @@
+import torch
+import pandas as pd
+import numpy as np
+import uuid
+from .model.gp import GP
+from .ode import OdeModel
+from .utils import device
+class StructuralModel:
+    def __init__(
+        self,
+        parameter_names,
+        output_names,
+        protocol_arms,
+        tasks,
+        task_idx_to_output_idx,
+        task_idx_to_protocol,
+    ):
+        """Initialize a structural model
+        Args:
+            parameter_names (list[str]): _description_
+            output_names (list[str]): _description_
+            protocol_arms (list[str]): _description_
+            tasks (list[str]): _description_
+            task_idx_to_output_idx (list[str]): _description_
+            task_idx_to_protocol (list[str]): _description_
+        """
+        self.parameter_names: list[str] = parameter_names
+        self.nb_parameters: int = len(self.parameter_names)
+        self.output_names: list[str] = output_names
+        self.nb_outputs: int = len(self.output_names)
+        self.protocols: list[str] = protocol_arms
+        self.nb_protocols: int = len(self.protocols)
+        self.tasks: list[str] = tasks
+        self.task_idx_to_output_idx: dict[int, int] = task_idx_to_output_idx
+        self.task_idx_to_protocol: dict[int, str] = task_idx_to_protocol
+    def simulate(
+        self,
+        X: torch.Tensor,
+        prediction_index: tuple[torch.Tensor, torch.Tensor, torch.Tensor],
+        chunks: list[int],
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        raise ValueError("Not implemented")
+class StructuralGp(StructuralModel):
+    def __init__(self, gp_model: GP):
+        """Create a structural model from a GP
+        Args:
+            gp_model (GP): The trained GP
+        """
+        # list the GP parameters, except time, as it will be handled differently in the NLME model
+        parameter_names = [p for p in gp_model.data.parameter_names if p != "time"]
+        super().__init__(
+            parameter_names,
+            gp_model.data.output_names,
+            gp_model.data.protocol_arms,
+            gp_model.data.tasks,
+            gp_model.data.task_idx_to_output_idx,
+            gp_model.data.task_idx_to_protocol,
+        )
+        self.gp_model = gp_model
+    def simulate(
+        self,
+        X: torch.Tensor,
+        prediction_index: tuple[torch.Tensor, torch.Tensor, torch.Tensor],
+        chunks: list[int],
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        # X contains [nb_patients, nb_timesteps, nb_params + 1]
+        # Simulate the GP
+        (nb_patients, nb_timesteps, nb_params) = X.shape
+        X_vertical = X.view(-1, nb_params)
+        out_cat, var_cat = self.gp_model.predict_wide_scaled(X_vertical)
+        out_wide = out_cat.view(nb_patients, nb_timesteps, -1)
+        var_wide = var_cat.view(nb_patients, nb_timesteps, -1)
+        # Retrieve the necessary rows and columns to transform into a single column tensor
+        y = out_wide[prediction_index]
+        var = var_wide[prediction_index]
+        return y, var
+class StructuralOdeModel(StructuralModel):
+    def __init__(
+        self,
+        ode_model: OdeModel,
+        protocol_design: pd.DataFrame,
+        init_conditions: np.ndarray,
+    ):
+        self.ode_model = ode_model
+        protocol_arms = protocol_design["protocol_arm"].drop_duplicates().to_list()
+        self.protocol_design = protocol_design
+        output_names: list[str] = self.ode_model.variable_names
+        tasks: list[str] = [
+            output + "_" + protocol
+            for protocol in protocol_arms
+            for output in output_names
+        ]
+        # Map tasks to output names
+        task_to_output = {
+            output_name + "_" + protocol_arm: output_name
+            for output_name in output_names
+            for protocol_arm in protocol_arms
+        }
+        # Map task index to output index
+        task_idx_to_output_idx = {
+            tasks.index(k): output_names.index(v) for k, v in task_to_output.items()
+        }
+        # Map task to protocol arm
+        task_to_protocol = {
+            output_name + "_" + protocol_arm: protocol_arm
+            for output_name in output_names
+            for protocol_arm in protocol_arms
+        }
+        # Map task index to protocol arm
+        task_idx_to_protocol = {tasks.index(k): v for k, v in task_to_protocol.items()}
+        # list the structural model parameters: the protocol overrides are ignored
+        self.protocol_overrides = self.protocol_design.drop(
+            columns="protocol_arm"
+        ).columns.to_list()
+        parameter_names = list(
+            set(self.ode_model.param_names) - set(self.protocol_overrides)
+        )
+        self.nb_protocol_overrides = len(self.protocol_overrides)
+        super().__init__(
+            parameter_names,
+            output_names,
+            protocol_arms,
+            tasks,
+            task_idx_to_output_idx,
+            task_idx_to_protocol,
+        )
+        self.init_cond_df = pd.DataFrame(
+            data=[init_conditions], columns=self.ode_model.initial_cond_names
+        )
+    def simulate(
+        self,
+        X: torch.Tensor,
+        prediction_index: tuple[torch.Tensor, torch.Tensor, torch.Tensor],
+        chunks: list[int],
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        (nb_patients, nb_timesteps, nb_params) = X.shape
+        list_X = [ind_X for ind_X in X]
+        patient_index_full, rows_full, tasks_full = prediction_index
+        list_rows = torch.split(rows_full, chunks)
+        list_tasks = torch.split(tasks_full, chunks)
+        input_df_list = []
+        for ind_X, ind_rows, ind_tasks in zip(list_X, list_rows, list_tasks):
+            temp_id = str(uuid.uuid4())
+            # Extract the parameters and time values
+            params = ind_X.index_select(0, ind_rows).cpu().detach().numpy()
+            # Extract the task order
+            task_index = ind_tasks.cpu().detach().numpy()
+            # Format the data inputs
+            # This step is where the order of parameters is implicit
+            input_df_temp = pd.DataFrame(
+                data=params, columns=self.parameter_names + ["time"]
+            )
+            # The passed params include the _global_ time steps
+            # Filter the time steps that we actually want for this patient
+            input_df_temp = input_df_temp.iloc[ind_rows.cpu().numpy()]
+            # Add the task index as a temporary column
+            input_df_temp["task_index"] = task_index
+            # Deduce protocol arm and output name from task index
+            input_df_temp["protocol_arm"] = input_df_temp["task_index"].apply(
+                lambda t: self.task_idx_to_protocol[t]
+            )
+            input_df_temp["output_name"] = input_df_temp["task_index"].apply(
+                lambda t: self.output_names[self.task_idx_to_output_idx[t]]
+            )
+            # Remove the unnecessary task index column
+            input_df_temp = input_df_temp.drop(columns=["task_index"])
+            input_df_temp["id"] = temp_id
+            # Add the protocol overrides
+            if self.nb_protocol_overrides > 0:
+                input_df_temp = input_df_temp.merge(
+                    self.protocol_design, how="left", on=["protocol_arm"]
+                )
+            # Add the initial conditions
+            input_df_temp = input_df_temp.merge(self.init_cond_df, how="cross")
+            input_df_list.append(input_df_temp)
+        full_input = pd.concat(input_df_list)
+        # Simulate the ODE model
+        output_df = self.ode_model.simulate_model(full_input)
+        # Convert back to tensor
+        out_tensor = torch.as_tensor(output_df["predicted_value"].values, device=device)
+        out_var = torch.zeros_like(out_tensor, device=device)
+        return out_tensor, out_var

vpop_calibration/test/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+import numpy as np
+import torch
+# Initialize the seeds for all random operators used in the tests
+# Dedicated NumPy generator, shared with the test modules through __all__
+np_rng = np.random.default_rng(42)
+# Also seed NumPy's legacy global state and torch's global generator
+np.random.seed(42)
+torch.manual_seed(0)
+# Passed as `optim_max_fun` to PySaem by the SAEM tests
+saem_mi_maxfun = 1
+# Passed as the `multithreaded` argument of OdeModel by the SAEM tests
+multithreaded = False
+# Names made available by `from vpop_calibration.test import *`
+__all__ = ["np_rng", "saem_mi_maxfun", "multithreaded"]

vpop_calibration/test/test_data.py ADDED Viewed

@@ -0,0 +1,21 @@
+import pandas as pd
+from vpop_calibration.model.data import TrainingDataSet
+from vpop_calibration.test import *
+# Minimal long-format training table: two patient ids, a single protocol arm,
+# two outputs ("s1", "s2"), one descriptor column `k1` and the observed `value`
+training_df = pd.DataFrame(
+    {
+        "id": ["1", "1", "2", "2"],
+        "protocol_arm": ["arm-A"] * 4,
+        "output_name": ["s1", "s2", "s1", "s2"],
+        "k1": [1.0, 1.0, 2.0, 2.0],
+        "value": [0.0, 1.0, 2.0, 3.0],
+    }
+)
+def test_loading():
+    """Smoke test: construct TrainingDataSet with several option combinations."""
+    # Default options
+    TrainingDataSet(training_df, ["k1"], 1.0)
+    # Data flagged as already normalized
+    TrainingDataSet(training_df, ["k1"], 1.0, data_already_normalized=True)
+    # Log-transformed input and output
+    TrainingDataSet(training_df, ["k1"], 1.0, log_inputs=["k1"], log_outputs=["s1"])
+    # No `protocol_arm` column, and a different third positional argument
+    # (presumably the training proportion -- TODO confirm against TrainingDataSet)
+    TrainingDataSet(training_df.drop(columns={"protocol_arm"}), ["k1"], 0.5)

vpop_calibration/test/test_gp_flavors.py ADDED Viewed

@@ -0,0 +1,89 @@
+import pandas as pd
+import numpy as np
+import pickle
+from vpop_calibration import *
+from vpop_calibration.test import *
+# Create a dummy training data frame
+patients = ["patient-01", "patient-02"]
+nb_patients = len(patients)
+obsIds = ["obs-01", "obs-02"]
+protocol_arms = ["arm-A", "arm-B"]
+time_steps = np.arange(0, 10.0, 1.0)
+patient_descriptors = ["k1", "k2", "k3"]
+# GP inputs: the patient descriptors followed by time
+gp_params = [*patient_descriptors, "time"]
+training_df = pd.DataFrame({"id": patients})
+# One random value per patient and per descriptor, drawn from the shared test generator
+for descriptor in patient_descriptors:
+    training_df[descriptor] = np_rng.normal(0, 1, nb_patients)
+# Successive cross joins: every patient x protocol arm x time step x output
+training_df = training_df.merge(
+    pd.DataFrame({"protocol_arm": protocol_arms}), how="cross"
+)
+training_df = training_df.merge(pd.DataFrame({"time": time_steps}), how="cross")
+training_df = training_df.merge(pd.DataFrame({"output_name": obsIds}), how="cross")
+# Random observed values
+training_df["value"] = np_rng.normal(0, 1, training_df.shape[0])
+# Random half of the rows, used to test training on incomplete data
+training_df_bootstrapped = training_df.sample(frac=0.5, random_state=np_rng)
+# Option values combined by test_all_gp_flavors
+implemented_kernels = ["RBF", "SMK", "Matern"]
+implemented_var_strat = ["IMV", "LMCV"]
+implemented_mll = ["ELBO", "PLL"]
+deep_or_not = [True, False]
+# Path written by test_gp_pickle
+model_file = "vpop_calibration/test/gp_model_for_tests.pkl"
+def gp_init_flavor(var_strat, kernel, deep_kernel, mll):
+    """Build a GP on the dummy training data with the given options and train it.
+    Args:
+        var_strat: Value forwarded as the `var_strat` argument of GP
+        kernel: Value forwarded as the `kernel` argument of GP
+        deep_kernel: Value forwarded as the `deep_kernel` argument of GP
+        mll: Value forwarded as the `mll` argument of GP
+    """
+    # Small sizes and two training iterations: this only checks that the flavor runs
+    gp = GP(
+        training_df,
+        gp_params,
+        var_strat=var_strat,
+        mll=mll,
+        kernel=kernel,
+        deep_kernel=deep_kernel,
+        nb_latents=2,
+        nb_features=5,
+        num_mixtures=3,
+        nb_training_iter=2,
+    )
+    gp.train()
+def test_all_gp_flavors():
+    """Train one GP for every combination of deep kernel, kernel, variational strategy and mll."""
+    for deep_kernel in deep_or_not:
+        for kernel in implemented_kernels:
+            for var_strat in implemented_var_strat:
+                for mll in implemented_mll:
+                    gp_init_flavor(var_strat, kernel, deep_kernel, mll)
+def test_batching_1():
+    """Train with mini-batching enabled and an explicit mini-batch size of 8."""
+    gp = GP(training_df, gp_params, nb_training_iter=2)
+    gp.train(mini_batching=True, mini_batch_size=8)
+def test_batching_2():
+    """Train with mini-batching enabled and `mini_batch_size=None`."""
+    gp = GP(training_df, gp_params, nb_training_iter=2)
+    gp.train(mini_batching=True, mini_batch_size=None)
+def test_eval_with_valid():
+    """Call eval_perf on a GP built with the default training proportion.
+    Note that `train` is not called before `eval_perf`.
+    """
+    gp = GP(training_df, gp_params, nb_training_iter=2)
+    gp.eval_perf()
+def test_eval_no_valid():
+    """Call eval_perf on a GP built with `training_proportion=1`.
+    Presumably this leaves no validation data -- TODO confirm against GP.
+    Note that `train` is not called before `eval_perf`.
+    """
+    gp = GP(training_df, gp_params, nb_training_iter=2, training_proportion=1)
+    gp.eval_perf()
+def test_gp_incomplete_data():
+    """Train (with and without mini-batching) and evaluate a GP on the subsampled data."""
+    # training_df_bootstrapped holds a random half of the rows of training_df
+    gp = GP(training_df_bootstrapped, gp_params, nb_training_iter=2)
+    gp.train()
+    gp.train(mini_batching=True, mini_batch_size=8)
+    gp.eval_perf()
+def test_gp_pickle():
+    """Check that an untrained GP can be serialized with pickle."""
+    gp = GP(training_df, gp_params)
+    # NOTE(review): test_gp_saem.py uses the same path for the trained GP it later
+    # reloads; depending on test order this dump may overwrite it -- confirm this is intended
+    with open(model_file, "wb") as file:
+        pickle.dump(gp, file)

vpop_calibration/test/test_gp_saem.py ADDED Viewed

@@ -0,0 +1,175 @@
+import numpy as np
+import pandas as pd
+import pickle
+import uuid
+from vpop_calibration import *
+from vpop_calibration.test import *
+def equations_with_abs(t, y, k_a, k_12, k_21, k_el):
+    # y[0] is A_absorption, y[1] is A_central, y[2] is A_peripheral
+    A_absorption, A_central, A_peripheral = y[0], y[1], y[2]
+    dA_absorption_dt = -k_a * A_absorption
+    dA_central_dt = (
+        k_a * A_absorption + k_21 * A_peripheral - k_12 * A_central - k_el * A_central
+    )
+    dA_peripheral_dt = k_12 * A_central - k_21 * A_peripheral
+    ydot = [dA_absorption_dt, dA_central_dt, dA_peripheral_dt]
+    return ydot
+# Names of the three ODE states and of the four rate parameters of equations_with_abs
+variable_names = ["A0", "A1", "A2"]
+parameter_names = ["k_a", "k_12", "k_21", "k_el"]
+# Last time step of the training simulations
+tmax = 24.0
+# Initial value of each state, in the order of variable_names
+initial_conditions = np.array([10.0, 0.0, 0.0])
+# Two protocol arms which differ by the value of k_el
+protocol_design = pd.DataFrame(
+    {"protocol_arm": ["arm-A", "arm-B"], "k_el": [0.5, 10.0]}
+)
+nb_protocols = len(protocol_design)
+pk_two_compartments_model = OdeModel(
+    equations_with_abs, variable_names, parameter_names, multithreaded=multithreaded
+)
+# Written by test_gp_training and read back by test_gp_saem
+model_file = "vpop_calibration/test/gp_model_for_tests.pkl"
+def test_gp_training():
+    """Simulate a small data set with the ODE model, train a GP on it, plot and pickle it."""
+    # Define the simulated time steps and the explored parameter ranges
+    nb_timesteps = 3
+    time_steps = np.linspace(0.0, tmax, nb_timesteps)
+    log_nb_patients = 1
+    param_ranges = {
+        "k_12": {"low": -2.0, "high": 0.0, "log": True},
+        "k_21": {"low": -1.0, "high": 0.3, "log": True},
+        "k_a": {"low": -1.0, "high": 0.0, "log": True},
+    }
+    dataset = simulate_dataset_from_ranges(
+        pk_two_compartments_model,
+        log_nb_patients,
+        param_ranges,
+        initial_conditions,
+        protocol_design,
+        None,
+        None,
+        time_steps,
+    )
+    learned_ode_params = list(param_ranges.keys())
+    descriptors = learned_ode_params + ["time"]
+    # initiate our GP class
+    myGP = GP(
+        dataset,
+        descriptors,
+        var_strat="IMV",  # either IMV (Independent Multitask Variational) or LMCV (Linear Model of Coregionalization Variational)
+        kernel="RBF",  # Either RBF or SMK
+        data_already_normalized=False,  # default
+        nb_inducing_points=10,
+        mll="ELBO",  # default, otherwise PLL
+        nb_training_iter=1,
+        training_proportion=0.7,
+        learning_rate=0.1,
+        lr_decay=0.99,
+        jitter=1e-6,
+        log_inputs=learned_ode_params,
+    )
+    myGP.train()
+    # Exercise the plotting helpers
+    myGP.plot_loss()
+    myGP.plot_obs_vs_predicted("training")
+    myGP.plot_individual_solution(0)
+    myGP.plot_all_solutions("training")
+    # Save the trained GP: test_gp_saem reloads it from this file
+    with open(model_file, "wb") as file:
+        pickle.dump(myGP, file)
+def test_gp_saem():
+    """Run SAEM on an NLME model whose structural model is the pickled GP.
+    The observations are simulated with the ODE model. The GP is read from
+    `model_file`, so that file must have been written beforehand.
+    """
+    time_span_rw = (0, 24)
+    nb_steps_rw = 2
+    # For each output and for each patient, give a list of time steps to be simulated
+    time_steps_rw = np.linspace(time_span_rw[0], time_span_rw[1], nb_steps_rw).tolist()
+    # Parameter definitions
+    true_log_MI = {"k_21": 0.0}
+    true_log_PDU = {
+        "k_12": {"mean": -1.0, "sd": 0.25},
+    }
+    error_model_type = "additive"
+    true_res_var = [0.5, 0.02, 0.1]
+    true_covariate_map = {
+        "k_12": {"foo": {"coef": "cov_foo_k12", "value": 0.2}},
+    }
+    # Create a patient data frame
+    # It should contain at the very minimum one `id` per patient
+    nb_patients = 3
+    patients_df = pd.DataFrame({"id": [str(uuid.uuid4()) for _ in range(nb_patients)]})
+    # Random assignment of each patient to one of the two protocol arms
+    patients_df["protocol_arm"] = np_rng.binomial(1, 0.5, nb_patients)
+    patients_df["protocol_arm"] = patients_df["protocol_arm"].apply(
+        lambda x: "arm-A" if x == 0 else "arm-B"
+    )
+    # Random per-patient values of the `k_a` and `foo` columns
+    patients_df["k_a"] = np_rng.lognormal(-1, 0.1, nb_patients)
+    patients_df["foo"] = np_rng.lognormal(0.1, 0.1, nb_patients)
+    print(f"Simulating {nb_patients} patients on {nb_protocols} protocol arms")
+    obs_df = simulate_dataset_from_omega(
+        pk_two_compartments_model,
+        protocol_design,
+        time_steps_rw,
+        initial_conditions,
+        true_log_MI,
+        true_log_PDU,
+        error_model_type,
+        true_res_var,
+        true_covariate_map,
+        patients_df,
+    )
+    # Initial pop estimates
+    # Parameter definitions
+    init_log_MI = {"k_21": -1.0}
+    init_log_PDU = {
+        "k_12": {"mean": -0.1, "sd": 0.1},
+    }
+    error_model_type = "additive"
+    init_res_var = [0.1, 0.05, 0.5]
+    init_covariate_map = {
+        "k_12": {"foo": {"coef": "cov_foo_k12", "value": -0.1}},
+    }
+    # Reload the pickled GP (the file is produced by this package's own tests)
+    with open(model_file, "rb") as file:
+        myGP = pickle.load(file)
+    # Create a structural model
+    structural_gp = StructuralGp(myGP)
+    # Create a NLME model
+    nlme_surrogate = NlmeModel(
+        structural_gp,
+        patients_df,
+        init_log_MI,
+        init_log_PDU,
+        init_res_var,
+        init_covariate_map,
+        error_model_type,
+    )
+    # Create an optimizer: here we use SAEM
+    optimizer = PySaem(
+        nlme_surrogate,
+        obs_df,
+        nb_phase1_iterations=1,
+        nb_phase2_iterations=0,
+        optim_max_fun=saem_mi_maxfun,
+    )
+    optimizer.run()
+    # Resume the run with one additional iteration in each phase
+    optimizer.continue_iterating(nb_add_iters_ph1=1, nb_add_iters_ph2=1)
+    optimizer.plot_convergence_history()
+    plot_map_estimates(nlme_surrogate)
+    check_surrogate_validity_gp(nlme_surrogate)

vpop_calibration/test/test_ode_saem.py ADDED Viewed

@@ -0,0 +1,121 @@
+import numpy as np
+import pandas as pd
+import uuid
+from vpop_calibration import *
+from vpop_calibration.test import *
+def equations_with_abs(t, y, k_a, k_12, k_21, k_el):
+    # y[0] is A_absorption, y[1] is A_central, y[2] is A_peripheral
+    A_absorption, A_central, A_peripheral = y[0], y[1], y[2]
+    dA_absorption_dt = -k_a * A_absorption
+    dA_central_dt = (
+        k_a * A_absorption + k_21 * A_peripheral - k_12 * A_central - k_el * A_central
+    )
+    dA_peripheral_dt = k_12 * A_central - k_21 * A_peripheral
+    ydot = [dA_absorption_dt, dA_central_dt, dA_peripheral_dt]
+    return ydot
+# Names of the three ODE states and of the four rate parameters of equations_with_abs
+variable_names = ["A0", "A1", "A2"]
+parameter_names = ["k_a", "k_12", "k_21", "k_el"]
+# NOTE(review): `tmax` and `model_file` are not referenced by the test visible in this module
+tmax = 24.0
+# Initial value of each state, in the order of variable_names
+initial_conditions = np.array([10.0, 0.0, 0.0])
+# Two protocol arms which differ by the value of k_el
+protocol_design = pd.DataFrame(
+    {"protocol_arm": ["arm-A", "arm-B"], "k_el": [0.5, 10.0]}
+)
+nb_protocols = len(protocol_design)
+pk_two_compartments_model = OdeModel(
+    equations_with_abs, variable_names, parameter_names, multithreaded=multithreaded
+)
+model_file = "vpop_calibration/test/gp_model_for_tests.pkl"
+def test_ode_saem():
+    """Run SAEM on an NLME model whose structural model is the ODE model itself.
+    The observations are simulated with the same ODE model.
+    """
+    time_span_rw = (0, 24)
+    nb_steps_rw = 2
+    # For each output and for each patient, give a list of time steps to be simulated
+    time_steps_rw = np.linspace(time_span_rw[0], time_span_rw[1], nb_steps_rw).tolist()
+    # Parameter definitions
+    true_log_MI = {"k_21": 0.0}
+    true_log_PDU = {
+        "k_12": {"mean": -1.0, "sd": 0.25},
+    }
+    error_model_type = "additive"
+    true_res_var = [0.5, 0.02, 0.1]
+    true_covariate_map = {
+        "k_12": {"foo": {"coef": "cov_foo_k12", "value": 0.2}},
+    }
+    # Create a patient data frame
+    # It should contain at the very minimum one `id` per patient
+    nb_patients = 3
+    patients_df = pd.DataFrame({"id": [str(uuid.uuid4()) for _ in range(nb_patients)]})
+    # Random assignment of each patient to one of the two protocol arms
+    patients_df["protocol_arm"] = np_rng.binomial(1, 0.5, nb_patients)
+    patients_df["protocol_arm"] = patients_df["protocol_arm"].apply(
+        lambda x: "arm-A" if x == 0 else "arm-B"
+    )
+    # Random per-patient values of the `k_a` and `foo` columns
+    patients_df["k_a"] = np_rng.lognormal(-1, 0.1, nb_patients)
+    patients_df["foo"] = np_rng.lognormal(0.1, 0.1, nb_patients)
+    print(f"Simulating {nb_patients} patients on {nb_protocols} protocol arms")
+    obs_df = simulate_dataset_from_omega(
+        pk_two_compartments_model,
+        protocol_design,
+        time_steps_rw,
+        initial_conditions,
+        true_log_MI,
+        true_log_PDU,
+        error_model_type,
+        true_res_var,
+        true_covariate_map,
+        patients_df,
+    )
+    # Initial pop estimates
+    # Parameter definitions
+    # Unlike the simulated truth, k_12 is given as MI and k_21 as PDU here
+    init_log_MI = {"k_12": -1.0}
+    init_log_PDU = {
+        "k_21": {"mean": -1.0, "sd": 0.2},
+    }
+    error_model_type = "additive"
+    init_res_var = [0.1, 0.05, 0.5]
+    # NOTE(review): the coefficient is named "cov_foo_k12" but is attached to k_21 -- confirm the name is intended
+    init_covariate_map = {
+        "k_21": {"foo": {"coef": "cov_foo_k12", "value": -0.1}},
+    }
+    # Create a structural model
+    structural_ode = StructuralOdeModel(
+        pk_two_compartments_model, protocol_design, initial_conditions
+    )
+    # Create a NLME model
+    nlme = NlmeModel(
+        structural_ode,
+        patients_df,
+        init_log_MI,
+        init_log_PDU,
+        init_res_var,
+        init_covariate_map,
+        error_model_type,
+    )
+    # Create an optimizer: here we use SAEM
+    optimizer = PySaem(
+        nlme,
+        obs_df,
+        nb_phase1_iterations=1,
+        nb_phase2_iterations=0,
+        optim_max_fun=saem_mi_maxfun,
+    )
+    optimizer.run()
+    # Resume the run with one additional phase 2 iteration only
+    optimizer.continue_iterating(nb_add_iters_ph1=0, nb_add_iters_ph2=1)
+    optimizer.plot_convergence_history()
+    plot_map_estimates(nlme)

vpop_calibration/utils.py ADDED Viewed

@@ -0,0 +1,9 @@
+import os
+import torch
+if "IS_PYTEST_RUNNING" in os.environ:
+    smoke_test = True
+else:
+    smoke_test = False
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

vpop_calibration/vpop.py ADDED Viewed

@@ -0,0 +1,50 @@
+import pandas as pd
+import uuid
+import numpy as np
+from scipy.stats.qmc import Sobol, scale
+def generate_vpop_from_ranges(
+    log_nb_individuals: int, param_ranges: dict[str, dict[str, float | bool]]
+) -> pd.DataFrame:
+    """Generate a vpop of patients from parameter ranges using Sobol sequences
+    Args:
+        log_nb_individuals (int): The vpop size will be 2^log_nb_individuals
+        param_ranges (dict[str, dict[str, float  |  bool]]): One entry for each parameter to be explored
+        - `param_name`: {`low`: float, `high`: float, `log`: bool}. Turn `log` to true to define log-scaled ranges
+    Returns:
+        pd.DataFrame: A set of patients with a generated `id`, and a column per descriptor
+    Note:
+        This method may be called with an empty dict, to return a list of patient ids.
+    """
+    nb_individuals = np.power(2, log_nb_individuals)
+    params_to_explore = list(param_ranges.keys())
+    nb_parameters = len(params_to_explore)
+    if nb_parameters != 0:
+        # Create a sobol sampler to generate parameter values
+        sobol_engine = Sobol(d=nb_parameters, scramble=True)
+        sobol_sequence = sobol_engine.random_base2(log_nb_individuals)
+        samples = scale(
+            sobol_sequence,
+            [param_ranges[param_name]["low"] for param_name in params_to_explore],
+            [param_ranges[param_name]["high"] for param_name in params_to_explore],
+        )
+        # Handle log-scaled parameters
+        for j, param_name in enumerate(params_to_explore):
+            if param_ranges[param_name]["log"] == True:
+                samples[:, j] = np.exp(samples[:, j])
+        # Create the full data frame of patient descriptors
+        patients_df = pd.DataFrame(data=samples, columns=params_to_explore)
+    else:
+        # No parameter requested, create empty data frame
+        patients_df = pd.DataFrame()
+    ids = [str(uuid.uuid4()) for _ in range(nb_individuals)]
+    patients_df.insert(0, "id", ids)
+    return patients_df