vivarium-public-health 4.3.2__py3-none-any.whl → 4.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,356 +0,0 @@
1
- """
2
- ###################
3
- # Exposure Effect #
4
- ###################
5
-
6
- This module contains tools for modeling the relationship between risk
7
- exposure models and disease models.
8
-
9
- """
10
- import warnings
11
- from abc import ABC, abstractmethod
12
- from collections.abc import Callable
13
- from importlib import import_module
14
- from typing import Any
15
-
16
- import numpy as np
17
- import pandas as pd
18
- import scipy
19
- from layered_config_tree import ConfigurationError
20
- from vivarium import Component
21
- from vivarium.framework.engine import Builder
22
- from vivarium.framework.values import Pipeline
23
-
24
- from vivarium_public_health.risks.data_transformations import (
25
- load_exposure_data,
26
- pivot_categorical,
27
- )
28
- from vivarium_public_health.utilities import EntityString, TargetString, get_lookup_columns
29
-
30
- from .exposure import Exposure
31
-
32
-
33
class ExposureEffect(Component, ABC):
    """A component to model the effect of a risk-like factor on an affected target.

    This component can source data either from builder.data or from parameters
    supplied in the configuration.

    """

    def __init__(self, entity: str, target: str):
        """

        Parameters
        ----------
        entity
            Type and name of exposure, supplied in the form
            "entity_type.entity_name" where entity_type should be singular (e.g.,
            exposure instead of exposures).
        target
            Type, name, and target rate of entity to be affected by risk factor,
            supplied in the form "entity_type.entity_name.measure"
            where entity_type should be singular (e.g., cause instead of causes).
        """
        super().__init__()
        self.entity = EntityString(entity)
        self.target = TargetString(target)

        # Resolved in build_all_lookup_tables from the exposure component.
        self._exposure_distribution_type = None
        self.target_pipeline_name = f"{self.target.name}.{self.target.measure}"
        self.target_paf_pipeline_name = f"{self.target_pipeline_name}.paf"

    ##############
    # Properties #
    ##############

    @property
    def name(self) -> str:
        """The component name, built by ``get_name`` from the entity and target."""
        return self.get_name(self.entity, self.target)

    @staticmethod
    @abstractmethod
    def get_name(entity: EntityString, target: TargetString) -> str:
        """Build the component name from the exposure entity and its target.

        Subclasses must override this. It is invoked by the ``name`` property
        as ``self.get_name(self.entity, self.target)``, so the override must
        accept exactly those two arguments.
        """
        raise NotImplementedError

    @property
    def configuration_defaults(self) -> dict[str, Any]:
        """Default values for any configurations managed by this component."""
        return {
            self.name: {
                "data_sources": {
                    "relative_risk": f"{self.entity}.relative_risk",
                    "population_attributable_fraction": f"{self.entity}.population_attributable_fraction",
                },
                "data_source_parameters": {
                    "relative_risk": {},
                },
            }
        }

    @property
    def is_exposure_categorical(self) -> bool:
        """Whether the resolved exposure distribution type is a categorical one."""
        return self._exposure_distribution_type in [
            "dichotomous",
            "ordered_polytomous",
            "unordered_polytomous",
        ]

    #####################
    # Lifecycle methods #
    #####################

    # noinspection PyAttributeOutsideInit
    def setup(self, builder: Builder) -> None:
        """Fetch the exposure pipeline and register the relative risk machinery."""
        self.measure = self.get_exposure_callable(builder)

        self._relative_risk_source = self.get_relative_risk_source(builder)
        self.relative_risk = self.get_relative_risk_pipeline(builder)

        self.register_target_modifier(builder)
        self.register_paf_modifier(builder)

    #################
    # Setup methods #
    #################

    def setup_component(self, builder: Builder) -> None:
        """Locate the exposure component before running the normal component setup.

        The exposure pipeline name depends on the exposure component's
        ``exposure_type``, so the component has to be looked up first.
        """
        self.exposure_component = self._get_exposure_class(builder)
        self.exposure_pipeline_name = (
            f"{self.entity.name}.{self.exposure_component.exposure_type}"
        )
        super().setup_component(builder)

    def build_all_lookup_tables(self, builder: Builder) -> None:
        """Build the relative risk and population attributable fraction tables."""
        self._exposure_distribution_type = self.get_distribution_type(builder)

        rr_data = self.load_relative_risk(builder)
        rr_value_cols = None
        if self.is_exposure_categorical:
            rr_data, rr_value_cols = self.process_categorical_data(builder, rr_data)
        self.lookup_tables["relative_risk"] = self.build_lookup_table(
            builder, rr_data, rr_value_cols
        )

        paf_data = self.get_filtered_data(
            builder, self.configuration.data_sources.population_attributable_fraction
        )
        self.lookup_tables["population_attributable_fraction"] = self.build_lookup_table(
            builder, paf_data
        )

    def get_distribution_type(self, builder: Builder) -> str:
        """Get the distribution type of the exposure component.

        Uses the type already stored on the exposure component when it is set,
        and otherwise asks the exposure component to resolve it.
        """
        if self.exposure_component.distribution_type:
            return self.exposure_component.distribution_type
        return self.exposure_component.get_distribution_type(builder)

    def load_relative_risk(
        self,
        builder: Builder,
        configuration=None,
    ) -> str | float | pd.DataFrame:
        """Load relative risk data, or sample it from a ``scipy.stats`` distribution.

        Parameters
        ----------
        builder
            The builder object.
        configuration
            The configuration to read ``data_sources.relative_risk`` and
            ``data_source_parameters.relative_risk`` from. Defaults to this
            component's own configuration.

        Returns
        -------
            A single value drawn from the named distribution when the source
            is a string naming an attribute of ``scipy.stats``; otherwise the
            target-filtered data for the source.

        Raises
        ------
        ConfigurationError
            If the configured parameters are rejected by the distribution.
        """
        if configuration is None:
            configuration = self.configuration

        rr_source = configuration.data_sources.relative_risk
        rr_dist_parameters = configuration.data_source_parameters.relative_risk.to_dict()

        if not isinstance(rr_source, str):
            return self.get_filtered_data(builder, rr_source)

        # Only the attribute lookup decides whether the string names a
        # distribution; keeping it alone in the ``try`` stops an AttributeError
        # raised while seeding or sampling from being mistaken for "not a
        # distribution".
        try:
            distribution = getattr(import_module("scipy.stats"), rr_source)
        except AttributeError:
            return self.get_filtered_data(builder, rr_source)

        rng = np.random.default_rng(builder.randomness.get_seed(self.name))
        try:
            return distribution(**rr_dist_parameters).ppf(rng.random())
        except TypeError as err:
            raise ConfigurationError(
                f"Parameters {rr_dist_parameters} are not valid for distribution {rr_source}."
            ) from err

    def get_filtered_data(
        self, builder: "Builder", data_source: str | float | pd.DataFrame
    ) -> float | pd.DataFrame:
        """Load data and keep only the rows that apply to this effect's target.

        When the loaded data is a DataFrame, rows are filtered on whichever of
        the ``affected_entity`` / ``affected_measure`` columns are present, and
        those columns are then dropped. Data with neither column, and
        non-DataFrame data, is returned unfiltered.
        """
        data = super().get_data(builder, data_source)

        if isinstance(data, pd.DataFrame):
            # filter data to only include the target entity and measure
            target_filters = {
                "affected_entity": self.target.name,
                "affected_measure": self.target.measure,
            }
            columns_to_drop = [col for col in target_filters if col in data.columns]
            # Only index with a mask when there is something to filter on: a
            # bare ``True`` would be treated as a column label and raise KeyError.
            if columns_to_drop:
                correct_target_mask = pd.Series(True, index=data.index)
                for column in columns_to_drop:
                    correct_target_mask &= data[column] == target_filters[column]
                data = data[correct_target_mask].drop(columns=columns_to_drop)
        return data

    def process_categorical_data(
        self, builder: Builder, rr_data: str | float | pd.DataFrame
    ) -> tuple[str | float | pd.DataFrame, list[str]]:
        """Pivot categorical relative risk data into one column per category.

        Non-DataFrame data is first expanded over the demographic dimensions:
        the exposed category gets the given value and the unexposed category
        gets a relative risk of 1.

        Returns
        -------
            The pivoted data and the list of category (value) column names.
        """
        if not isinstance(rr_data, pd.DataFrame):
            category_names = self.exposure_component.dichotomous_exposure_category_names
            exposed = builder.data.load("population.demographic_dimensions")
            exposed["parameter"] = category_names.exposed
            exposed["value"] = rr_data
            unexposed = exposed.copy()
            unexposed["parameter"] = category_names.unexposed
            unexposed["value"] = 1
            rr_data = pd.concat([exposed, unexposed], ignore_index=True)
        if "parameter" in rr_data.index.names:
            rr_data = rr_data.reset_index("parameter")

        rr_value_cols = list(rr_data["parameter"].unique())
        rr_data = pivot_categorical(builder, self.entity, rr_data, "parameter")
        return rr_data, rr_value_cols

    # todo currently this isn't being called. we need to properly set rrs if
    # the exposure has been rebinned
    def rebin_relative_risk_data(
        self, builder, relative_risk_data: pd.DataFrame
    ) -> pd.DataFrame:
        """Rebin relative risk data.

        When the polytomous risk is rebinned, matching relative risk needs to be rebinned.
        After rebinning, rr for both exposed and unexposed categories should be the weighted sum of relative risk
        of the component categories where weights are relative proportions of exposure of those categories.
        For example, if cat1, cat2, cat3 are exposed categories and cat4 is unexposed with exposure [0.1,0.2,0.3,0.4],
        for the matching rr = [rr1, rr2, rr3, 1], rebinned rr for the rebinned cat1 should be:
        (0.1 *rr1 + 0.2 * rr2 + 0.3* rr3) / (0.1+0.2+0.3)
        """
        if self.entity not in builder.configuration.to_dict():
            return relative_risk_data

        rebin_exposed_categories = set(builder.configuration[self.entity]["rebinned_exposed"])

        if rebin_exposed_categories:
            # todo make sure this works
            exposure_data = load_exposure_data(builder, self.entity)
            relative_risk_data = self._rebin_relative_risk_data(
                relative_risk_data, exposure_data, rebin_exposed_categories
            )

        return relative_risk_data

    def _rebin_relative_risk_data(
        self,
        relative_risk_data: pd.DataFrame,
        exposure_data: pd.DataFrame,
        rebin_exposed_categories: set,
    ) -> pd.DataFrame:
        """Collapse relative risks into "cat1"/"cat2" using exposure as weights."""
        cols = list(exposure_data.columns.difference(["value"]))

        # After the merge, value_x is the relative risk and value_y the exposure.
        relative_risk_data = relative_risk_data.merge(exposure_data, on=cols)
        relative_risk_data["value_x"] = relative_risk_data.value_x.multiply(
            relative_risk_data.value_y
        )
        relative_risk_data.parameter = relative_risk_data["parameter"].map(
            lambda p: "cat1" if p in rebin_exposed_categories else "cat2"
        )
        relative_risk_data = relative_risk_data.groupby(cols).sum().reset_index()
        relative_risk_data["value"] = relative_risk_data.value_x.divide(
            relative_risk_data.value_y
        ).fillna(0)
        return relative_risk_data.drop(columns=["value_x", "value_y"])

    def get_exposure_callable(self, builder: Builder) -> Callable[[pd.Index], pd.Series]:
        """Return the exposure value pipeline named by ``exposure_pipeline_name``."""
        return builder.value.get_value(self.exposure_pipeline_name)

    def adjust_target(self, index: pd.Index, target: pd.Series) -> pd.Series:
        """Scale the target values by the relative risk for the given simulants."""
        relative_risk = self.relative_risk(index)
        return target * relative_risk

    def get_relative_risk_source(self, builder: Builder) -> Callable[[pd.Index], pd.Series]:
        """Build the function that computes each simulant's relative risk.

        For non-categorical exposures the relative risk is the lookup value
        raised to ``(exposure - tmrel) / scale``, floored at 1. For categorical
        exposures it is the lookup value of the simulant's exposure category.
        """

        if not self.is_exposure_categorical:
            tmred = builder.data.load(f"{self.entity}.tmred")
            tmrel = 0.5 * (tmred["min"] + tmred["max"])
            scale = builder.data.load(f"{self.entity}.relative_risk_scalar")

            def generate_relative_risk(index: pd.Index) -> pd.Series:
                rr = self.lookup_tables["relative_risk"](index)
                exposure = self.measure(index)
                relative_risk = np.maximum(rr.values ** ((exposure - tmrel) / scale), 1)
                return relative_risk

        else:
            index_columns = ["index", self.entity.name]

            def generate_relative_risk(index: pd.Index) -> pd.Series:
                rr = self.lookup_tables["relative_risk"](index)
                exposure = self.measure(index).reset_index()
                exposure.columns = index_columns
                exposure = exposure.set_index(index_columns)

                relative_risk = rr.stack().reset_index()
                relative_risk.columns = index_columns + ["value"]
                # Check if we need to remap cat1 and cat2 to exposed and unexposed categories
                if (
                    "cat1" in relative_risk[self.entity.name].unique()
                    and self._exposure_distribution_type == "dichotomous"
                ):
                    warnings.warn(
                        "Using 'cat1' and 'cat2' for dichotomous exposure is deprecated and will be removed in a future release. Use 'exposed' and 'unexposed' instead.",
                        FutureWarning,
                        stacklevel=2,
                    )
                    relative_risk[self.entity.name] = relative_risk[self.entity.name].replace(
                        {
                            "cat1": self.exposure_component.dichotomous_exposure_category_names.exposed,
                            "cat2": self.exposure_component.dichotomous_exposure_category_names.unexposed,
                        }
                    )
                relative_risk = relative_risk.set_index(index_columns)

                # Select the (simulant, category) rows matching each simulant's exposure.
                effect = relative_risk.loc[exposure.index, "value"].droplevel(
                    self.entity.name
                )
                return effect

        return generate_relative_risk

    def get_relative_risk_pipeline(self, builder: Builder) -> Pipeline:
        """Register the relative risk value producer for this entity/target pair."""
        return builder.value.register_value_producer(
            f"{self.entity.name}_on_{self.target.name}.relative_risk",
            self._relative_risk_source,
            component=self,
            required_resources=[self.measure],
        )

    def register_target_modifier(self, builder: Builder) -> None:
        """Register ``adjust_target`` as a modifier of the target pipeline."""
        builder.value.register_value_modifier(
            self.target_pipeline_name,
            modifier=self.adjust_target,
            component=self,
            required_resources=[self.relative_risk],
        )

    def register_paf_modifier(self, builder: Builder) -> None:
        """Register the PAF lookup table as a modifier of the target PAF pipeline."""
        required_columns = get_lookup_columns(
            [self.lookup_tables["population_attributable_fraction"]]
        )
        builder.value.register_value_modifier(
            self.target_paf_pipeline_name,
            modifier=self.lookup_tables["population_attributable_fraction"],
            component=self,
            required_resources=required_columns,
        )

    ##################
    # Helper methods #
    ##################

    def _get_exposure_class(self, builder: Builder) -> Exposure:
        """Return the ``Exposure`` component registered under this effect's entity.

        Raises
        ------
        ValueError
            If the component found under the entity name is not an ``Exposure``.
        """
        exposure_component = builder.components.get_component(self.entity)
        if not isinstance(exposure_component, Exposure):
            raise ValueError(
                f"Exposure model {self.name} requires an Exposure component named {self.entity}"
            )
        return exposure_component
@@ -1,254 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Any, NamedTuple
3
-
4
- import pandas as pd
5
- from vivarium import Component
6
- from vivarium.framework.engine import Builder
7
- from vivarium.framework.event import Event
8
- from vivarium.framework.population import SimulantData
9
- from vivarium.framework.randomness import RandomnessStream
10
- from vivarium.framework.resource import Resource
11
- from vivarium.framework.values import Pipeline
12
-
13
- from vivarium_public_health.exposure.distributions import (
14
- ContinuousDistribution,
15
- DichotomousDistribution,
16
- EnsembleDistribution,
17
- ExposureDistribution,
18
- PolytomousDistribution,
19
- )
20
- from vivarium_public_health.risks.data_transformations import get_exposure_post_processor
21
- from vivarium_public_health.utilities import EntityString, get_lookup_columns
22
-
23
-
24
class Exposure(Component, ABC):
    """A base class to store common functionality for risk-like health factors.

    This class is used to model health factors such as risks and the exposure
    to these risks, or interventions and the available coverage for these
    interventions.

    """

    # Maps each supported distribution type to the class that implements it.
    exposure_distributions = {
        "dichotomous": DichotomousDistribution,
        "ordered_polytomous": PolytomousDistribution,
        "unordered_polytomous": PolytomousDistribution,
        "normal": ContinuousDistribution,
        "lognormal": ContinuousDistribution,
        "ensemble": EnsembleDistribution,
    }

    ##############
    # Properties #
    ##############

    @property
    def name(self) -> str:
        """The component name, which is the entity string itself."""
        return self.entity

    @property
    def configuration_defaults(self) -> dict[str, Any]:
        """Default values for any configurations managed by this component."""
        return {
            self.name: {
                "data_sources": {
                    f"{self.exposure_type}": f"{self.entity}.{self.exposure_type}",
                    "ensemble_distribution_weights": f"{self.entity}.exposure_distribution_weights",
                    "exposure_standard_deviation": f"{self.entity}.exposure_standard_deviation",
                },
                "distribution_type": f"{self.entity}.distribution",
                # rebinned_exposed only used for DichotomousDistribution
                "rebinned_exposed": [],
                "category_thresholds": [],
            }
        }

    @property
    def columns_created(self) -> list[str]:
        """The propensity column, plus the exposure column when it is enabled."""
        columns_to_create = [self.propensity_column_name]
        if self.create_exposure_column:
            columns_to_create.append(self.exposure_column_name)
        return columns_to_create

    @property
    def initialization_requirements(self) -> list[str | Resource]:
        """Resources needed to initialize simulants: the randomness stream."""
        return [self.randomness]

    @property
    @abstractmethod
    def exposure_type(self) -> str:
        """The kind of exposure modelled; used to build pipeline, column and data-source names."""
        raise NotImplementedError

    @property
    @abstractmethod
    def dichotomous_exposure_category_names(self) -> NamedTuple:
        """The name of the exposure categories. E.g. "exposed" and "unexposed" or
        "covered" and "uncovered".

        """
        raise NotImplementedError

    #####################
    # Lifecycle methods #
    #####################

    def __init__(self, entity: str) -> None:
        """

        Parameters
        ----------
        entity
            The type and name of an entity, specified as "type.name". Type is singular.
        """
        super().__init__()
        self.entity = EntityString(entity)
        # Resolved in setup via get_distribution_type.
        self.distribution_type = None
        self.randomness_stream_name = f"initial_{self.entity.name}_propensity"
        self.propensity_column_name = f"{self.entity.name}_propensity"
        self.propensity_pipeline_name = f"{self.entity.name}.propensity"
        self.exposure_pipeline_name = f"{self.entity.name}.{self.exposure_type}"
        self.exposure_column_name = f"{self.entity.name}_{self.exposure_type}"

    #################
    # Setup methods #
    #################

    def build_all_lookup_tables(self, builder: "Builder") -> None:
        # All lookup tables are built in the exposure distribution
        pass

    # noinspection PyAttributeOutsideInit
    def setup(self, builder: Builder) -> None:
        """Resolve the distribution and register the propensity and exposure pipelines."""
        self.distribution_type = self.get_distribution_type(builder)
        self.exposure_distribution = self.get_exposure_distribution(builder)

        self.randomness = self.get_randomness_stream(builder)
        self.propensity = self.get_propensity_pipeline(builder)
        self.exposure = self.get_exposure_callable(builder)
        # This will be overwritten in the Risk class if there is a non-loglinear risk effect
        # on that risk instance
        self.create_exposure_column = False

    def get_distribution_type(self, builder: Builder) -> str:
        """Get the distribution type for the risk from the configuration.

        If the configured distribution type is not one of the supported types,
        it is assumed to be a data source and the data is retrieved using the
        get_data method.

        Parameters
        ----------
        builder
            The builder object.

        Returns
        -------
            The distribution type. This is always "dichotomous" when rebinned
            exposed categories are configured.

        Raises
        ------
        ValueError
            If rebinned exposed categories are configured for a distribution
            type that is neither dichotomous nor polytomous.
        """
        if self.configuration is None:
            self.configuration = self.get_configuration(builder)

        distribution_type = self.configuration["distribution_type"]
        if distribution_type not in self.exposure_distributions:
            # todo deal with incorrect typing
            distribution_type = self.get_data(builder, distribution_type)

        if self.configuration["rebinned_exposed"]:
            # Reject only types that are neither dichotomous nor polytomous.
            # Joining these checks with ``or`` would be true for every type
            # and make rebinning impossible.
            if distribution_type != "dichotomous" and "polytomous" not in distribution_type:
                raise ValueError(
                    f"Unsupported risk distribution type '{distribution_type}' "
                    f"for {self.name}. Rebinned exposed categories are only "
                    "supported for dichotomous and polytomous distributions."
                )
            distribution_type = "dichotomous"
        return distribution_type

    def get_exposure_distribution(self, builder: Builder) -> ExposureDistribution:
        """Creates and sets up the exposure distribution component for the Risk
        based on its distribution type.

        Parameters
        ----------
        builder
            The builder object.

        Returns
        -------
            The exposure distribution.

        Raises
        ------
        NotImplementedError
            If the distribution type is not supported.
        """
        # Only the lookup is guarded, so a KeyError raised inside the
        # distribution's constructor is not misreported as "not supported".
        try:
            distribution_class = self.exposure_distributions[self.distribution_type]
        except KeyError as err:
            raise NotImplementedError(
                f"Distribution type {self.distribution_type} is not supported."
            ) from err

        exposure_distribution = distribution_class(self, self.distribution_type)
        exposure_distribution.setup_component(builder)
        return exposure_distribution

    def get_randomness_stream(self, builder: Builder) -> RandomnessStream:
        """Return the randomness stream used to draw initial propensities."""
        return builder.randomness.get_stream(self.randomness_stream_name, component=self)

    def get_propensity_pipeline(self, builder: Builder) -> Pipeline:
        """Register a value producer that reads the propensity column."""
        return builder.value.register_value_producer(
            self.propensity_pipeline_name,
            source=lambda index: (
                self.population_view.subview([self.propensity_column_name])
                .get(index)
                .squeeze(axis=1)
            ),
            component=self,
            required_resources=[self.propensity_column_name],
        )

    def get_exposure_callable(self, builder: Builder) -> Pipeline:
        """Register the exposure value producer backed by ``get_current_exposure``."""
        required_columns = get_lookup_columns(
            self.exposure_distribution.lookup_tables.values()
        )
        return builder.value.register_value_producer(
            self.exposure_pipeline_name,
            source=self.get_current_exposure,
            component=self,
            required_resources=required_columns
            + [
                self.propensity,
                self.exposure_distribution.exposure_parameters,
            ],
            preferred_post_processor=get_exposure_post_processor(builder, self.name),
        )

    ########################
    # Event-driven methods #
    ########################

    def on_initialize_simulants(self, pop_data: SimulantData) -> None:
        """Draw a propensity for each new simulant and record it."""
        propensity = pd.Series(
            self.randomness.get_draw(pop_data.index), name=self.propensity_column_name
        )
        self.population_view.update(propensity)
        self.update_exposure_column(pop_data.index)

    def on_time_step_prepare(self, event: Event) -> None:
        """Refresh the exposure column, if enabled, for the event's simulants."""
        self.update_exposure_column(event.index)

    def update_exposure_column(self, index: pd.Index) -> None:
        """Write current exposure values to the exposure column when it is enabled."""
        if self.create_exposure_column:
            # Use the exposure pipeline; ``exposure_type`` is only the string
            # used to build names and is not callable.
            exposure = pd.Series(self.exposure(index), name=self.exposure_column_name)
            self.population_view.update(exposure)

    ##################################
    # Pipeline sources and modifiers #
    ##################################

    def get_current_exposure(self, index: pd.Index) -> pd.Series:
        """Compute exposure by passing each simulant's propensity to the distribution's ``ppf``."""
        propensity = self.propensity(index)
        return pd.Series(self.exposure_distribution.ppf(propensity), index=index)