vivarium-public-health 4.3.1__py3-none-any.whl → 4.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,13 +46,7 @@ from vivarium_public_health.risks import (
46
46
  Risk,
47
47
  RiskEffect,
48
48
  )
49
- from vivarium_public_health.treatment import (
50
- AbsoluteShift,
51
- Intervention,
52
- InterventionEffect,
53
- LinearScaleUp,
54
- TherapeuticInertia,
55
- )
49
+ from vivarium_public_health.treatment import AbsoluteShift, LinearScaleUp, TherapeuticInertia
56
50
 
57
51
  __all__ = [
58
52
  __author__,
@@ -1 +1 @@
1
- __version__ = "4.3.1"
1
+ __version__ = "4.3.3"
@@ -584,8 +584,8 @@ def rescale_final_age_bin(builder, population_data):
584
584
 
585
585
  def validate_crude_birth_rate_data(builder, data_year_max):
586
586
  population_config = builder.configuration.population.to_dict()
587
- untracking_age = population_config.get("untracking_age")
588
- age_end = population_config.get("age_end")
587
+ untracking_age = population_config.get("untracking_age", None)
588
+ age_end = population_config.get("age_end", None)
589
589
  if untracking_age and age_end and age_end != untracking_age:
590
590
  raise ValueError(
591
591
  "If you specify an exit age, the initial population age end must be the same "
@@ -1,7 +1,6 @@
1
1
  from .columns import COLUMNS
2
2
  from .disability import DisabilityObserver
3
3
  from .disease import DiseaseObserver
4
- from .intervention import CategoricalInterventionObserver
5
4
  from .mortality import MortalityObserver
6
5
  from .observer import PublicHealthObserver
7
6
  from .risk import CategoricalRiskObserver
@@ -7,15 +7,30 @@ This module contains tools for modeling categorical and continuous risk
7
7
  exposure.
8
8
 
9
9
  """
10
- import warnings
11
- from typing import NamedTuple
12
10
 
11
+ from typing import Any
12
+
13
+ import pandas as pd
14
+ from vivarium import Component
13
15
  from vivarium.framework.engine import Builder
16
+ from vivarium.framework.event import Event
17
+ from vivarium.framework.population import SimulantData
18
+ from vivarium.framework.randomness import RandomnessStream
19
+ from vivarium.framework.resource import Resource
20
+ from vivarium.framework.values import Pipeline
14
21
 
15
- from vivarium_public_health.exposure import Exposure
22
+ from vivarium_public_health.risks.data_transformations import get_exposure_post_processor
23
+ from vivarium_public_health.risks.distributions import (
24
+ ContinuousDistribution,
25
+ DichotomousDistribution,
26
+ EnsembleDistribution,
27
+ PolytomousDistribution,
28
+ RiskExposureDistribution,
29
+ )
30
+ from vivarium_public_health.utilities import EntityString, get_lookup_columns
16
31
 
17
32
 
18
- class Risk(Exposure):
33
+ class Risk(Component):
19
34
  """A model for a risk factor defined by either a continuous or a categorical value.
20
35
 
21
36
  For example,
@@ -74,52 +89,217 @@ class Risk(Exposure):
74
89
 
75
90
  """
76
91
 
77
- @property
78
- def risk(self) -> str:
79
- warnings.warn(
80
- "The 'risk' attribute is deprecated. Use 'entity' instead.",
81
- DeprecationWarning,
82
- stacklevel=2,
83
- )
84
- return self.entity
85
-
86
- @risk.setter
87
- def risk(self, value: str) -> None:
88
- warnings.warn(
89
- "The 'risk' attribute is deprecated. Use 'entity' instead.",
90
- DeprecationWarning,
91
- stacklevel=2,
92
- )
93
- self.entity = value
92
+ exposure_distributions = {
93
+ "dichotomous": DichotomousDistribution,
94
+ "ordered_polytomous": PolytomousDistribution,
95
+ "unordered_polytomous": PolytomousDistribution,
96
+ "normal": ContinuousDistribution,
97
+ "lognormal": ContinuousDistribution,
98
+ "ensemble": EnsembleDistribution,
99
+ }
100
+
101
+ ##############
102
+ # Properties #
103
+ ##############
94
104
 
95
105
  @property
96
- def exposure_type(self) -> str:
97
- """The measure of the risk exposure."""
98
- return "exposure"
106
+ def name(self) -> str:
107
+ return self.risk
99
108
 
100
109
  @property
101
- def dichotomous_exposure_category_names(self) -> NamedTuple:
102
- """The name of the exposed category for this intervention."""
110
+ def configuration_defaults(self) -> dict[str, Any]:
111
+ return {
112
+ self.name: {
113
+ "data_sources": {
114
+ "exposure": f"{self.risk}.exposure",
115
+ "ensemble_distribution_weights": f"{self.risk}.exposure_distribution_weights",
116
+ "exposure_standard_deviation": f"{self.risk}.exposure_standard_deviation",
117
+ },
118
+ "distribution_type": f"{self.risk}.distribution",
119
+ # rebinned_exposed only used for DichotomousDistribution
120
+ "rebinned_exposed": [],
121
+ "category_thresholds": [],
122
+ }
123
+ }
103
124
 
104
- class __Categories(NamedTuple):
105
- exposed: str = "exposed"
106
- unexposed: str = "unexposed"
125
+ @property
126
+ def columns_created(self) -> list[str]:
127
+ columns_to_create = [self.propensity_column_name]
128
+ if self.create_exposure_column:
129
+ columns_to_create.append(self.exposure_column_name)
130
+ return columns_to_create
107
131
 
108
- categories = __Categories()
109
- return categories
132
+ @property
133
+ def initialization_requirements(self) -> list[str | Resource]:
134
+ return [self.randomness]
110
135
 
111
136
  #####################
112
137
  # Lifecycle methods #
113
138
  #####################
114
139
 
140
+ def __init__(self, risk: str):
141
+ """
142
+
143
+ Parameters
144
+ ----------
145
+ risk
146
+ the type and name of a risk, specified as "type.name". Type is singular.
147
+ """
148
+ super().__init__()
149
+ self.risk = EntityString(risk)
150
+ self.distribution_type = None
151
+
152
+ self.randomness_stream_name = f"initial_{self.risk.name}_propensity"
153
+ self.propensity_column_name = f"{self.risk.name}_propensity"
154
+ self.propensity_pipeline_name = f"{self.risk.name}.propensity"
155
+ self.exposure_pipeline_name = f"{self.risk.name}.exposure"
156
+ self.exposure_column_name = f"{self.risk.name}_exposure"
157
+
158
+ #################
159
+ # Setup methods #
160
+ #################
161
+
162
+ def build_all_lookup_tables(self, builder: "Builder") -> None:
163
+ # All lookup tables are built in the exposure distribution
164
+ pass
165
+
166
+ # noinspection PyAttributeOutsideInit
115
167
  def setup(self, builder: Builder) -> None:
116
- super().setup(builder)
117
- # We want to set this to True if there is a non-loglinear risk effect
168
+ self.distribution_type = self.get_distribution_type(builder)
169
+ self.exposure_distribution = self.get_exposure_distribution(builder)
170
+
171
+ self.randomness = self.get_randomness_stream(builder)
172
+ self.propensity = self.get_propensity_pipeline(builder)
173
+ self.exposure = self.get_exposure_pipeline(builder)
174
+
175
+ # We want to set this to True iff there is a non-loglinear risk effect
118
176
  # on this risk instance
119
177
  self.create_exposure_column = bool(
120
178
  [
121
179
  component
122
180
  for component in builder.components.list_components()
123
- if component.startswith(f"non_log_linear_risk_effect.{self.entity.name}_on_")
181
+ if component.startswith(f"non_log_linear_risk_effect.{self.risk.name}_on_")
124
182
  ]
125
183
  )
184
+
185
+ def get_distribution_type(self, builder: Builder) -> str:
186
+ """Get the distribution type for the risk from the configuration.
187
+
188
+ If the configured distribution type is not one of the supported types,
189
+ it is assumed to be a data source and the data is retrieved using the
190
+ get_data method.
191
+
192
+ Parameters
193
+ ----------
194
+ builder
195
+ The builder object.
196
+
197
+ Returns
198
+ -------
199
+ The distribution type.
200
+ """
201
+ if self.configuration is None:
202
+ self.configuration = self.get_configuration(builder)
203
+
204
+ distribution_type = self.configuration["distribution_type"]
205
+ if distribution_type not in self.exposure_distributions.keys():
206
+ # todo deal with incorrect typing
207
+ distribution_type = self.get_data(builder, distribution_type)
208
+
209
+ if self.configuration["rebinned_exposed"]:
210
+ if distribution_type != "dichotomous" or "polytomous" not in distribution_type:
211
+ raise ValueError(
212
+ f"Unsupported risk distribution type '{distribution_type}' "
213
+ f"for {self.name}. Rebinned exposed categories are only "
214
+ "supported for dichotomous and polytomous distributions."
215
+ )
216
+ distribution_type = "dichotomous"
217
+ return distribution_type
218
+
219
+ def get_exposure_distribution(self, builder: Builder) -> RiskExposureDistribution:
220
+ """Creates and sets up the exposure distribution component for the Risk
221
+ based on its distribution type.
222
+
223
+ Parameters
224
+ ----------
225
+ builder
226
+ The builder object.
227
+
228
+ Returns
229
+ -------
230
+ The exposure distribution.
231
+
232
+ Raises
233
+ ------
234
+ NotImplementedError
235
+ If the distribution type is not supported.
236
+ """
237
+ try:
238
+ exposure_distribution = self.exposure_distributions[self.distribution_type](
239
+ self.risk, self.distribution_type
240
+ )
241
+ except KeyError:
242
+ raise NotImplementedError(
243
+ f"Distribution type {self.distribution_type} is not supported."
244
+ )
245
+
246
+ exposure_distribution.setup_component(builder)
247
+ return exposure_distribution
248
+
249
+ def get_randomness_stream(self, builder: Builder) -> RandomnessStream:
250
+ return builder.randomness.get_stream(self.randomness_stream_name, component=self)
251
+
252
+ def get_propensity_pipeline(self, builder: Builder) -> Pipeline:
253
+ return builder.value.register_value_producer(
254
+ self.propensity_pipeline_name,
255
+ source=lambda index: (
256
+ self.population_view.subview([self.propensity_column_name])
257
+ .get(index)
258
+ .squeeze(axis=1)
259
+ ),
260
+ component=self,
261
+ required_resources=[self.propensity_column_name],
262
+ )
263
+
264
+ def get_exposure_pipeline(self, builder: Builder) -> Pipeline:
265
+ required_columns = get_lookup_columns(
266
+ self.exposure_distribution.lookup_tables.values()
267
+ )
268
+ return builder.value.register_value_producer(
269
+ self.exposure_pipeline_name,
270
+ source=self.get_current_exposure,
271
+ component=self,
272
+ required_resources=required_columns
273
+ + [
274
+ self.propensity,
275
+ self.exposure_distribution.exposure_parameters,
276
+ ],
277
+ preferred_post_processor=get_exposure_post_processor(builder, self.name),
278
+ )
279
+
280
+ ########################
281
+ # Event-driven methods #
282
+ ########################
283
+
284
+ def on_initialize_simulants(self, pop_data: SimulantData) -> None:
285
+ propensity = pd.Series(
286
+ self.randomness.get_draw(pop_data.index), name=self.propensity_column_name
287
+ )
288
+ self.population_view.update(propensity)
289
+ self.update_exposure_column(pop_data.index)
290
+
291
+ def on_time_step_prepare(self, event: Event) -> None:
292
+ self.update_exposure_column(event.index)
293
+
294
+ def update_exposure_column(self, index: pd.Index) -> None:
295
+ if self.create_exposure_column:
296
+ exposure = pd.Series(self.exposure(index), name=self.exposure_column_name)
297
+ self.population_view.update(exposure)
298
+
299
+ ##################################
300
+ # Pipeline sources and modifiers #
301
+ ##################################
302
+
303
+ def get_current_exposure(self, index: pd.Index) -> pd.Series:
304
+ propensity = self.propensity(index)
305
+ return pd.Series(self.exposure_distribution.ppf(propensity), index=index)
@@ -1,32 +1,254 @@
1
1
  """
2
- ==============================
3
- Data Transformations (Legacy)
4
- ==============================
2
+ =========================
3
+ Risk Data Transformations
4
+ =========================
5
5
 
6
- .. deprecated:: 4.3.0
7
- This module is deprecated. Use :mod:`vivarium_public_health.exposure.data_transformations` instead.
6
+ This module contains tools for handling raw risk exposure and relative
7
+ risk data and performing any necessary data transformations.
8
8
 
9
- Backward compatibility module for risk data_transformations.
9
+ """
10
10
 
11
- This module provides backward compatibility for imports that expect risk
12
- distribution classes to be in vivarium_public_health.risks.data_transformations.
11
+ import numpy as np
12
+ import pandas as pd
13
+ from vivarium.framework.engine import Builder
13
14
 
14
- The actual distribution classes have been moved to:
15
- vivarium_public_health.exposure.data_transformations
15
+ from vivarium_public_health.utilities import EntityString, TargetString
16
16
 
17
- This module will be deprecated in a future version.
18
- """
17
+ #############
18
+ # Utilities #
19
+ #############
20
+
21
+
22
+ def pivot_categorical(
23
+ builder: Builder,
24
+ risk: EntityString,
25
+ data: pd.DataFrame,
26
+ pivot_column: str = "parameter",
27
+ reset_index: bool = True,
28
+ ) -> pd.DataFrame:
29
+ """Pivots data that is long on categories to be wide."""
30
+ # todo remove dependency on artifact manager having exactly one value column
31
+ value_column = builder.data.value_columns()(f"{risk}.exposure")[0]
32
+ index_cols = [
33
+ column for column in data.columns if column not in [value_column, pivot_column]
34
+ ]
35
+ data = data.pivot_table(index=index_cols, columns=pivot_column, values=value_column)
36
+ if reset_index:
37
+ data = data.reset_index()
38
+ data.columns.name = None
39
+
40
+ return data
41
+
42
+
43
+ ##########################
44
+ # Exposure data handlers #
45
+ ##########################
46
+
47
+
48
+ def get_exposure_post_processor(builder, risk: str):
49
+ thresholds = builder.configuration[risk]["category_thresholds"]
50
+
51
+ if thresholds:
52
+ thresholds = [-np.inf] + thresholds + [np.inf]
53
+ categories = [f"cat{i}" for i in range(1, len(thresholds))]
54
+
55
+ def post_processor(exposure, _):
56
+ return pd.Series(
57
+ pd.cut(exposure, thresholds, labels=categories), index=exposure.index
58
+ ).astype(str)
59
+
60
+ else:
61
+ post_processor = None
62
+
63
+ return post_processor
64
+
65
+
66
+ def load_exposure_data(builder: Builder, risk: EntityString) -> pd.DataFrame:
67
+ risk_component = builder.components.get_component(risk)
68
+ return risk_component.get_data(
69
+ builder, builder.configuration[risk_component.name]["data_sources"]["exposure"]
70
+ )
71
+
72
+
73
+ ###############################
74
+ # Relative risk data handlers #
75
+ ###############################
76
+
77
+
78
+ def rebin_relative_risk_data(
79
+ builder, risk: EntityString, relative_risk_data: pd.DataFrame
80
+ ) -> pd.DataFrame:
81
+ """Rebin relative risk data if necessary.
82
+
83
+ When the polytomous risk is rebinned, matching relative risk needs to be rebinned.
84
+ After rebinning, rr for both exposed and unexposed categories should be the weighted sum of relative risk
85
+ of the component categories where weights are relative proportions of exposure of those categories.
86
+ For example, if cat1, cat2, cat3 are exposed categories and cat4 is unexposed with exposure [0.1,0.2,0.3,0.4],
87
+ for the matching rr = [rr1, rr2, rr3, 1], rebinned rr for the rebinned cat1 should be:
88
+ (0.1 *rr1 + 0.2 * rr2 + 0.3* rr3) / (0.1+0.2+0.3)
89
+ """
90
+ if not risk in builder.configuration.to_dict():
91
+ return relative_risk_data
92
+
93
+ rebin_exposed_categories = set(builder.configuration[risk]["rebinned_exposed"])
94
+
95
+ if rebin_exposed_categories:
96
+ # todo make sure this works
97
+ exposure_data = load_exposure_data(builder, risk)
98
+ relative_risk_data = _rebin_relative_risk_data(
99
+ relative_risk_data, exposure_data, rebin_exposed_categories
100
+ )
101
+
102
+ return relative_risk_data
103
+
104
+
105
+ def _rebin_relative_risk_data(
106
+ relative_risk_data: pd.DataFrame,
107
+ exposure_data: pd.DataFrame,
108
+ rebin_exposed_categories: set,
109
+ ) -> pd.DataFrame:
110
+ cols = list(exposure_data.columns.difference(["value"]))
111
+
112
+ relative_risk_data = relative_risk_data.merge(exposure_data, on=cols)
113
+ relative_risk_data["value_x"] = relative_risk_data.value_x.multiply(
114
+ relative_risk_data.value_y
115
+ )
116
+ relative_risk_data.parameter = relative_risk_data["parameter"].map(
117
+ lambda p: "cat1" if p in rebin_exposed_categories else "cat2"
118
+ )
119
+ relative_risk_data = relative_risk_data.groupby(cols).sum().reset_index()
120
+ relative_risk_data["value"] = relative_risk_data.value_x.divide(
121
+ relative_risk_data.value_y
122
+ ).fillna(0)
123
+ return relative_risk_data.drop(columns=["value_x", "value_y"])
124
+
125
+
126
+ ##############
127
+ # Validators #
128
+ ##############
129
+
130
+
131
+ def validate_distribution_data_source(builder: Builder, risk: EntityString) -> None:
132
+ """Checks that the exposure distribution specification is valid."""
133
+ exposure_type = builder.configuration[risk]["data_sources"]["exposure"]
134
+ rebin = builder.configuration[risk]["rebinned_exposed"]
135
+ category_thresholds = builder.configuration[risk]["category_thresholds"]
136
+
137
+ if risk.type == "alternative_risk_factor":
138
+ if exposure_type != "data" or rebin:
139
+ raise ValueError(
140
+ "Parameterized risk components are not available for alternative risks."
141
+ )
142
+
143
+ if not category_thresholds:
144
+ raise ValueError("Must specify category thresholds to use alternative risks.")
145
+
146
+ elif risk.type not in ["risk_factor", "coverage_gap"]:
147
+ raise ValueError(f"Unknown risk type {risk.type} for risk {risk.name}")
148
+
149
+
150
+ def validate_relative_risk_data_source(builder, risk: EntityString, target: TargetString):
151
+ from vivarium_public_health.risks import RiskEffect
152
+
153
+ source_key = RiskEffect.get_name(risk, target)
154
+ source_config = builder.configuration[source_key]
155
+
156
+ provided_keys = set(
157
+ k
158
+ for k, v in source_config["distribution_args"].to_dict().items()
159
+ if isinstance(v, (int, float))
160
+ )
161
+
162
+ source_map = {
163
+ "data": set(),
164
+ "relative risk value": {"relative_risk"},
165
+ "normal distribution": {"mean", "se"},
166
+ "log distribution": {"log_mean", "log_se", "tau_squared"},
167
+ }
168
+
169
+ if provided_keys not in source_map.values():
170
+ raise ValueError(
171
+ f"The acceptable parameter options for specifying relative risk are: "
172
+ f"{source_map.values()}. You provided {provided_keys} for {source_key}."
173
+ )
174
+
175
+ source_type = [k for k, v in source_map.items() if provided_keys == v][0]
176
+
177
+ if source_type == "relative risk value":
178
+ if not 1 <= source_type <= 100:
179
+ raise ValueError(
180
+ "If specifying a single value for relative risk, it should be in the range [1, 100]. "
181
+ f"You provided {source_type} for {source_key}."
182
+ )
183
+ elif source_type == "normal distribution":
184
+ if source_config["mean"] <= 0 or source_config["se"] <= 0:
185
+ raise ValueError(
186
+ f"To specify parameters for a normal distribution for a risk effect, you must provide"
187
+ f"both mean and se above 0. This is not the case for {source_key}."
188
+ )
189
+ elif source_type == "log distribution":
190
+ if source_config["log_mean"] <= 0 or source_config["log_se"] <= 0:
191
+ raise ValueError(
192
+ f"To specify parameters for a log distribution for a risk effect, you must provide"
193
+ f"both log_mean and log_se above 0. This is not the case for {source_key}."
194
+ )
195
+ if source_config["tau_squared"] < 0:
196
+ raise ValueError(
197
+ f"To specify parameters for a log distribution for a risk effect, you must provide"
198
+ f"tau_squared >= 0. This is not the case for {source_key}."
199
+ )
200
+ else:
201
+ pass
202
+
203
+ return source_type
204
+
205
+
206
+ def validate_relative_risk_rebin_source(
207
+ builder, risk: EntityString, target: TargetString, data: pd.DataFrame
208
+ ):
209
+ if data.index.size == 0:
210
+ raise ValueError(
211
+ f"Subsetting {risk} relative risk data to {target.name} {target.measure} "
212
+ "returned an empty DataFrame. Check your artifact."
213
+ )
214
+ if risk in builder.configuration.to_dict():
215
+ validate_rebin_source(builder, risk, data)
216
+
217
+
218
+ def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame) -> None:
219
+
220
+ if not isinstance(data, pd.DataFrame):
221
+ return
222
+
223
+ rebin_exposed_categories = set(builder.configuration[risk]["rebinned_exposed"])
224
+
225
+ if rebin_exposed_categories and builder.configuration[risk]["category_thresholds"]:
226
+ raise ValueError(
227
+ f"Rebinning and category thresholds are mutually exclusive. "
228
+ f"You provided both for {risk.name}."
229
+ )
19
230
 
20
- import warnings
231
+ if rebin_exposed_categories and "polytomous" not in builder.data.load(
232
+ f"{risk}.distribution"
233
+ ):
234
+ raise ValueError(
235
+ f"Rebinning is only supported for polytomous risks. You provided "
236
+ f"rebinning exposed categoriesfor {risk.name}, which is of "
237
+ f"type {builder.data.load(f'{risk}.distribution')}."
238
+ )
21
239
 
22
- # Issue a deprecation warning when this module is imported
23
- warnings.warn(
24
- "Importing from 'vivarium_public_health.risks.data_transformations' is deprecated. "
25
- "Please import from 'vivarium_public_health.exposure.data_transformations' instead.",
26
- DeprecationWarning,
27
- stacklevel=2,
28
- )
240
+ invalid_cats = rebin_exposed_categories.difference(set(data.parameter))
241
+ if invalid_cats:
242
+ raise ValueError(
243
+ f"The following provided categories for the rebinned exposed "
244
+ f"category of {risk.name} are not found in the exposure data: "
245
+ f"{invalid_cats}."
246
+ )
29
247
 
30
- # Import all the classes from the new location
31
- from vivarium_public_health.exposure.data_transformations import *
32
- from vivarium_public_health.exposure.data_transformations import _rebin_relative_risk_data
248
+ if rebin_exposed_categories == set(data.parameter):
249
+ raise ValueError(
250
+ f"The provided categories for the rebinned exposed category of "
251
+ f"{risk.name} comprise all categories for the exposure data. "
252
+ f"At least one category must be left out of the provided categories "
253
+ f"to be rebinned into the unexposed category."
254
+ )