vivarium-public-health 2.3.3__py3-none-any.whl → 3.0.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vivarium_public_health/_version.py +1 -1
- vivarium_public_health/disease/model.py +23 -21
- vivarium_public_health/disease/models.py +1 -0
- vivarium_public_health/disease/special_disease.py +40 -41
- vivarium_public_health/disease/state.py +42 -125
- vivarium_public_health/disease/transition.py +70 -27
- vivarium_public_health/mslt/delay.py +1 -0
- vivarium_public_health/mslt/disease.py +1 -0
- vivarium_public_health/mslt/intervention.py +1 -0
- vivarium_public_health/mslt/magic_wand_components.py +1 -0
- vivarium_public_health/mslt/observer.py +1 -0
- vivarium_public_health/mslt/population.py +1 -0
- vivarium_public_health/plugins/parser.py +61 -31
- vivarium_public_health/population/add_new_birth_cohorts.py +2 -3
- vivarium_public_health/population/base_population.py +2 -1
- vivarium_public_health/population/mortality.py +83 -80
- vivarium_public_health/{metrics → results}/__init__.py +2 -0
- vivarium_public_health/results/columns.py +22 -0
- vivarium_public_health/results/disability.py +187 -0
- vivarium_public_health/results/disease.py +222 -0
- vivarium_public_health/results/mortality.py +186 -0
- vivarium_public_health/results/observer.py +78 -0
- vivarium_public_health/results/risk.py +138 -0
- vivarium_public_health/results/simple_cause.py +18 -0
- vivarium_public_health/{metrics → results}/stratification.py +10 -8
- vivarium_public_health/risks/__init__.py +1 -2
- vivarium_public_health/risks/base_risk.py +134 -29
- vivarium_public_health/risks/data_transformations.py +65 -326
- vivarium_public_health/risks/distributions.py +315 -145
- vivarium_public_health/risks/effect.py +376 -75
- vivarium_public_health/risks/implementations/low_birth_weight_and_short_gestation.py +61 -89
- vivarium_public_health/treatment/magic_wand.py +1 -0
- vivarium_public_health/treatment/scale_up.py +1 -0
- vivarium_public_health/treatment/therapeutic_inertia.py +1 -0
- vivarium_public_health/utilities.py +17 -2
- {vivarium_public_health-2.3.3.dist-info → vivarium_public_health-3.0.1.dist-info}/METADATA +12 -2
- vivarium_public_health-3.0.1.dist-info/RECORD +49 -0
- {vivarium_public_health-2.3.3.dist-info → vivarium_public_health-3.0.1.dist-info}/WHEEL +1 -1
- vivarium_public_health/metrics/disability.py +0 -118
- vivarium_public_health/metrics/disease.py +0 -136
- vivarium_public_health/metrics/mortality.py +0 -144
- vivarium_public_health/metrics/risk.py +0 -110
- vivarium_public_health/testing/__init__.py +0 -0
- vivarium_public_health/testing/mock_artifact.py +0 -145
- vivarium_public_health/testing/utils.py +0 -71
- vivarium_public_health-2.3.3.dist-info/RECORD +0 -49
- {vivarium_public_health-2.3.3.dist-info → vivarium_public_health-3.0.1.dist-info}/LICENSE.txt +0 -0
- {vivarium_public_health-2.3.3.dist-info → vivarium_public_health-3.0.1.dist-info}/top_level.txt +0 -0
@@ -7,52 +7,86 @@ This module contains tools for modeling several different risk
|
|
7
7
|
exposure distributions.
|
8
8
|
|
9
9
|
"""
|
10
|
-
|
10
|
+
|
11
|
+
from abc import ABC, abstractmethod
|
12
|
+
from typing import Callable, Dict, List, Optional, Union
|
11
13
|
|
12
14
|
import numpy as np
|
13
15
|
import pandas as pd
|
14
|
-
|
16
|
+
import risk_distributions as rd
|
17
|
+
from layered_config_tree import LayeredConfigTree
|
15
18
|
from vivarium import Component
|
16
19
|
from vivarium.framework.engine import Builder
|
17
20
|
from vivarium.framework.population import SimulantData
|
18
21
|
from vivarium.framework.values import Pipeline, list_combiner, union_post_processor
|
19
22
|
|
20
|
-
from vivarium_public_health.risks.data_transformations import
|
21
|
-
from vivarium_public_health.utilities import EntityString
|
23
|
+
from vivarium_public_health.risks.data_transformations import pivot_categorical
|
24
|
+
from vivarium_public_health.utilities import EntityString, get_lookup_columns
|
22
25
|
|
23
26
|
|
24
27
|
class MissingDataError(Exception):
|
25
28
|
pass
|
26
29
|
|
27
30
|
|
28
|
-
|
29
|
-
# adaptor pattern, which is gross, but would require some more difficult
|
30
|
-
# refactoring which is thoroughly out of scope right now. -J.C. 8/25/19
|
31
|
-
class SimulationDistribution(Component):
|
32
|
-
"""Wrapper around a variety of distribution implementations."""
|
31
|
+
class RiskExposureDistribution(Component, ABC):
|
33
32
|
|
34
33
|
#####################
|
35
34
|
# Lifecycle methods #
|
36
35
|
#####################
|
37
36
|
|
38
|
-
def __init__(
|
37
|
+
def __init__(
|
38
|
+
self,
|
39
|
+
risk: EntityString,
|
40
|
+
distribution_type: str,
|
41
|
+
exposure_data: Optional[Union[int, float, pd.DataFrame]] = None,
|
42
|
+
) -> None:
|
39
43
|
super().__init__()
|
40
|
-
self.risk =
|
44
|
+
self.risk = risk
|
45
|
+
self.distribution_type = distribution_type
|
46
|
+
self._exposure_data = exposure_data
|
47
|
+
|
48
|
+
self.parameters_pipeline_name = f"{self.risk}.exposure_parameters"
|
41
49
|
|
50
|
+
#################
|
51
|
+
# Setup methods #
|
52
|
+
#################
|
53
|
+
|
54
|
+
def get_configuration(self, builder: "Builder") -> Optional[LayeredConfigTree]:
|
55
|
+
return builder.configuration[self.risk]
|
56
|
+
|
57
|
+
@abstractmethod
|
58
|
+
def build_all_lookup_tables(self, builder: "Builder") -> None:
|
59
|
+
raise NotImplementedError
|
60
|
+
|
61
|
+
def get_exposure_data(self, builder: Builder) -> Union[int, float, pd.DataFrame]:
|
62
|
+
if self._exposure_data is not None:
|
63
|
+
return self._exposure_data
|
64
|
+
return self.get_data(builder, self.configuration["data_sources"]["exposure"])
|
65
|
+
|
66
|
+
# noinspection PyAttributeOutsideInit
|
42
67
|
def setup(self, builder: Builder) -> None:
|
43
|
-
|
44
|
-
self.
|
45
|
-
|
68
|
+
self.exposure_parameters = self.get_exposure_parameter_pipeline(builder)
|
69
|
+
if self.exposure_parameters.name != self.parameters_pipeline_name:
|
70
|
+
raise ValueError(
|
71
|
+
"Expected exposure parameters pipeline to be named "
|
72
|
+
f"{self.parameters_pipeline_name}, "
|
73
|
+
f"but found {self.exposure_parameters.name}."
|
74
|
+
)
|
75
|
+
|
76
|
+
@abstractmethod
|
77
|
+
def get_exposure_parameter_pipeline(self, builder: Builder) -> Pipeline:
|
78
|
+
raise NotImplementedError
|
46
79
|
|
47
80
|
##################
|
48
81
|
# Public methods #
|
49
82
|
##################
|
50
83
|
|
51
|
-
|
52
|
-
|
84
|
+
@abstractmethod
|
85
|
+
def ppf(self, quantiles: pd.Series) -> pd.Series:
|
86
|
+
raise NotImplementedError
|
53
87
|
|
54
88
|
|
55
|
-
class
|
89
|
+
class EnsembleDistribution(RiskExposureDistribution):
|
56
90
|
##############
|
57
91
|
# Properties #
|
58
92
|
##############
|
@@ -73,38 +107,71 @@ class EnsembleSimulation(Component):
|
|
73
107
|
# Lifecycle methods #
|
74
108
|
#####################
|
75
109
|
|
76
|
-
def __init__(self, risk
|
77
|
-
super().__init__()
|
78
|
-
self.risk = EntityString(risk)
|
79
|
-
self._weights, self._parameters = self.get_parameters(weights, mean, sd)
|
110
|
+
def __init__(self, risk: EntityString, distribution_type: str = "ensemble") -> None:
|
111
|
+
super().__init__(risk, distribution_type)
|
80
112
|
self._propensity = f"ensemble_propensity_{self.risk}"
|
81
113
|
|
82
|
-
|
83
|
-
|
84
|
-
|
114
|
+
#################
|
115
|
+
# Setup methods #
|
116
|
+
#################
|
117
|
+
|
118
|
+
def build_all_lookup_tables(self, builder: Builder) -> None:
|
119
|
+
exposure_data = self.get_exposure_data(builder)
|
120
|
+
standard_deviation = self.get_data(
|
121
|
+
builder,
|
122
|
+
self.configuration["data_sources"]["exposure_standard_deviation"],
|
123
|
+
)
|
124
|
+
weights_source = self.configuration["data_sources"]["ensemble_distribution_weights"]
|
125
|
+
raw_weights = self.get_data(builder, weights_source)
|
126
|
+
|
127
|
+
glnorm_mask = raw_weights["parameter"] == "glnorm"
|
128
|
+
if np.any(raw_weights.loc[glnorm_mask, self.get_value_columns(weights_source)]):
|
129
|
+
raise NotImplementedError("glnorm distribution is not supported")
|
130
|
+
raw_weights = raw_weights[~glnorm_mask]
|
131
|
+
|
132
|
+
distributions = list(raw_weights["parameter"].unique())
|
133
|
+
|
134
|
+
raw_weights = pivot_categorical(
|
135
|
+
builder, self.risk, raw_weights, pivot_column="parameter", reset_index=False
|
85
136
|
)
|
137
|
+
|
138
|
+
weights, parameters = rd.EnsembleDistribution.get_parameters(
|
139
|
+
raw_weights,
|
140
|
+
mean=get_risk_distribution_parameter(self.get_value_columns, exposure_data),
|
141
|
+
sd=get_risk_distribution_parameter(self.get_value_columns, standard_deviation),
|
142
|
+
)
|
143
|
+
|
144
|
+
distribution_weights_table = self.build_lookup_table(
|
145
|
+
builder, weights.reset_index(), distributions
|
146
|
+
)
|
147
|
+
self.lookup_tables["ensemble_distribution_weights"] = distribution_weights_table
|
148
|
+
key_columns = distribution_weights_table.key_columns
|
149
|
+
parameter_columns = distribution_weights_table.parameter_columns
|
150
|
+
|
86
151
|
self.parameters = {
|
87
|
-
|
88
|
-
|
152
|
+
parameter: builder.lookup.build_table(
|
153
|
+
data.reset_index(),
|
154
|
+
key_columns=key_columns,
|
155
|
+
parameter_columns=parameter_columns,
|
89
156
|
)
|
90
|
-
for
|
157
|
+
for parameter, data in parameters.items()
|
91
158
|
}
|
92
159
|
|
160
|
+
def setup(self, builder: Builder) -> None:
|
161
|
+
super().setup(builder)
|
93
162
|
self.randomness = builder.randomness.get_stream(self._propensity)
|
94
163
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
name: p.reset_index() for name, p in parameters.items()
|
107
|
-
}
|
164
|
+
def get_exposure_parameter_pipeline(self, builder: Builder) -> Pipeline:
|
165
|
+
# This pipeline is not needed for ensemble distributions, so just
|
166
|
+
# register a dummy pipeline
|
167
|
+
def raise_not_implemented():
|
168
|
+
raise NotImplementedError(
|
169
|
+
"EnsembleDistribution does not use exposure parameters."
|
170
|
+
)
|
171
|
+
|
172
|
+
return builder.value.register_value_producer(
|
173
|
+
self.parameters_pipeline_name, lambda *_: raise_not_implemented()
|
174
|
+
)
|
108
175
|
|
109
176
|
########################
|
110
177
|
# Event-driven methods #
|
@@ -120,149 +187,256 @@ class EnsembleSimulation(Component):
|
|
120
187
|
# Public methods #
|
121
188
|
##################
|
122
189
|
|
123
|
-
def ppf(self,
|
124
|
-
if not
|
125
|
-
|
126
|
-
weights = self.
|
190
|
+
def ppf(self, quantiles: pd.Series) -> pd.Series:
|
191
|
+
if not quantiles.empty:
|
192
|
+
quantiles = clip(quantiles)
|
193
|
+
weights = self.lookup_tables["ensemble_distribution_weights"](quantiles.index)
|
127
194
|
parameters = {
|
128
|
-
name:
|
195
|
+
name: param(quantiles.index) for name, param in self.parameters.items()
|
129
196
|
}
|
130
|
-
ensemble_propensity = self.population_view.get(
|
131
|
-
x = EnsembleDistribution(weights, parameters).ppf(
|
197
|
+
ensemble_propensity = self.population_view.get(quantiles.index).iloc[:, 0]
|
198
|
+
x = rd.EnsembleDistribution(weights, parameters).ppf(
|
199
|
+
quantiles, ensemble_propensity
|
200
|
+
)
|
132
201
|
x[x.isnull()] = 0
|
133
202
|
else:
|
134
203
|
x = pd.Series([])
|
135
204
|
return x
|
136
205
|
|
137
206
|
|
138
|
-
class ContinuousDistribution(
|
207
|
+
class ContinuousDistribution(RiskExposureDistribution):
|
139
208
|
#####################
|
140
209
|
# Lifecycle methods #
|
141
210
|
#####################
|
142
211
|
|
143
|
-
def __init__(self, risk
|
144
|
-
super().__init__()
|
145
|
-
self.
|
146
|
-
|
147
|
-
|
212
|
+
def __init__(self, risk: EntityString, distribution_type: str) -> None:
|
213
|
+
super().__init__(risk, distribution_type)
|
214
|
+
self.standard_deviation = None
|
215
|
+
try:
|
216
|
+
self._distribution = {
|
217
|
+
"normal": rd.Normal,
|
218
|
+
"lognormal": rd.LogNormal,
|
219
|
+
}[distribution_type]
|
220
|
+
except KeyError:
|
221
|
+
raise NotImplementedError(
|
222
|
+
f"Distribution type {distribution_type} is not supported for "
|
223
|
+
f"risk {risk.name}."
|
224
|
+
)
|
148
225
|
|
149
|
-
|
150
|
-
|
151
|
-
|
226
|
+
#################
|
227
|
+
# Setup methods #
|
228
|
+
#################
|
229
|
+
|
230
|
+
def build_all_lookup_tables(self, builder: "Builder") -> None:
|
231
|
+
exposure_data = self.get_exposure_data(builder)
|
232
|
+
standard_deviation = self.get_data(
|
233
|
+
builder, self.configuration["data_sources"]["exposure_standard_deviation"]
|
234
|
+
)
|
235
|
+
parameters = self._distribution.get_parameters(
|
236
|
+
mean=get_risk_distribution_parameter(self.get_value_columns, exposure_data),
|
237
|
+
sd=get_risk_distribution_parameter(self.get_value_columns, standard_deviation),
|
152
238
|
)
|
153
239
|
|
154
|
-
|
155
|
-
|
156
|
-
|
240
|
+
self.lookup_tables["parameters"] = self.build_lookup_table(
|
241
|
+
builder, parameters.reset_index(), list(parameters.columns)
|
242
|
+
)
|
157
243
|
|
158
|
-
def
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
244
|
+
def get_exposure_parameter_pipeline(self, builder: Builder) -> Pipeline:
|
245
|
+
return builder.value.register_value_producer(
|
246
|
+
self.parameters_pipeline_name,
|
247
|
+
source=self.lookup_tables["parameters"],
|
248
|
+
requires_columns=get_lookup_columns([self.lookup_tables["parameters"]]),
|
249
|
+
)
|
163
250
|
|
164
251
|
##################
|
165
252
|
# Public methods #
|
166
253
|
##################
|
167
254
|
|
168
|
-
def ppf(self,
|
169
|
-
if not
|
170
|
-
|
171
|
-
|
255
|
+
def ppf(self, quantiles: pd.Series) -> pd.Series:
|
256
|
+
if not quantiles.empty:
|
257
|
+
quantiles = clip(quantiles)
|
258
|
+
parameters = self.exposure_parameters(quantiles.index)
|
259
|
+
x = self._distribution(parameters=parameters).ppf(quantiles)
|
172
260
|
x[x.isnull()] = 0
|
173
261
|
else:
|
174
262
|
x = pd.Series([])
|
175
263
|
return x
|
176
264
|
|
177
265
|
|
178
|
-
class PolytomousDistribution(
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
super().__init__()
|
185
|
-
self.risk = EntityString(risk)
|
186
|
-
self._exposure_data = exposure_data
|
187
|
-
self.exposure_parameters_pipeline_name = f"{self.risk}.exposure_parameters"
|
188
|
-
|
189
|
-
# noinspection PyAttributeOutsideInit
|
190
|
-
def setup(self, builder: Builder) -> None:
|
191
|
-
self.categories = self.get_categories()
|
192
|
-
self.exposure = self.get_exposure_parameters(builder)
|
266
|
+
class PolytomousDistribution(RiskExposureDistribution):
|
267
|
+
@property
|
268
|
+
def categories(self) -> List[str]:
|
269
|
+
# These need to be sorted so the cumulative sum is in the correct order of categories
|
270
|
+
# and results are therefore reproducible and correct
|
271
|
+
return sorted(self.lookup_tables["exposure"].value_columns)
|
193
272
|
|
194
273
|
#################
|
195
274
|
# Setup methods #
|
196
275
|
#################
|
197
276
|
|
198
|
-
def
|
199
|
-
|
200
|
-
|
201
|
-
|
277
|
+
def build_all_lookup_tables(self, builder: "Builder") -> None:
|
278
|
+
exposure_data = self.get_exposure_data(builder)
|
279
|
+
exposure_value_columns = self.get_exposure_value_columns(exposure_data)
|
280
|
+
|
281
|
+
if isinstance(exposure_data, pd.DataFrame):
|
282
|
+
exposure_data = pivot_categorical(builder, self.risk, exposure_data, "parameter")
|
283
|
+
|
284
|
+
self.lookup_tables["exposure"] = self.build_lookup_table(
|
285
|
+
builder, exposure_data, exposure_value_columns
|
202
286
|
)
|
203
287
|
|
204
|
-
def
|
288
|
+
def get_exposure_value_columns(
|
289
|
+
self, exposure_data: Union[int, float, pd.DataFrame]
|
290
|
+
) -> Optional[List[str]]:
|
291
|
+
if isinstance(exposure_data, pd.DataFrame):
|
292
|
+
return list(exposure_data["parameter"].unique())
|
293
|
+
return None
|
294
|
+
|
295
|
+
def get_exposure_parameter_pipeline(self, builder: Builder) -> Pipeline:
|
205
296
|
return builder.value.register_value_producer(
|
206
|
-
self.
|
207
|
-
source=
|
208
|
-
|
209
|
-
key_columns=["sex"],
|
210
|
-
parameter_columns=["age", "year"],
|
211
|
-
),
|
297
|
+
self.parameters_pipeline_name,
|
298
|
+
source=self.lookup_tables["exposure"],
|
299
|
+
requires_columns=get_lookup_columns([self.lookup_tables["exposure"]]),
|
212
300
|
)
|
213
301
|
|
214
302
|
##################
|
215
303
|
# Public methods #
|
216
304
|
##################
|
217
305
|
|
218
|
-
def ppf(self,
|
219
|
-
exposure = self.
|
306
|
+
def ppf(self, quantiles: pd.Series) -> pd.Series:
|
307
|
+
exposure = self.exposure_parameters(quantiles.index)
|
220
308
|
sorted_exposures = exposure[self.categories]
|
221
309
|
if not np.allclose(1, np.sum(sorted_exposures, axis=1)):
|
222
310
|
raise MissingDataError("All exposure data returned as 0.")
|
223
311
|
exposure_sum = sorted_exposures.cumsum(axis="columns")
|
224
312
|
category_index = pd.concat(
|
225
|
-
[exposure_sum[c] <
|
313
|
+
[exposure_sum[c] < quantiles for c in exposure_sum.columns], axis=1
|
226
314
|
).sum(axis=1)
|
227
315
|
return pd.Series(
|
228
316
|
np.array(self.categories)[category_index],
|
229
317
|
name=self.risk + ".exposure",
|
230
|
-
index=
|
318
|
+
index=quantiles.index,
|
231
319
|
)
|
232
320
|
|
233
321
|
|
234
|
-
class DichotomousDistribution(
|
235
|
-
#####################
|
236
|
-
# Lifecycle methods #
|
237
|
-
#####################
|
322
|
+
class DichotomousDistribution(RiskExposureDistribution):
|
238
323
|
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
self._exposure_data = exposure_data.drop(columns="cat2")
|
324
|
+
#################
|
325
|
+
# Setup methods #
|
326
|
+
#################
|
243
327
|
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
328
|
+
def build_all_lookup_tables(self, builder: "Builder") -> None:
|
329
|
+
exposure_data = self.get_exposure_data(builder)
|
330
|
+
exposure_value_columns = self.get_exposure_value_columns(exposure_data)
|
331
|
+
|
332
|
+
if isinstance(exposure_data, pd.DataFrame):
|
333
|
+
any_negatives = (exposure_data[exposure_value_columns] < 0).any().any()
|
334
|
+
any_over_one = (exposure_data[exposure_value_columns] > 1).any().any()
|
335
|
+
if any_negatives or any_over_one:
|
336
|
+
raise ValueError(f"All exposures must be in the range [0, 1] for {self.risk}")
|
337
|
+
elif exposure_data < 0 or exposure_data > 1:
|
338
|
+
raise ValueError(f"Exposure must be in the range [0, 1] for {self.risk}")
|
339
|
+
|
340
|
+
self.lookup_tables["exposure"] = self.build_lookup_table(
|
341
|
+
builder, exposure_data, exposure_value_columns
|
248
342
|
)
|
249
|
-
self.
|
250
|
-
|
343
|
+
self.lookup_tables["paf"] = self.build_lookup_table(builder, 0.0)
|
344
|
+
|
345
|
+
def get_exposure_data(self, builder: Builder) -> Union[int, float, pd.DataFrame]:
|
346
|
+
exposure_data = super().get_exposure_data(builder)
|
347
|
+
|
348
|
+
if isinstance(exposure_data, (int, float)):
|
349
|
+
return exposure_data
|
350
|
+
|
351
|
+
# rebin exposure categories
|
352
|
+
self.validate_rebin_source(builder, exposure_data)
|
353
|
+
rebin_exposed_categories = set(self.configuration["rebinned_exposed"])
|
354
|
+
if rebin_exposed_categories:
|
355
|
+
exposure_data = self._rebin_exposure_data(exposure_data, rebin_exposed_categories)
|
356
|
+
|
357
|
+
exposure_data = exposure_data[exposure_data["parameter"] == "cat1"]
|
358
|
+
return exposure_data.drop(columns="parameter")
|
359
|
+
|
360
|
+
@staticmethod
|
361
|
+
def _rebin_exposure_data(
|
362
|
+
exposure_data: pd.DataFrame, rebin_exposed_categories: set
|
363
|
+
) -> pd.DataFrame:
|
364
|
+
exposure_data = exposure_data[
|
365
|
+
exposure_data["parameter"].isin(rebin_exposed_categories)
|
366
|
+
]
|
367
|
+
exposure_data["parameter"] = "cat1"
|
368
|
+
exposure_data = (
|
369
|
+
exposure_data.groupby(list(exposure_data.columns.difference(["value"])))
|
370
|
+
.sum()
|
371
|
+
.reset_index()
|
251
372
|
)
|
252
|
-
|
373
|
+
return exposure_data
|
374
|
+
|
375
|
+
def get_exposure_value_columns(
|
376
|
+
self, exposure_data: Union[int, float, pd.DataFrame]
|
377
|
+
) -> Optional[List[str]]:
|
378
|
+
if isinstance(exposure_data, pd.DataFrame):
|
379
|
+
return self.get_value_columns(exposure_data)
|
380
|
+
return None
|
381
|
+
|
382
|
+
# noinspection PyAttributeOutsideInit
|
383
|
+
def setup(self, builder: Builder) -> None:
|
384
|
+
super().setup(builder)
|
253
385
|
self.joint_paf = builder.value.register_value_producer(
|
254
386
|
f"{self.risk}.exposure_parameters.paf",
|
255
|
-
source=lambda index: [
|
387
|
+
source=lambda index: [self.lookup_tables["paf"](index)],
|
256
388
|
preferred_combiner=list_combiner,
|
257
389
|
preferred_post_processor=union_post_processor,
|
258
390
|
)
|
259
391
|
|
392
|
+
def get_exposure_parameter_pipeline(self, builder: Builder) -> Pipeline:
|
393
|
+
return builder.value.register_value_producer(
|
394
|
+
f"{self.risk}.exposure_parameters",
|
395
|
+
source=self.exposure_parameter_source,
|
396
|
+
requires_columns=get_lookup_columns([self.lookup_tables["exposure"]]),
|
397
|
+
)
|
398
|
+
|
399
|
+
##############
|
400
|
+
# Validators #
|
401
|
+
##############
|
402
|
+
|
403
|
+
def validate_rebin_source(self, builder, data: pd.DataFrame) -> None:
|
404
|
+
if not isinstance(data, pd.DataFrame):
|
405
|
+
return
|
406
|
+
|
407
|
+
rebin_exposed_categories = set(builder.configuration[self.risk]["rebinned_exposed"])
|
408
|
+
|
409
|
+
if (
|
410
|
+
rebin_exposed_categories
|
411
|
+
and builder.configuration[self.risk]["category_thresholds"]
|
412
|
+
):
|
413
|
+
raise ValueError(
|
414
|
+
f"Rebinning and category thresholds are mutually exclusive. "
|
415
|
+
f"You provided both for {self.risk.name}."
|
416
|
+
)
|
417
|
+
|
418
|
+
invalid_cats = rebin_exposed_categories.difference(set(data.parameter))
|
419
|
+
if invalid_cats:
|
420
|
+
raise ValueError(
|
421
|
+
f"The following provided categories for the rebinned exposed "
|
422
|
+
f"category of {self.risk.name} are not found in the exposure data: "
|
423
|
+
f"{invalid_cats}."
|
424
|
+
)
|
425
|
+
|
426
|
+
if rebin_exposed_categories == set(data.parameter):
|
427
|
+
raise ValueError(
|
428
|
+
f"The provided categories for the rebinned exposed category of "
|
429
|
+
f"{self.risk.name} comprise all categories for the exposure data. "
|
430
|
+
f"At least one category must be left out of the provided categories "
|
431
|
+
f"to be rebinned into the unexposed category."
|
432
|
+
)
|
433
|
+
|
260
434
|
##################################
|
261
435
|
# Pipeline sources and modifiers #
|
262
436
|
##################################
|
263
437
|
|
264
|
-
def
|
265
|
-
base_exposure = self.
|
438
|
+
def exposure_parameter_source(self, index: pd.Index) -> pd.Series:
|
439
|
+
base_exposure = self.lookup_tables["exposure"](index).values
|
266
440
|
joint_paf = self.joint_paf(index).values
|
267
441
|
return pd.Series(base_exposure * (1 - joint_paf), index=index, name="values")
|
268
442
|
|
@@ -270,42 +444,17 @@ class DichotomousDistribution(Component):
|
|
270
444
|
# Public methods #
|
271
445
|
##################
|
272
446
|
|
273
|
-
def ppf(self,
|
274
|
-
exposed =
|
447
|
+
def ppf(self, quantiles: pd.Series) -> pd.Series:
|
448
|
+
exposed = quantiles < self.exposure_parameters(quantiles.index)
|
275
449
|
return pd.Series(
|
276
450
|
exposed.replace({True: "cat1", False: "cat2"}),
|
277
451
|
name=self.risk + ".exposure",
|
278
|
-
index=
|
279
|
-
)
|
280
|
-
|
281
|
-
|
282
|
-
def get_distribution(risk, distribution_type, exposure, exposure_standard_deviation, weights):
|
283
|
-
if distribution_type == "dichotomous":
|
284
|
-
distribution = DichotomousDistribution(risk, exposure)
|
285
|
-
elif "polytomous" in distribution_type:
|
286
|
-
distribution = PolytomousDistribution(risk, exposure)
|
287
|
-
elif distribution_type == "normal":
|
288
|
-
distribution = ContinuousDistribution(
|
289
|
-
risk, mean=exposure, sd=exposure_standard_deviation, distribution=Normal
|
290
|
-
)
|
291
|
-
elif distribution_type == "lognormal":
|
292
|
-
distribution = ContinuousDistribution(
|
293
|
-
risk, mean=exposure, sd=exposure_standard_deviation, distribution=LogNormal
|
294
|
-
)
|
295
|
-
elif distribution_type == "ensemble":
|
296
|
-
distribution = EnsembleSimulation(
|
297
|
-
risk,
|
298
|
-
weights,
|
299
|
-
mean=exposure,
|
300
|
-
sd=exposure_standard_deviation,
|
452
|
+
index=quantiles.index,
|
301
453
|
)
|
302
|
-
else:
|
303
|
-
raise NotImplementedError(f"Unhandled distribution type {distribution_type}")
|
304
|
-
return distribution
|
305
454
|
|
306
455
|
|
307
456
|
def clip(q):
|
308
|
-
"""Adjust the percentile boundary
|
457
|
+
"""Adjust the percentile boundary cases.
|
309
458
|
|
310
459
|
The risk distributions package uses the 99.9th and 0.001st percentiles
|
311
460
|
of a log-normal distribution as the bounds of the distribution support.
|
@@ -319,3 +468,24 @@ def clip(q):
|
|
319
468
|
q[q > Q_UPPER_BOUND] = Q_UPPER_BOUND
|
320
469
|
q[q < Q_LOWER_BOUND] = Q_LOWER_BOUND
|
321
470
|
return q
|
471
|
+
|
472
|
+
|
473
|
+
def get_risk_distribution_parameter(
|
474
|
+
value_columns_getter: Callable[[Union[pd.DataFrame]], List[str]],
|
475
|
+
data: Union[float, pd.DataFrame],
|
476
|
+
) -> Union[float, pd.Series]:
|
477
|
+
if isinstance(data, pd.DataFrame):
|
478
|
+
value_columns = value_columns_getter(data)
|
479
|
+
if len(value_columns) > 1:
|
480
|
+
raise ValueError(
|
481
|
+
"Expected a single value column for risk data, but found "
|
482
|
+
f"{len(value_columns)}: {value_columns}."
|
483
|
+
)
|
484
|
+
# don't return parameter col in continuous and ensemble distribution
|
485
|
+
# means to match standard deviation index
|
486
|
+
if "parameter" in data.columns and set(data["parameter"]) == {"continuous"}:
|
487
|
+
data = data.drop("parameter", axis=1)
|
488
|
+
index = [col for col in data.columns if col not in value_columns]
|
489
|
+
data = data.set_index(index)[value_columns].squeeze(axis=1)
|
490
|
+
|
491
|
+
return data
|