vivarium-public-health 2.3.2__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vivarium_public_health/_version.py +1 -1
- vivarium_public_health/disease/model.py +23 -21
- vivarium_public_health/disease/models.py +1 -0
- vivarium_public_health/disease/special_disease.py +40 -41
- vivarium_public_health/disease/state.py +42 -125
- vivarium_public_health/disease/transition.py +70 -27
- vivarium_public_health/mslt/delay.py +1 -0
- vivarium_public_health/mslt/disease.py +1 -0
- vivarium_public_health/mslt/intervention.py +1 -0
- vivarium_public_health/mslt/magic_wand_components.py +1 -0
- vivarium_public_health/mslt/observer.py +1 -0
- vivarium_public_health/mslt/population.py +1 -0
- vivarium_public_health/plugins/parser.py +61 -31
- vivarium_public_health/population/add_new_birth_cohorts.py +2 -3
- vivarium_public_health/population/base_population.py +2 -1
- vivarium_public_health/population/mortality.py +83 -80
- vivarium_public_health/{metrics → results}/__init__.py +2 -0
- vivarium_public_health/results/columns.py +22 -0
- vivarium_public_health/results/disability.py +187 -0
- vivarium_public_health/results/disease.py +222 -0
- vivarium_public_health/results/mortality.py +186 -0
- vivarium_public_health/results/observer.py +78 -0
- vivarium_public_health/results/risk.py +138 -0
- vivarium_public_health/results/simple_cause.py +18 -0
- vivarium_public_health/{metrics → results}/stratification.py +10 -8
- vivarium_public_health/risks/__init__.py +1 -2
- vivarium_public_health/risks/base_risk.py +134 -29
- vivarium_public_health/risks/data_transformations.py +65 -326
- vivarium_public_health/risks/distributions.py +315 -145
- vivarium_public_health/risks/effect.py +376 -75
- vivarium_public_health/risks/implementations/low_birth_weight_and_short_gestation.py +61 -89
- vivarium_public_health/treatment/magic_wand.py +1 -0
- vivarium_public_health/treatment/scale_up.py +1 -0
- vivarium_public_health/treatment/therapeutic_inertia.py +1 -0
- vivarium_public_health/utilities.py +17 -2
- {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/METADATA +13 -3
- vivarium_public_health-3.0.0.dist-info/RECORD +49 -0
- {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/WHEEL +1 -1
- vivarium_public_health/metrics/disability.py +0 -118
- vivarium_public_health/metrics/disease.py +0 -136
- vivarium_public_health/metrics/mortality.py +0 -144
- vivarium_public_health/metrics/risk.py +0 -110
- vivarium_public_health/testing/__init__.py +0 -0
- vivarium_public_health/testing/mock_artifact.py +0 -145
- vivarium_public_health/testing/utils.py +0 -71
- vivarium_public_health-2.3.2.dist-info/RECORD +0 -49
- {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/LICENSE.txt +0 -0
- {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/top_level.txt +0 -0
@@ -8,11 +8,9 @@ risk data and performing any necessary data transformations.
|
|
8
8
|
|
9
9
|
"""
|
10
10
|
|
11
|
-
from typing import Union
|
12
|
-
|
13
11
|
import numpy as np
|
14
12
|
import pandas as pd
|
15
|
-
from
|
13
|
+
from vivarium.framework.engine import Builder
|
16
14
|
|
17
15
|
from vivarium_public_health.utilities import EntityString, TargetString
|
18
16
|
|
@@ -21,12 +19,24 @@ from vivarium_public_health.utilities import EntityString, TargetString
|
|
21
19
|
#############
|
22
20
|
|
23
21
|
|
24
|
-
def pivot_categorical(
|
22
|
+
def pivot_categorical(
|
23
|
+
builder: Builder,
|
24
|
+
risk: EntityString,
|
25
|
+
data: pd.DataFrame,
|
26
|
+
pivot_column: str = "parameter",
|
27
|
+
reset_index: bool = True,
|
28
|
+
) -> pd.DataFrame:
|
25
29
|
"""Pivots data that is long on categories to be wide."""
|
26
|
-
|
27
|
-
|
28
|
-
|
30
|
+
# todo remove dependency on artifact manager having exactly one value column
|
31
|
+
value_column = builder.data.value_columns()(f"{risk}.exposure")[0]
|
32
|
+
index_cols = [
|
33
|
+
column for column in data.columns if column not in [value_column, pivot_column]
|
34
|
+
]
|
35
|
+
data = data.pivot_table(index=index_cols, columns=pivot_column, values=value_column)
|
36
|
+
if reset_index:
|
37
|
+
data = data.reset_index()
|
29
38
|
data.columns.name = None
|
39
|
+
|
30
40
|
return data
|
31
41
|
|
32
42
|
|
@@ -35,14 +45,8 @@ def pivot_categorical(data: pd.DataFrame) -> pd.DataFrame:
|
|
35
45
|
##########################
|
36
46
|
|
37
47
|
|
38
|
-
def
|
39
|
-
|
40
|
-
data = load_distribution_data(builder, risk)
|
41
|
-
return data
|
42
|
-
|
43
|
-
|
44
|
-
def get_exposure_post_processor(builder, risk: EntityString):
|
45
|
-
thresholds = builder.configuration[risk.name]["category_thresholds"]
|
48
|
+
def get_exposure_post_processor(builder, risk: str):
|
49
|
+
thresholds = builder.configuration[risk]["category_thresholds"]
|
46
50
|
|
47
51
|
if thresholds:
|
48
52
|
thresholds = [-np.inf] + thresholds + [np.inf]
|
@@ -59,111 +63,10 @@ def get_exposure_post_processor(builder, risk: EntityString):
|
|
59
63
|
return post_processor
|
60
64
|
|
61
65
|
|
62
|
-
def
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
"distribution_type": get_distribution_type(builder, risk),
|
67
|
-
"exposure": exposure_data,
|
68
|
-
"exposure_standard_deviation": get_exposure_standard_deviation_data(builder, risk),
|
69
|
-
"weights": get_exposure_distribution_weights(builder, risk),
|
70
|
-
}
|
71
|
-
return data
|
72
|
-
|
73
|
-
|
74
|
-
def get_distribution_type(builder, risk: EntityString):
|
75
|
-
risk_config = builder.configuration[risk.name]
|
76
|
-
|
77
|
-
if risk_config["exposure"] == "data" and not risk_config["rebinned_exposed"]:
|
78
|
-
distribution_type = builder.data.load(f"{risk}.distribution")
|
79
|
-
else:
|
80
|
-
distribution_type = "dichotomous"
|
81
|
-
|
82
|
-
return distribution_type
|
83
|
-
|
84
|
-
|
85
|
-
def get_exposure_data(builder, risk: EntityString):
|
86
|
-
exposure_data = load_exposure_data(builder, risk)
|
87
|
-
exposure_data = rebin_exposure_data(builder, risk, exposure_data)
|
88
|
-
|
89
|
-
if get_distribution_type(builder, risk) in [
|
90
|
-
"dichotomous",
|
91
|
-
"ordered_polytomous",
|
92
|
-
"unordered_polytomous",
|
93
|
-
"lbwsg",
|
94
|
-
]:
|
95
|
-
exposure_data = pivot_categorical(exposure_data)
|
96
|
-
|
97
|
-
return exposure_data
|
98
|
-
|
99
|
-
|
100
|
-
def load_exposure_data(builder, risk: EntityString):
|
101
|
-
risk_config = builder.configuration[risk.name]
|
102
|
-
exposure_source = risk_config["exposure"]
|
103
|
-
|
104
|
-
if exposure_source == "data":
|
105
|
-
exposure_data = builder.data.load(f"{risk}.exposure")
|
106
|
-
else:
|
107
|
-
if isinstance(exposure_source, str): # Build from covariate
|
108
|
-
cat1 = builder.data.load(f"{exposure_source}.estimate")
|
109
|
-
# TODO: Generate a draw.
|
110
|
-
cat1 = cat1[cat1["parameter"] == "mean_value"]
|
111
|
-
cat1["parameter"] = "cat1"
|
112
|
-
else: # We have a numerical value
|
113
|
-
cat1 = builder.data.load("population.demographic_dimensions")
|
114
|
-
cat1["parameter"] = "cat1"
|
115
|
-
cat1["value"] = float(exposure_source)
|
116
|
-
cat2 = cat1.copy()
|
117
|
-
cat2["parameter"] = "cat2"
|
118
|
-
cat2["value"] = 1 - cat2["value"]
|
119
|
-
exposure_data = pd.concat([cat1, cat2], ignore_index=True)
|
120
|
-
|
121
|
-
return exposure_data
|
122
|
-
|
123
|
-
|
124
|
-
def get_exposure_standard_deviation_data(builder, risk: EntityString):
|
125
|
-
distribution_type = get_distribution_type(builder, risk)
|
126
|
-
if distribution_type in ["normal", "lognormal", "ensemble"]:
|
127
|
-
exposure_sd = builder.data.load(f"{risk}.exposure_standard_deviation")
|
128
|
-
else:
|
129
|
-
exposure_sd = None
|
130
|
-
return exposure_sd
|
131
|
-
|
132
|
-
|
133
|
-
def get_exposure_distribution_weights(builder, risk: EntityString):
|
134
|
-
distribution_type = get_distribution_type(builder, risk)
|
135
|
-
if distribution_type == "ensemble":
|
136
|
-
weights = builder.data.load(f"{risk}.exposure_distribution_weights")
|
137
|
-
weights = pivot_categorical(weights)
|
138
|
-
if "glnorm" in weights.columns:
|
139
|
-
if np.any(weights["glnorm"]):
|
140
|
-
raise NotImplementedError("glnorm distribution is not supported")
|
141
|
-
weights = weights.drop(columns=["glnorm"])
|
142
|
-
else:
|
143
|
-
weights = None
|
144
|
-
return weights
|
145
|
-
|
146
|
-
|
147
|
-
def rebin_exposure_data(builder, risk: EntityString, exposure_data: pd.DataFrame):
|
148
|
-
validate_rebin_source(builder, risk, exposure_data)
|
149
|
-
rebin_exposed_categories = set(builder.configuration[risk.name]["rebinned_exposed"])
|
150
|
-
|
151
|
-
if rebin_exposed_categories:
|
152
|
-
exposure_data = _rebin_exposure_data(exposure_data, rebin_exposed_categories)
|
153
|
-
|
154
|
-
return exposure_data
|
155
|
-
|
156
|
-
|
157
|
-
def _rebin_exposure_data(
|
158
|
-
exposure_data: pd.DataFrame, rebin_exposed_categories: set
|
159
|
-
) -> pd.DataFrame:
|
160
|
-
exposure_data["parameter"] = exposure_data["parameter"].map(
|
161
|
-
lambda p: "cat1" if p in rebin_exposed_categories else "cat2"
|
162
|
-
)
|
163
|
-
return (
|
164
|
-
exposure_data.groupby(list(exposure_data.columns.difference(["value"])))
|
165
|
-
.sum()
|
166
|
-
.reset_index()
|
66
|
+
def load_exposure_data(builder: Builder, risk: EntityString) -> pd.DataFrame:
|
67
|
+
risk_component = builder.components.get_component(risk)
|
68
|
+
return risk_component.get_data(
|
69
|
+
builder, builder.configuration[risk_component.name]["data_sources"]["exposure"]
|
167
70
|
)
|
168
71
|
|
169
72
|
|
@@ -172,111 +75,6 @@ def _rebin_exposure_data(
|
|
172
75
|
###############################
|
173
76
|
|
174
77
|
|
175
|
-
def get_relative_risk_data(builder, risk: EntityString, target: TargetString):
|
176
|
-
source_type = validate_relative_risk_data_source(builder, risk, target)
|
177
|
-
relative_risk_data = load_relative_risk_data(builder, risk, target, source_type)
|
178
|
-
validate_relative_risk_rebin_source(builder, risk, target, relative_risk_data)
|
179
|
-
relative_risk_data = rebin_relative_risk_data(builder, risk, relative_risk_data)
|
180
|
-
|
181
|
-
if get_distribution_type(builder, risk) in [
|
182
|
-
"dichotomous",
|
183
|
-
"ordered_polytomous",
|
184
|
-
"unordered_polytomous",
|
185
|
-
]:
|
186
|
-
relative_risk_data = pivot_categorical(relative_risk_data)
|
187
|
-
# Check if any values for relative risk are below expected boundary of 1.0
|
188
|
-
category_columns = [c for c in relative_risk_data.columns if "cat" in c]
|
189
|
-
if not relative_risk_data[
|
190
|
-
(relative_risk_data[category_columns] < 1.0).any(axis=1)
|
191
|
-
].empty:
|
192
|
-
logger.warning(
|
193
|
-
f"WARNING: Some data values are below the expected boundary of 1.0 for {risk}.relative_risk"
|
194
|
-
)
|
195
|
-
|
196
|
-
else:
|
197
|
-
relative_risk_data = relative_risk_data.drop(columns=["parameter"])
|
198
|
-
|
199
|
-
return relative_risk_data
|
200
|
-
|
201
|
-
|
202
|
-
def load_relative_risk_data(
|
203
|
-
builder, risk: EntityString, target: TargetString, source_type: str
|
204
|
-
):
|
205
|
-
relative_risk_source = builder.configuration[f"effect_of_{risk.name}_on_{target.name}"][
|
206
|
-
target.measure
|
207
|
-
]
|
208
|
-
|
209
|
-
if source_type == "data":
|
210
|
-
relative_risk_data = builder.data.load(f"{risk}.relative_risk")
|
211
|
-
correct_target = (relative_risk_data["affected_entity"] == target.name) & (
|
212
|
-
relative_risk_data["affected_measure"] == target.measure
|
213
|
-
)
|
214
|
-
relative_risk_data = relative_risk_data[correct_target].drop(
|
215
|
-
columns=["affected_entity", "affected_measure"]
|
216
|
-
)
|
217
|
-
|
218
|
-
elif source_type == "relative risk value":
|
219
|
-
relative_risk_data = _make_relative_risk_data(
|
220
|
-
builder, float(relative_risk_source["relative_risk"])
|
221
|
-
)
|
222
|
-
|
223
|
-
else: # distribution
|
224
|
-
parameters = {
|
225
|
-
k: v for k, v in relative_risk_source.to_dict().items() if v is not None
|
226
|
-
}
|
227
|
-
random_state = np.random.RandomState(
|
228
|
-
builder.randomness.get_seed(
|
229
|
-
f"effect_of_{risk.name}_on_{target.name}.{target.measure}"
|
230
|
-
)
|
231
|
-
)
|
232
|
-
cat1_value = generate_relative_risk_from_distribution(random_state, parameters)
|
233
|
-
relative_risk_data = _make_relative_risk_data(builder, cat1_value)
|
234
|
-
|
235
|
-
return relative_risk_data
|
236
|
-
|
237
|
-
|
238
|
-
def generate_relative_risk_from_distribution(
|
239
|
-
random_state: np.random.RandomState, parameters: dict
|
240
|
-
) -> Union[float, pd.Series, np.ndarray]:
|
241
|
-
first = pd.Series(list(parameters.values())[0])
|
242
|
-
length = len(first)
|
243
|
-
index = first.index
|
244
|
-
|
245
|
-
for v in parameters.values():
|
246
|
-
if length != len(pd.Series(v)) or not index.equals(pd.Series(v).index):
|
247
|
-
raise ValueError(
|
248
|
-
"If specifying vectorized parameters, all parameters "
|
249
|
-
"must be the same length and have the same index."
|
250
|
-
)
|
251
|
-
|
252
|
-
if "mean" in parameters: # normal distribution
|
253
|
-
rr_value = random_state.normal(parameters["mean"], parameters["se"])
|
254
|
-
elif "log_mean" in parameters: # log distribution
|
255
|
-
log_value = parameters["log_mean"] + parameters["log_se"] * random_state.randn()
|
256
|
-
if parameters["tau_squared"]:
|
257
|
-
log_value += random_state.normal(0, parameters["tau_squared"])
|
258
|
-
rr_value = np.exp(log_value)
|
259
|
-
else:
|
260
|
-
raise NotImplementedError(
|
261
|
-
f"Only normal distributions (supplying mean and se) and log distributions "
|
262
|
-
f"(supplying log_mean, log_se, and tau_squared) are currently supported."
|
263
|
-
)
|
264
|
-
|
265
|
-
rr_value = np.maximum(1, rr_value)
|
266
|
-
|
267
|
-
return rr_value
|
268
|
-
|
269
|
-
|
270
|
-
def _make_relative_risk_data(builder, cat1_value: float) -> pd.DataFrame:
|
271
|
-
cat1 = builder.data.load("population.demographic_dimensions")
|
272
|
-
cat1["parameter"] = "cat1"
|
273
|
-
cat1["value"] = cat1_value
|
274
|
-
cat2 = cat1.copy()
|
275
|
-
cat2["parameter"] = "cat2"
|
276
|
-
cat2["value"] = 1
|
277
|
-
return pd.concat([cat1, cat2], ignore_index=True)
|
278
|
-
|
279
|
-
|
280
78
|
def rebin_relative_risk_data(
|
281
79
|
builder, risk: EntityString, relative_risk_data: pd.DataFrame
|
282
80
|
) -> pd.DataFrame:
|
@@ -287,9 +85,13 @@ def rebin_relative_risk_data(
|
|
287
85
|
for the matching rr = [rr1, rr2, rr3, 1], rebinned rr for the rebinned cat1 should be:
|
288
86
|
(0.1 *rr1 + 0.2 * rr2 + 0.3* rr3) / (0.1+0.2+0.3)
|
289
87
|
"""
|
290
|
-
|
88
|
+
if not risk in builder.configuration.to_dict():
|
89
|
+
return relative_risk_data
|
90
|
+
|
91
|
+
rebin_exposed_categories = set(builder.configuration[risk]["rebinned_exposed"])
|
291
92
|
|
292
93
|
if rebin_exposed_categories:
|
94
|
+
# todo make sure this works
|
293
95
|
exposure_data = load_exposure_data(builder, risk)
|
294
96
|
relative_risk_data = _rebin_relative_risk_data(
|
295
97
|
relative_risk_data, exposure_data, rebin_exposed_categories
|
@@ -319,78 +121,16 @@ def _rebin_relative_risk_data(
|
|
319
121
|
return relative_risk_data.drop(columns=["value_x", "value_y"])
|
320
122
|
|
321
123
|
|
322
|
-
def get_exposure_effect(builder, risk: EntityString):
|
323
|
-
distribution_type = get_distribution_type(builder, risk)
|
324
|
-
risk_exposure = builder.value.get_value(f"{risk.name}.exposure")
|
325
|
-
|
326
|
-
if distribution_type in ["normal", "lognormal", "ensemble"]:
|
327
|
-
tmred = builder.data.load(f"{risk}.tmred")
|
328
|
-
tmrel = 0.5 * (tmred["min"] + tmred["max"])
|
329
|
-
scale = builder.data.load(f"{risk}.relative_risk_scalar")
|
330
|
-
|
331
|
-
def exposure_effect(rates, rr):
|
332
|
-
exposure = risk_exposure(rr.index)
|
333
|
-
relative_risk = np.maximum(rr.values ** ((exposure - tmrel) / scale), 1)
|
334
|
-
return rates * relative_risk
|
335
|
-
|
336
|
-
else:
|
337
|
-
|
338
|
-
def exposure_effect(rates, rr: pd.DataFrame) -> pd.Series:
|
339
|
-
index_columns = ["index", risk.name]
|
340
|
-
|
341
|
-
exposure = risk_exposure(rr.index).reset_index()
|
342
|
-
exposure.columns = index_columns
|
343
|
-
exposure = exposure.set_index(index_columns)
|
344
|
-
|
345
|
-
relative_risk = rr.stack().reset_index()
|
346
|
-
relative_risk.columns = index_columns + ["value"]
|
347
|
-
relative_risk = relative_risk.set_index(index_columns)
|
348
|
-
|
349
|
-
effect = relative_risk.loc[exposure.index, "value"].droplevel(risk.name)
|
350
|
-
affected_rates = rates * effect
|
351
|
-
return affected_rates
|
352
|
-
|
353
|
-
return exposure_effect
|
354
|
-
|
355
|
-
|
356
|
-
##################################################
|
357
|
-
# Population attributable fraction data handlers #
|
358
|
-
##################################################
|
359
|
-
|
360
|
-
|
361
|
-
def get_population_attributable_fraction_data(
|
362
|
-
builder, risk: EntityString, target: TargetString
|
363
|
-
):
|
364
|
-
exposure_source = builder.configuration[f"{risk.name}"]["exposure"]
|
365
|
-
rr_source_type = validate_relative_risk_data_source(builder, risk, target)
|
366
|
-
|
367
|
-
if exposure_source == "data" and rr_source_type == "data" and risk.type == "risk_factor":
|
368
|
-
paf_data = builder.data.load(f"{risk}.population_attributable_fraction")
|
369
|
-
correct_target = (paf_data["affected_entity"] == target.name) & (
|
370
|
-
paf_data["affected_measure"] == target.measure
|
371
|
-
)
|
372
|
-
paf_data = paf_data[correct_target].drop(
|
373
|
-
columns=["affected_entity", "affected_measure"]
|
374
|
-
)
|
375
|
-
else:
|
376
|
-
key_cols = ["sex", "age_start", "age_end", "year_start", "year_end"]
|
377
|
-
exposure_data = get_exposure_data(builder, risk).set_index(key_cols)
|
378
|
-
relative_risk_data = get_relative_risk_data(builder, risk, target).set_index(key_cols)
|
379
|
-
mean_rr = (exposure_data * relative_risk_data).sum(axis=1)
|
380
|
-
paf_data = ((mean_rr - 1) / mean_rr).reset_index().rename(columns={0: "value"})
|
381
|
-
return paf_data
|
382
|
-
|
383
|
-
|
384
124
|
##############
|
385
125
|
# Validators #
|
386
126
|
##############
|
387
127
|
|
388
128
|
|
389
|
-
def validate_distribution_data_source(builder, risk: EntityString):
|
129
|
+
def validate_distribution_data_source(builder: Builder, risk: EntityString) -> None:
|
390
130
|
"""Checks that the exposure distribution specification is valid."""
|
391
|
-
exposure_type = builder.configuration[risk
|
392
|
-
rebin = builder.configuration[risk
|
393
|
-
category_thresholds = builder.configuration[risk
|
131
|
+
exposure_type = builder.configuration[risk]["data_sources"]["exposure"]
|
132
|
+
rebin = builder.configuration[risk]["rebinned_exposed"]
|
133
|
+
category_thresholds = builder.configuration[risk]["category_thresholds"]
|
394
134
|
|
395
135
|
if risk.type == "alternative_risk_factor":
|
396
136
|
if exposure_type != "data" or rebin:
|
@@ -401,29 +141,20 @@ def validate_distribution_data_source(builder, risk: EntityString):
|
|
401
141
|
if not category_thresholds:
|
402
142
|
raise ValueError("Must specify category thresholds to use alternative risks.")
|
403
143
|
|
404
|
-
elif risk.type in ["risk_factor", "coverage_gap"]:
|
405
|
-
if isinstance(exposure_type, (int, float)) and not 0 <= exposure_type <= 1:
|
406
|
-
raise ValueError(f"Exposure should be in the range [0, 1]")
|
407
|
-
elif isinstance(exposure_type, str) and exposure_type.split(".")[0] not in [
|
408
|
-
"covariate",
|
409
|
-
"data",
|
410
|
-
]:
|
411
|
-
raise ValueError(
|
412
|
-
f"Exposure must be specified as 'data', an integer or float value, "
|
413
|
-
f"or as a string in the format covariate.covariate_name"
|
414
|
-
)
|
415
|
-
else:
|
416
|
-
pass # All good
|
417
|
-
else:
|
144
|
+
elif risk.type not in ["risk_factor", "coverage_gap"]:
|
418
145
|
raise ValueError(f"Unknown risk type {risk.type} for risk {risk.name}")
|
419
146
|
|
420
147
|
|
421
148
|
def validate_relative_risk_data_source(builder, risk: EntityString, target: TargetString):
|
422
|
-
|
423
|
-
|
149
|
+
from vivarium_public_health.risks import RiskEffect
|
150
|
+
|
151
|
+
source_key = RiskEffect.get_name(risk, target)
|
152
|
+
source_config = builder.configuration[source_key]
|
424
153
|
|
425
154
|
provided_keys = set(
|
426
|
-
k
|
155
|
+
k
|
156
|
+
for k, v in source_config["distribution_args"].to_dict().items()
|
157
|
+
if isinstance(v, (int, float))
|
427
158
|
)
|
428
159
|
|
429
160
|
source_map = {
|
@@ -442,24 +173,24 @@ def validate_relative_risk_data_source(builder, risk: EntityString, target: Targ
|
|
442
173
|
source_type = [k for k, v in source_map.items() if provided_keys == v][0]
|
443
174
|
|
444
175
|
if source_type == "relative risk value":
|
445
|
-
if not 1 <=
|
176
|
+
if not 1 <= source_type <= 100:
|
446
177
|
raise ValueError(
|
447
|
-
|
448
|
-
f"
|
178
|
+
"If specifying a single value for relative risk, it should be in the range [1, 100]. "
|
179
|
+
f"You provided {source_type} for {source_key}."
|
449
180
|
)
|
450
181
|
elif source_type == "normal distribution":
|
451
|
-
if
|
182
|
+
if source_config["mean"] <= 0 or source_config["se"] <= 0:
|
452
183
|
raise ValueError(
|
453
184
|
f"To specify parameters for a normal distribution for a risk effect, you must provide"
|
454
185
|
f"both mean and se above 0. This is not the case for {source_key}."
|
455
186
|
)
|
456
187
|
elif source_type == "log distribution":
|
457
|
-
if
|
188
|
+
if source_config["log_mean"] <= 0 or source_config["log_se"] <= 0:
|
458
189
|
raise ValueError(
|
459
190
|
f"To specify parameters for a log distribution for a risk effect, you must provide"
|
460
191
|
f"both log_mean and log_se above 0. This is not the case for {source_key}."
|
461
192
|
)
|
462
|
-
if
|
193
|
+
if source_config["tau_squared"] < 0:
|
463
194
|
raise ValueError(
|
464
195
|
f"To specify parameters for a log distribution for a risk effect, you must provide"
|
465
196
|
f"tau_squared >= 0. This is not the case for {source_key}."
|
@@ -478,13 +209,18 @@ def validate_relative_risk_rebin_source(
|
|
478
209
|
f"Subsetting {risk} relative risk data to {target.name} {target.measure} "
|
479
210
|
"returned an empty DataFrame. Check your artifact."
|
480
211
|
)
|
481
|
-
|
212
|
+
if risk in builder.configuration.to_dict():
|
213
|
+
validate_rebin_source(builder, risk, data)
|
214
|
+
|
215
|
+
|
216
|
+
def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame) -> None:
|
482
217
|
|
218
|
+
if not isinstance(data, pd.DataFrame):
|
219
|
+
return
|
483
220
|
|
484
|
-
|
485
|
-
rebin_exposed_categories = set(builder.configuration[risk.name]["rebinned_exposed"])
|
221
|
+
rebin_exposed_categories = set(builder.configuration[risk]["rebinned_exposed"])
|
486
222
|
|
487
|
-
if rebin_exposed_categories and builder.configuration[risk
|
223
|
+
if rebin_exposed_categories and builder.configuration[risk]["category_thresholds"]:
|
488
224
|
raise ValueError(
|
489
225
|
f"Rebinning and category thresholds are mutually exclusive. "
|
490
226
|
f"You provided both for {risk.name}."
|
@@ -494,20 +230,23 @@ def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame):
|
|
494
230
|
f"{risk}.distribution"
|
495
231
|
):
|
496
232
|
raise ValueError(
|
497
|
-
f"Rebinning is only supported for polytomous risks. You provided
|
498
|
-
f
|
233
|
+
f"Rebinning is only supported for polytomous risks. You provided "
|
234
|
+
f"rebinning exposed categoriesfor {risk.name}, which is of "
|
235
|
+
f"type {builder.data.load(f'{risk}.distribution')}."
|
499
236
|
)
|
500
237
|
|
501
238
|
invalid_cats = rebin_exposed_categories.difference(set(data.parameter))
|
502
239
|
if invalid_cats:
|
503
240
|
raise ValueError(
|
504
|
-
f"The following provided categories for the rebinned exposed
|
505
|
-
f"are not found in the exposure data:
|
241
|
+
f"The following provided categories for the rebinned exposed "
|
242
|
+
f"category of {risk.name} are not found in the exposure data: "
|
243
|
+
f"{invalid_cats}."
|
506
244
|
)
|
507
245
|
|
508
246
|
if rebin_exposed_categories == set(data.parameter):
|
509
247
|
raise ValueError(
|
510
|
-
f"The provided categories for the rebinned exposed category of
|
511
|
-
f"categories for the exposure data.
|
512
|
-
f"
|
248
|
+
f"The provided categories for the rebinned exposed category of "
|
249
|
+
f"{risk.name} comprise all categories for the exposure data. "
|
250
|
+
f"At least one category must be left out of the provided categories "
|
251
|
+
f"to be rebinned into the unexposed category."
|
513
252
|
)
|