vivarium-public-health 2.3.2__py3-none-any.whl → 3.0.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vivarium_public_health/_version.py +1 -1
- vivarium_public_health/disease/model.py +23 -21
- vivarium_public_health/disease/models.py +1 -0
- vivarium_public_health/disease/special_disease.py +40 -41
- vivarium_public_health/disease/state.py +42 -125
- vivarium_public_health/disease/transition.py +70 -27
- vivarium_public_health/mslt/delay.py +1 -0
- vivarium_public_health/mslt/disease.py +1 -0
- vivarium_public_health/mslt/intervention.py +1 -0
- vivarium_public_health/mslt/magic_wand_components.py +1 -0
- vivarium_public_health/mslt/observer.py +1 -0
- vivarium_public_health/mslt/population.py +1 -0
- vivarium_public_health/plugins/parser.py +61 -31
- vivarium_public_health/population/add_new_birth_cohorts.py +2 -3
- vivarium_public_health/population/base_population.py +2 -1
- vivarium_public_health/population/mortality.py +83 -80
- vivarium_public_health/{metrics → results}/__init__.py +2 -0
- vivarium_public_health/results/columns.py +22 -0
- vivarium_public_health/results/disability.py +187 -0
- vivarium_public_health/results/disease.py +222 -0
- vivarium_public_health/results/mortality.py +186 -0
- vivarium_public_health/results/observer.py +78 -0
- vivarium_public_health/results/risk.py +138 -0
- vivarium_public_health/results/simple_cause.py +18 -0
- vivarium_public_health/{metrics → results}/stratification.py +10 -8
- vivarium_public_health/risks/__init__.py +1 -2
- vivarium_public_health/risks/base_risk.py +134 -29
- vivarium_public_health/risks/data_transformations.py +65 -326
- vivarium_public_health/risks/distributions.py +315 -145
- vivarium_public_health/risks/effect.py +376 -75
- vivarium_public_health/risks/implementations/low_birth_weight_and_short_gestation.py +61 -89
- vivarium_public_health/treatment/magic_wand.py +1 -0
- vivarium_public_health/treatment/scale_up.py +1 -0
- vivarium_public_health/treatment/therapeutic_inertia.py +1 -0
- vivarium_public_health/utilities.py +17 -2
- {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/METADATA +13 -3
- vivarium_public_health-3.0.0.dist-info/RECORD +49 -0
- {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/WHEEL +1 -1
- vivarium_public_health/metrics/disability.py +0 -118
- vivarium_public_health/metrics/disease.py +0 -136
- vivarium_public_health/metrics/mortality.py +0 -144
- vivarium_public_health/metrics/risk.py +0 -110
- vivarium_public_health/testing/__init__.py +0 -0
- vivarium_public_health/testing/mock_artifact.py +0 -145
- vivarium_public_health/testing/utils.py +0 -71
- vivarium_public_health-2.3.2.dist-info/RECORD +0 -49
- {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/LICENSE.txt +0 -0
- {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/top_level.txt +0 -0
@@ -8,11 +8,9 @@ risk data and performing any necessary data transformations.
|
|
8
8
|
|
9
9
|
"""
|
10
10
|
|
11
|
-
from typing import Union
|
12
|
-
|
13
11
|
import numpy as np
|
14
12
|
import pandas as pd
|
15
|
-
from
|
13
|
+
from vivarium.framework.engine import Builder
|
16
14
|
|
17
15
|
from vivarium_public_health.utilities import EntityString, TargetString
|
18
16
|
|
@@ -21,12 +19,24 @@ from vivarium_public_health.utilities import EntityString, TargetString
|
|
21
19
|
#############
|
22
20
|
|
23
21
|
|
24
|
-
def pivot_categorical(
|
22
|
+
def pivot_categorical(
|
23
|
+
builder: Builder,
|
24
|
+
risk: EntityString,
|
25
|
+
data: pd.DataFrame,
|
26
|
+
pivot_column: str = "parameter",
|
27
|
+
reset_index: bool = True,
|
28
|
+
) -> pd.DataFrame:
|
25
29
|
"""Pivots data that is long on categories to be wide."""
|
26
|
-
|
27
|
-
|
28
|
-
|
30
|
+
# todo remove dependency on artifact manager having exactly one value column
|
31
|
+
value_column = builder.data.value_columns()(f"{risk}.exposure")[0]
|
32
|
+
index_cols = [
|
33
|
+
column for column in data.columns if column not in [value_column, pivot_column]
|
34
|
+
]
|
35
|
+
data = data.pivot_table(index=index_cols, columns=pivot_column, values=value_column)
|
36
|
+
if reset_index:
|
37
|
+
data = data.reset_index()
|
29
38
|
data.columns.name = None
|
39
|
+
|
30
40
|
return data
|
31
41
|
|
32
42
|
|
@@ -35,14 +45,8 @@ def pivot_categorical(data: pd.DataFrame) -> pd.DataFrame:
|
|
35
45
|
##########################
|
36
46
|
|
37
47
|
|
38
|
-
def
|
39
|
-
|
40
|
-
data = load_distribution_data(builder, risk)
|
41
|
-
return data
|
42
|
-
|
43
|
-
|
44
|
-
def get_exposure_post_processor(builder, risk: EntityString):
|
45
|
-
thresholds = builder.configuration[risk.name]["category_thresholds"]
|
48
|
+
def get_exposure_post_processor(builder, risk: str):
|
49
|
+
thresholds = builder.configuration[risk]["category_thresholds"]
|
46
50
|
|
47
51
|
if thresholds:
|
48
52
|
thresholds = [-np.inf] + thresholds + [np.inf]
|
@@ -59,111 +63,10 @@ def get_exposure_post_processor(builder, risk: EntityString):
|
|
59
63
|
return post_processor
|
60
64
|
|
61
65
|
|
62
|
-
def
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
"distribution_type": get_distribution_type(builder, risk),
|
67
|
-
"exposure": exposure_data,
|
68
|
-
"exposure_standard_deviation": get_exposure_standard_deviation_data(builder, risk),
|
69
|
-
"weights": get_exposure_distribution_weights(builder, risk),
|
70
|
-
}
|
71
|
-
return data
|
72
|
-
|
73
|
-
|
74
|
-
def get_distribution_type(builder, risk: EntityString):
|
75
|
-
risk_config = builder.configuration[risk.name]
|
76
|
-
|
77
|
-
if risk_config["exposure"] == "data" and not risk_config["rebinned_exposed"]:
|
78
|
-
distribution_type = builder.data.load(f"{risk}.distribution")
|
79
|
-
else:
|
80
|
-
distribution_type = "dichotomous"
|
81
|
-
|
82
|
-
return distribution_type
|
83
|
-
|
84
|
-
|
85
|
-
def get_exposure_data(builder, risk: EntityString):
|
86
|
-
exposure_data = load_exposure_data(builder, risk)
|
87
|
-
exposure_data = rebin_exposure_data(builder, risk, exposure_data)
|
88
|
-
|
89
|
-
if get_distribution_type(builder, risk) in [
|
90
|
-
"dichotomous",
|
91
|
-
"ordered_polytomous",
|
92
|
-
"unordered_polytomous",
|
93
|
-
"lbwsg",
|
94
|
-
]:
|
95
|
-
exposure_data = pivot_categorical(exposure_data)
|
96
|
-
|
97
|
-
return exposure_data
|
98
|
-
|
99
|
-
|
100
|
-
def load_exposure_data(builder, risk: EntityString):
|
101
|
-
risk_config = builder.configuration[risk.name]
|
102
|
-
exposure_source = risk_config["exposure"]
|
103
|
-
|
104
|
-
if exposure_source == "data":
|
105
|
-
exposure_data = builder.data.load(f"{risk}.exposure")
|
106
|
-
else:
|
107
|
-
if isinstance(exposure_source, str): # Build from covariate
|
108
|
-
cat1 = builder.data.load(f"{exposure_source}.estimate")
|
109
|
-
# TODO: Generate a draw.
|
110
|
-
cat1 = cat1[cat1["parameter"] == "mean_value"]
|
111
|
-
cat1["parameter"] = "cat1"
|
112
|
-
else: # We have a numerical value
|
113
|
-
cat1 = builder.data.load("population.demographic_dimensions")
|
114
|
-
cat1["parameter"] = "cat1"
|
115
|
-
cat1["value"] = float(exposure_source)
|
116
|
-
cat2 = cat1.copy()
|
117
|
-
cat2["parameter"] = "cat2"
|
118
|
-
cat2["value"] = 1 - cat2["value"]
|
119
|
-
exposure_data = pd.concat([cat1, cat2], ignore_index=True)
|
120
|
-
|
121
|
-
return exposure_data
|
122
|
-
|
123
|
-
|
124
|
-
def get_exposure_standard_deviation_data(builder, risk: EntityString):
|
125
|
-
distribution_type = get_distribution_type(builder, risk)
|
126
|
-
if distribution_type in ["normal", "lognormal", "ensemble"]:
|
127
|
-
exposure_sd = builder.data.load(f"{risk}.exposure_standard_deviation")
|
128
|
-
else:
|
129
|
-
exposure_sd = None
|
130
|
-
return exposure_sd
|
131
|
-
|
132
|
-
|
133
|
-
def get_exposure_distribution_weights(builder, risk: EntityString):
|
134
|
-
distribution_type = get_distribution_type(builder, risk)
|
135
|
-
if distribution_type == "ensemble":
|
136
|
-
weights = builder.data.load(f"{risk}.exposure_distribution_weights")
|
137
|
-
weights = pivot_categorical(weights)
|
138
|
-
if "glnorm" in weights.columns:
|
139
|
-
if np.any(weights["glnorm"]):
|
140
|
-
raise NotImplementedError("glnorm distribution is not supported")
|
141
|
-
weights = weights.drop(columns=["glnorm"])
|
142
|
-
else:
|
143
|
-
weights = None
|
144
|
-
return weights
|
145
|
-
|
146
|
-
|
147
|
-
def rebin_exposure_data(builder, risk: EntityString, exposure_data: pd.DataFrame):
|
148
|
-
validate_rebin_source(builder, risk, exposure_data)
|
149
|
-
rebin_exposed_categories = set(builder.configuration[risk.name]["rebinned_exposed"])
|
150
|
-
|
151
|
-
if rebin_exposed_categories:
|
152
|
-
exposure_data = _rebin_exposure_data(exposure_data, rebin_exposed_categories)
|
153
|
-
|
154
|
-
return exposure_data
|
155
|
-
|
156
|
-
|
157
|
-
def _rebin_exposure_data(
|
158
|
-
exposure_data: pd.DataFrame, rebin_exposed_categories: set
|
159
|
-
) -> pd.DataFrame:
|
160
|
-
exposure_data["parameter"] = exposure_data["parameter"].map(
|
161
|
-
lambda p: "cat1" if p in rebin_exposed_categories else "cat2"
|
162
|
-
)
|
163
|
-
return (
|
164
|
-
exposure_data.groupby(list(exposure_data.columns.difference(["value"])))
|
165
|
-
.sum()
|
166
|
-
.reset_index()
|
66
|
+
def load_exposure_data(builder: Builder, risk: EntityString) -> pd.DataFrame:
|
67
|
+
risk_component = builder.components.get_component(risk)
|
68
|
+
return risk_component.get_data(
|
69
|
+
builder, builder.configuration[risk_component.name]["data_sources"]["exposure"]
|
167
70
|
)
|
168
71
|
|
169
72
|
|
@@ -172,111 +75,6 @@ def _rebin_exposure_data(
|
|
172
75
|
###############################
|
173
76
|
|
174
77
|
|
175
|
-
def get_relative_risk_data(builder, risk: EntityString, target: TargetString):
|
176
|
-
source_type = validate_relative_risk_data_source(builder, risk, target)
|
177
|
-
relative_risk_data = load_relative_risk_data(builder, risk, target, source_type)
|
178
|
-
validate_relative_risk_rebin_source(builder, risk, target, relative_risk_data)
|
179
|
-
relative_risk_data = rebin_relative_risk_data(builder, risk, relative_risk_data)
|
180
|
-
|
181
|
-
if get_distribution_type(builder, risk) in [
|
182
|
-
"dichotomous",
|
183
|
-
"ordered_polytomous",
|
184
|
-
"unordered_polytomous",
|
185
|
-
]:
|
186
|
-
relative_risk_data = pivot_categorical(relative_risk_data)
|
187
|
-
# Check if any values for relative risk are below expected boundary of 1.0
|
188
|
-
category_columns = [c for c in relative_risk_data.columns if "cat" in c]
|
189
|
-
if not relative_risk_data[
|
190
|
-
(relative_risk_data[category_columns] < 1.0).any(axis=1)
|
191
|
-
].empty:
|
192
|
-
logger.warning(
|
193
|
-
f"WARNING: Some data values are below the expected boundary of 1.0 for {risk}.relative_risk"
|
194
|
-
)
|
195
|
-
|
196
|
-
else:
|
197
|
-
relative_risk_data = relative_risk_data.drop(columns=["parameter"])
|
198
|
-
|
199
|
-
return relative_risk_data
|
200
|
-
|
201
|
-
|
202
|
-
def load_relative_risk_data(
|
203
|
-
builder, risk: EntityString, target: TargetString, source_type: str
|
204
|
-
):
|
205
|
-
relative_risk_source = builder.configuration[f"effect_of_{risk.name}_on_{target.name}"][
|
206
|
-
target.measure
|
207
|
-
]
|
208
|
-
|
209
|
-
if source_type == "data":
|
210
|
-
relative_risk_data = builder.data.load(f"{risk}.relative_risk")
|
211
|
-
correct_target = (relative_risk_data["affected_entity"] == target.name) & (
|
212
|
-
relative_risk_data["affected_measure"] == target.measure
|
213
|
-
)
|
214
|
-
relative_risk_data = relative_risk_data[correct_target].drop(
|
215
|
-
columns=["affected_entity", "affected_measure"]
|
216
|
-
)
|
217
|
-
|
218
|
-
elif source_type == "relative risk value":
|
219
|
-
relative_risk_data = _make_relative_risk_data(
|
220
|
-
builder, float(relative_risk_source["relative_risk"])
|
221
|
-
)
|
222
|
-
|
223
|
-
else: # distribution
|
224
|
-
parameters = {
|
225
|
-
k: v for k, v in relative_risk_source.to_dict().items() if v is not None
|
226
|
-
}
|
227
|
-
random_state = np.random.RandomState(
|
228
|
-
builder.randomness.get_seed(
|
229
|
-
f"effect_of_{risk.name}_on_{target.name}.{target.measure}"
|
230
|
-
)
|
231
|
-
)
|
232
|
-
cat1_value = generate_relative_risk_from_distribution(random_state, parameters)
|
233
|
-
relative_risk_data = _make_relative_risk_data(builder, cat1_value)
|
234
|
-
|
235
|
-
return relative_risk_data
|
236
|
-
|
237
|
-
|
238
|
-
def generate_relative_risk_from_distribution(
|
239
|
-
random_state: np.random.RandomState, parameters: dict
|
240
|
-
) -> Union[float, pd.Series, np.ndarray]:
|
241
|
-
first = pd.Series(list(parameters.values())[0])
|
242
|
-
length = len(first)
|
243
|
-
index = first.index
|
244
|
-
|
245
|
-
for v in parameters.values():
|
246
|
-
if length != len(pd.Series(v)) or not index.equals(pd.Series(v).index):
|
247
|
-
raise ValueError(
|
248
|
-
"If specifying vectorized parameters, all parameters "
|
249
|
-
"must be the same length and have the same index."
|
250
|
-
)
|
251
|
-
|
252
|
-
if "mean" in parameters: # normal distribution
|
253
|
-
rr_value = random_state.normal(parameters["mean"], parameters["se"])
|
254
|
-
elif "log_mean" in parameters: # log distribution
|
255
|
-
log_value = parameters["log_mean"] + parameters["log_se"] * random_state.randn()
|
256
|
-
if parameters["tau_squared"]:
|
257
|
-
log_value += random_state.normal(0, parameters["tau_squared"])
|
258
|
-
rr_value = np.exp(log_value)
|
259
|
-
else:
|
260
|
-
raise NotImplementedError(
|
261
|
-
f"Only normal distributions (supplying mean and se) and log distributions "
|
262
|
-
f"(supplying log_mean, log_se, and tau_squared) are currently supported."
|
263
|
-
)
|
264
|
-
|
265
|
-
rr_value = np.maximum(1, rr_value)
|
266
|
-
|
267
|
-
return rr_value
|
268
|
-
|
269
|
-
|
270
|
-
def _make_relative_risk_data(builder, cat1_value: float) -> pd.DataFrame:
|
271
|
-
cat1 = builder.data.load("population.demographic_dimensions")
|
272
|
-
cat1["parameter"] = "cat1"
|
273
|
-
cat1["value"] = cat1_value
|
274
|
-
cat2 = cat1.copy()
|
275
|
-
cat2["parameter"] = "cat2"
|
276
|
-
cat2["value"] = 1
|
277
|
-
return pd.concat([cat1, cat2], ignore_index=True)
|
278
|
-
|
279
|
-
|
280
78
|
def rebin_relative_risk_data(
|
281
79
|
builder, risk: EntityString, relative_risk_data: pd.DataFrame
|
282
80
|
) -> pd.DataFrame:
|
@@ -287,9 +85,13 @@ def rebin_relative_risk_data(
|
|
287
85
|
for the matching rr = [rr1, rr2, rr3, 1], rebinned rr for the rebinned cat1 should be:
|
288
86
|
(0.1 *rr1 + 0.2 * rr2 + 0.3* rr3) / (0.1+0.2+0.3)
|
289
87
|
"""
|
290
|
-
|
88
|
+
if not risk in builder.configuration.to_dict():
|
89
|
+
return relative_risk_data
|
90
|
+
|
91
|
+
rebin_exposed_categories = set(builder.configuration[risk]["rebinned_exposed"])
|
291
92
|
|
292
93
|
if rebin_exposed_categories:
|
94
|
+
# todo make sure this works
|
293
95
|
exposure_data = load_exposure_data(builder, risk)
|
294
96
|
relative_risk_data = _rebin_relative_risk_data(
|
295
97
|
relative_risk_data, exposure_data, rebin_exposed_categories
|
@@ -319,78 +121,16 @@ def _rebin_relative_risk_data(
|
|
319
121
|
return relative_risk_data.drop(columns=["value_x", "value_y"])
|
320
122
|
|
321
123
|
|
322
|
-
def get_exposure_effect(builder, risk: EntityString):
|
323
|
-
distribution_type = get_distribution_type(builder, risk)
|
324
|
-
risk_exposure = builder.value.get_value(f"{risk.name}.exposure")
|
325
|
-
|
326
|
-
if distribution_type in ["normal", "lognormal", "ensemble"]:
|
327
|
-
tmred = builder.data.load(f"{risk}.tmred")
|
328
|
-
tmrel = 0.5 * (tmred["min"] + tmred["max"])
|
329
|
-
scale = builder.data.load(f"{risk}.relative_risk_scalar")
|
330
|
-
|
331
|
-
def exposure_effect(rates, rr):
|
332
|
-
exposure = risk_exposure(rr.index)
|
333
|
-
relative_risk = np.maximum(rr.values ** ((exposure - tmrel) / scale), 1)
|
334
|
-
return rates * relative_risk
|
335
|
-
|
336
|
-
else:
|
337
|
-
|
338
|
-
def exposure_effect(rates, rr: pd.DataFrame) -> pd.Series:
|
339
|
-
index_columns = ["index", risk.name]
|
340
|
-
|
341
|
-
exposure = risk_exposure(rr.index).reset_index()
|
342
|
-
exposure.columns = index_columns
|
343
|
-
exposure = exposure.set_index(index_columns)
|
344
|
-
|
345
|
-
relative_risk = rr.stack().reset_index()
|
346
|
-
relative_risk.columns = index_columns + ["value"]
|
347
|
-
relative_risk = relative_risk.set_index(index_columns)
|
348
|
-
|
349
|
-
effect = relative_risk.loc[exposure.index, "value"].droplevel(risk.name)
|
350
|
-
affected_rates = rates * effect
|
351
|
-
return affected_rates
|
352
|
-
|
353
|
-
return exposure_effect
|
354
|
-
|
355
|
-
|
356
|
-
##################################################
|
357
|
-
# Population attributable fraction data handlers #
|
358
|
-
##################################################
|
359
|
-
|
360
|
-
|
361
|
-
def get_population_attributable_fraction_data(
|
362
|
-
builder, risk: EntityString, target: TargetString
|
363
|
-
):
|
364
|
-
exposure_source = builder.configuration[f"{risk.name}"]["exposure"]
|
365
|
-
rr_source_type = validate_relative_risk_data_source(builder, risk, target)
|
366
|
-
|
367
|
-
if exposure_source == "data" and rr_source_type == "data" and risk.type == "risk_factor":
|
368
|
-
paf_data = builder.data.load(f"{risk}.population_attributable_fraction")
|
369
|
-
correct_target = (paf_data["affected_entity"] == target.name) & (
|
370
|
-
paf_data["affected_measure"] == target.measure
|
371
|
-
)
|
372
|
-
paf_data = paf_data[correct_target].drop(
|
373
|
-
columns=["affected_entity", "affected_measure"]
|
374
|
-
)
|
375
|
-
else:
|
376
|
-
key_cols = ["sex", "age_start", "age_end", "year_start", "year_end"]
|
377
|
-
exposure_data = get_exposure_data(builder, risk).set_index(key_cols)
|
378
|
-
relative_risk_data = get_relative_risk_data(builder, risk, target).set_index(key_cols)
|
379
|
-
mean_rr = (exposure_data * relative_risk_data).sum(axis=1)
|
380
|
-
paf_data = ((mean_rr - 1) / mean_rr).reset_index().rename(columns={0: "value"})
|
381
|
-
return paf_data
|
382
|
-
|
383
|
-
|
384
124
|
##############
|
385
125
|
# Validators #
|
386
126
|
##############
|
387
127
|
|
388
128
|
|
389
|
-
def validate_distribution_data_source(builder, risk: EntityString):
|
129
|
+
def validate_distribution_data_source(builder: Builder, risk: EntityString) -> None:
|
390
130
|
"""Checks that the exposure distribution specification is valid."""
|
391
|
-
exposure_type = builder.configuration[risk
|
392
|
-
rebin = builder.configuration[risk
|
393
|
-
category_thresholds = builder.configuration[risk
|
131
|
+
exposure_type = builder.configuration[risk]["data_sources"]["exposure"]
|
132
|
+
rebin = builder.configuration[risk]["rebinned_exposed"]
|
133
|
+
category_thresholds = builder.configuration[risk]["category_thresholds"]
|
394
134
|
|
395
135
|
if risk.type == "alternative_risk_factor":
|
396
136
|
if exposure_type != "data" or rebin:
|
@@ -401,29 +141,20 @@ def validate_distribution_data_source(builder, risk: EntityString):
|
|
401
141
|
if not category_thresholds:
|
402
142
|
raise ValueError("Must specify category thresholds to use alternative risks.")
|
403
143
|
|
404
|
-
elif risk.type in ["risk_factor", "coverage_gap"]:
|
405
|
-
if isinstance(exposure_type, (int, float)) and not 0 <= exposure_type <= 1:
|
406
|
-
raise ValueError(f"Exposure should be in the range [0, 1]")
|
407
|
-
elif isinstance(exposure_type, str) and exposure_type.split(".")[0] not in [
|
408
|
-
"covariate",
|
409
|
-
"data",
|
410
|
-
]:
|
411
|
-
raise ValueError(
|
412
|
-
f"Exposure must be specified as 'data', an integer or float value, "
|
413
|
-
f"or as a string in the format covariate.covariate_name"
|
414
|
-
)
|
415
|
-
else:
|
416
|
-
pass # All good
|
417
|
-
else:
|
144
|
+
elif risk.type not in ["risk_factor", "coverage_gap"]:
|
418
145
|
raise ValueError(f"Unknown risk type {risk.type} for risk {risk.name}")
|
419
146
|
|
420
147
|
|
421
148
|
def validate_relative_risk_data_source(builder, risk: EntityString, target: TargetString):
|
422
|
-
|
423
|
-
|
149
|
+
from vivarium_public_health.risks import RiskEffect
|
150
|
+
|
151
|
+
source_key = RiskEffect.get_name(risk, target)
|
152
|
+
source_config = builder.configuration[source_key]
|
424
153
|
|
425
154
|
provided_keys = set(
|
426
|
-
k
|
155
|
+
k
|
156
|
+
for k, v in source_config["distribution_args"].to_dict().items()
|
157
|
+
if isinstance(v, (int, float))
|
427
158
|
)
|
428
159
|
|
429
160
|
source_map = {
|
@@ -442,24 +173,24 @@ def validate_relative_risk_data_source(builder, risk: EntityString, target: Targ
|
|
442
173
|
source_type = [k for k, v in source_map.items() if provided_keys == v][0]
|
443
174
|
|
444
175
|
if source_type == "relative risk value":
|
445
|
-
if not 1 <=
|
176
|
+
if not 1 <= source_type <= 100:
|
446
177
|
raise ValueError(
|
447
|
-
|
448
|
-
f"
|
178
|
+
"If specifying a single value for relative risk, it should be in the range [1, 100]. "
|
179
|
+
f"You provided {source_type} for {source_key}."
|
449
180
|
)
|
450
181
|
elif source_type == "normal distribution":
|
451
|
-
if
|
182
|
+
if source_config["mean"] <= 0 or source_config["se"] <= 0:
|
452
183
|
raise ValueError(
|
453
184
|
f"To specify parameters for a normal distribution for a risk effect, you must provide"
|
454
185
|
f"both mean and se above 0. This is not the case for {source_key}."
|
455
186
|
)
|
456
187
|
elif source_type == "log distribution":
|
457
|
-
if
|
188
|
+
if source_config["log_mean"] <= 0 or source_config["log_se"] <= 0:
|
458
189
|
raise ValueError(
|
459
190
|
f"To specify parameters for a log distribution for a risk effect, you must provide"
|
460
191
|
f"both log_mean and log_se above 0. This is not the case for {source_key}."
|
461
192
|
)
|
462
|
-
if
|
193
|
+
if source_config["tau_squared"] < 0:
|
463
194
|
raise ValueError(
|
464
195
|
f"To specify parameters for a log distribution for a risk effect, you must provide"
|
465
196
|
f"tau_squared >= 0. This is not the case for {source_key}."
|
@@ -478,13 +209,18 @@ def validate_relative_risk_rebin_source(
|
|
478
209
|
f"Subsetting {risk} relative risk data to {target.name} {target.measure} "
|
479
210
|
"returned an empty DataFrame. Check your artifact."
|
480
211
|
)
|
481
|
-
|
212
|
+
if risk in builder.configuration.to_dict():
|
213
|
+
validate_rebin_source(builder, risk, data)
|
214
|
+
|
215
|
+
|
216
|
+
def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame) -> None:
|
482
217
|
|
218
|
+
if not isinstance(data, pd.DataFrame):
|
219
|
+
return
|
483
220
|
|
484
|
-
|
485
|
-
rebin_exposed_categories = set(builder.configuration[risk.name]["rebinned_exposed"])
|
221
|
+
rebin_exposed_categories = set(builder.configuration[risk]["rebinned_exposed"])
|
486
222
|
|
487
|
-
if rebin_exposed_categories and builder.configuration[risk
|
223
|
+
if rebin_exposed_categories and builder.configuration[risk]["category_thresholds"]:
|
488
224
|
raise ValueError(
|
489
225
|
f"Rebinning and category thresholds are mutually exclusive. "
|
490
226
|
f"You provided both for {risk.name}."
|
@@ -494,20 +230,23 @@ def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame):
|
|
494
230
|
f"{risk}.distribution"
|
495
231
|
):
|
496
232
|
raise ValueError(
|
497
|
-
f"Rebinning is only supported for polytomous risks. You provided
|
498
|
-
f
|
233
|
+
f"Rebinning is only supported for polytomous risks. You provided "
|
234
|
+
f"rebinning exposed categoriesfor {risk.name}, which is of "
|
235
|
+
f"type {builder.data.load(f'{risk}.distribution')}."
|
499
236
|
)
|
500
237
|
|
501
238
|
invalid_cats = rebin_exposed_categories.difference(set(data.parameter))
|
502
239
|
if invalid_cats:
|
503
240
|
raise ValueError(
|
504
|
-
f"The following provided categories for the rebinned exposed
|
505
|
-
f"are not found in the exposure data:
|
241
|
+
f"The following provided categories for the rebinned exposed "
|
242
|
+
f"category of {risk.name} are not found in the exposure data: "
|
243
|
+
f"{invalid_cats}."
|
506
244
|
)
|
507
245
|
|
508
246
|
if rebin_exposed_categories == set(data.parameter):
|
509
247
|
raise ValueError(
|
510
|
-
f"The provided categories for the rebinned exposed category of
|
511
|
-
f"categories for the exposure data.
|
512
|
-
f"
|
248
|
+
f"The provided categories for the rebinned exposed category of "
|
249
|
+
f"{risk.name} comprise all categories for the exposure data. "
|
250
|
+
f"At least one category must be left out of the provided categories "
|
251
|
+
f"to be rebinned into the unexposed category."
|
513
252
|
)
|