vivarium-public-health 2.3.2__py3-none-any.whl → 3.0.0__py3-none-any.whl

Files changed (48)
  1. vivarium_public_health/_version.py +1 -1
  2. vivarium_public_health/disease/model.py +23 -21
  3. vivarium_public_health/disease/models.py +1 -0
  4. vivarium_public_health/disease/special_disease.py +40 -41
  5. vivarium_public_health/disease/state.py +42 -125
  6. vivarium_public_health/disease/transition.py +70 -27
  7. vivarium_public_health/mslt/delay.py +1 -0
  8. vivarium_public_health/mslt/disease.py +1 -0
  9. vivarium_public_health/mslt/intervention.py +1 -0
  10. vivarium_public_health/mslt/magic_wand_components.py +1 -0
  11. vivarium_public_health/mslt/observer.py +1 -0
  12. vivarium_public_health/mslt/population.py +1 -0
  13. vivarium_public_health/plugins/parser.py +61 -31
  14. vivarium_public_health/population/add_new_birth_cohorts.py +2 -3
  15. vivarium_public_health/population/base_population.py +2 -1
  16. vivarium_public_health/population/mortality.py +83 -80
  17. vivarium_public_health/{metrics → results}/__init__.py +2 -0
  18. vivarium_public_health/results/columns.py +22 -0
  19. vivarium_public_health/results/disability.py +187 -0
  20. vivarium_public_health/results/disease.py +222 -0
  21. vivarium_public_health/results/mortality.py +186 -0
  22. vivarium_public_health/results/observer.py +78 -0
  23. vivarium_public_health/results/risk.py +138 -0
  24. vivarium_public_health/results/simple_cause.py +18 -0
  25. vivarium_public_health/{metrics → results}/stratification.py +10 -8
  26. vivarium_public_health/risks/__init__.py +1 -2
  27. vivarium_public_health/risks/base_risk.py +134 -29
  28. vivarium_public_health/risks/data_transformations.py +65 -326
  29. vivarium_public_health/risks/distributions.py +315 -145
  30. vivarium_public_health/risks/effect.py +376 -75
  31. vivarium_public_health/risks/implementations/low_birth_weight_and_short_gestation.py +61 -89
  32. vivarium_public_health/treatment/magic_wand.py +1 -0
  33. vivarium_public_health/treatment/scale_up.py +1 -0
  34. vivarium_public_health/treatment/therapeutic_inertia.py +1 -0
  35. vivarium_public_health/utilities.py +17 -2
  36. {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/METADATA +13 -3
  37. vivarium_public_health-3.0.0.dist-info/RECORD +49 -0
  38. {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/WHEEL +1 -1
  39. vivarium_public_health/metrics/disability.py +0 -118
  40. vivarium_public_health/metrics/disease.py +0 -136
  41. vivarium_public_health/metrics/mortality.py +0 -144
  42. vivarium_public_health/metrics/risk.py +0 -110
  43. vivarium_public_health/testing/__init__.py +0 -0
  44. vivarium_public_health/testing/mock_artifact.py +0 -145
  45. vivarium_public_health/testing/utils.py +0 -71
  46. vivarium_public_health-2.3.2.dist-info/RECORD +0 -49
  47. {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/LICENSE.txt +0 -0
  48. {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/top_level.txt +0 -0
vivarium_public_health/risks/data_transformations.py

@@ -8,11 +8,9 @@ risk data and performing any necessary data transformations.
 
 """
 
-from typing import Union
-
 import numpy as np
 import pandas as pd
-from loguru import logger
+from vivarium.framework.engine import Builder
 
 from vivarium_public_health.utilities import EntityString, TargetString
 
@@ -21,12 +19,24 @@ from vivarium_public_health.utilities import EntityString, TargetString
 #############
 
 
-def pivot_categorical(data: pd.DataFrame) -> pd.DataFrame:
+def pivot_categorical(
+    builder: Builder,
+    risk: EntityString,
+    data: pd.DataFrame,
+    pivot_column: str = "parameter",
+    reset_index: bool = True,
+) -> pd.DataFrame:
     """Pivots data that is long on categories to be wide."""
-    key_cols = ["sex", "age_start", "age_end", "year_start", "year_end"]
-    key_cols = [k for k in key_cols if k in data.columns]
-    data = data.pivot_table(index=key_cols, columns="parameter", values="value").reset_index()
+    # todo remove dependency on artifact manager having exactly one value column
+    value_column = builder.data.value_columns()(f"{risk}.exposure")[0]
+    index_cols = [
+        column for column in data.columns if column not in [value_column, pivot_column]
+    ]
+    data = data.pivot_table(index=index_cols, columns=pivot_column, values=value_column)
+    if reset_index:
+        data = data.reset_index()
     data.columns.name = None
+
    return data
 
 
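The new `pivot_categorical` no longer hardcodes demographic key columns; it infers the index as every column other than the pivot and value columns. A minimal standalone pandas sketch of the same pivot, with a hypothetical two-category exposure frame (the `builder` plumbing omitted):

```python
import pandas as pd

# Hypothetical long-format exposure data: one row per (sex, category).
long_data = pd.DataFrame(
    {
        "sex": ["Female", "Female", "Male", "Male"],
        "parameter": ["cat1", "cat2", "cat1", "cat2"],
        "value": [0.25, 0.75, 0.30, 0.70],
    }
)

# Same pivot the function performs: the index is every column that is not
# the pivot column or the value column, and categories become columns.
index_cols = [c for c in long_data.columns if c not in ("value", "parameter")]
wide = long_data.pivot_table(index=index_cols, columns="parameter", values="value")
wide = wide.reset_index()
wide.columns.name = None
print(wide)
#       sex  cat1  cat2
# 0  Female  0.25  0.75
# 1    Male  0.30  0.70
```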
@@ -35,14 +45,8 @@ def pivot_categorical(data: pd.DataFrame) -> pd.DataFrame:
 ##########################
 
 
-def get_distribution_data(builder, risk: EntityString):
-    validate_distribution_data_source(builder, risk)
-    data = load_distribution_data(builder, risk)
-    return data
-
-
-def get_exposure_post_processor(builder, risk: EntityString):
-    thresholds = builder.configuration[risk.name]["category_thresholds"]
+def get_exposure_post_processor(builder, risk: str):
+    thresholds = builder.configuration[risk]["category_thresholds"]
 
     if thresholds:
         thresholds = [-np.inf] + thresholds + [np.inf]
@@ -59,111 +63,10 @@ def get_exposure_post_processor(builder, risk: EntityString):
     return post_processor
 
 
-def load_distribution_data(builder, risk: EntityString):
-    exposure_data = get_exposure_data(builder, risk)
-
-    data = {
-        "distribution_type": get_distribution_type(builder, risk),
-        "exposure": exposure_data,
-        "exposure_standard_deviation": get_exposure_standard_deviation_data(builder, risk),
-        "weights": get_exposure_distribution_weights(builder, risk),
-    }
-    return data
-
-
-def get_distribution_type(builder, risk: EntityString):
-    risk_config = builder.configuration[risk.name]
-
-    if risk_config["exposure"] == "data" and not risk_config["rebinned_exposed"]:
-        distribution_type = builder.data.load(f"{risk}.distribution")
-    else:
-        distribution_type = "dichotomous"
-
-    return distribution_type
-
-
-def get_exposure_data(builder, risk: EntityString):
-    exposure_data = load_exposure_data(builder, risk)
-    exposure_data = rebin_exposure_data(builder, risk, exposure_data)
-
-    if get_distribution_type(builder, risk) in [
-        "dichotomous",
-        "ordered_polytomous",
-        "unordered_polytomous",
-        "lbwsg",
-    ]:
-        exposure_data = pivot_categorical(exposure_data)
-
-    return exposure_data
-
-
-def load_exposure_data(builder, risk: EntityString):
-    risk_config = builder.configuration[risk.name]
-    exposure_source = risk_config["exposure"]
-
-    if exposure_source == "data":
-        exposure_data = builder.data.load(f"{risk}.exposure")
-    else:
-        if isinstance(exposure_source, str):  # Build from covariate
-            cat1 = builder.data.load(f"{exposure_source}.estimate")
-            # TODO: Generate a draw.
-            cat1 = cat1[cat1["parameter"] == "mean_value"]
-            cat1["parameter"] = "cat1"
-        else:  # We have a numerical value
-            cat1 = builder.data.load("population.demographic_dimensions")
-            cat1["parameter"] = "cat1"
-            cat1["value"] = float(exposure_source)
-        cat2 = cat1.copy()
-        cat2["parameter"] = "cat2"
-        cat2["value"] = 1 - cat2["value"]
-        exposure_data = pd.concat([cat1, cat2], ignore_index=True)
-
-    return exposure_data
-
-
-def get_exposure_standard_deviation_data(builder, risk: EntityString):
-    distribution_type = get_distribution_type(builder, risk)
-    if distribution_type in ["normal", "lognormal", "ensemble"]:
-        exposure_sd = builder.data.load(f"{risk}.exposure_standard_deviation")
-    else:
-        exposure_sd = None
-    return exposure_sd
-
-
-def get_exposure_distribution_weights(builder, risk: EntityString):
-    distribution_type = get_distribution_type(builder, risk)
-    if distribution_type == "ensemble":
-        weights = builder.data.load(f"{risk}.exposure_distribution_weights")
-        weights = pivot_categorical(weights)
-        if "glnorm" in weights.columns:
-            if np.any(weights["glnorm"]):
-                raise NotImplementedError("glnorm distribution is not supported")
-            weights = weights.drop(columns=["glnorm"])
-    else:
-        weights = None
-    return weights
-
-
-def rebin_exposure_data(builder, risk: EntityString, exposure_data: pd.DataFrame):
-    validate_rebin_source(builder, risk, exposure_data)
-    rebin_exposed_categories = set(builder.configuration[risk.name]["rebinned_exposed"])
-
-    if rebin_exposed_categories:
-        exposure_data = _rebin_exposure_data(exposure_data, rebin_exposed_categories)
-
-    return exposure_data
-
-
-def _rebin_exposure_data(
-    exposure_data: pd.DataFrame, rebin_exposed_categories: set
-) -> pd.DataFrame:
-    exposure_data["parameter"] = exposure_data["parameter"].map(
-        lambda p: "cat1" if p in rebin_exposed_categories else "cat2"
-    )
-    return (
-        exposure_data.groupby(list(exposure_data.columns.difference(["value"])))
-        .sum()
-        .reset_index()
+def load_exposure_data(builder: Builder, risk: EntityString) -> pd.DataFrame:
+    risk_component = builder.components.get_component(risk)
+    return risk_component.get_data(
+        builder, builder.configuration[risk_component.name]["data_sources"]["exposure"]
     )
 
 
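The removed `load_exposure_data` branch built a dichotomous exposure from a scalar by writing the value to `cat1` and its complement to `cat2`. A self-contained sketch of that construction, using a hypothetical demographic frame in place of `population.demographic_dimensions`:

```python
import pandas as pd

# Hypothetical stand-in for builder.data.load("population.demographic_dimensions").
demography = pd.DataFrame({"sex": ["Female", "Male"]})

exposure_value = 0.2  # hypothetical scalar exposure from configuration

# cat1 carries the exposed proportion, cat2 its complement, mirroring the
# removed numerical-value branch.
cat1 = demography.copy()
cat1["parameter"] = "cat1"
cat1["value"] = float(exposure_value)
cat2 = cat1.copy()
cat2["parameter"] = "cat2"
cat2["value"] = 1 - cat2["value"]
exposure_data = pd.concat([cat1, cat2], ignore_index=True)
```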
@@ -172,111 +75,6 @@ def _rebin_exposure_data(
 ###############################
 
 
-def get_relative_risk_data(builder, risk: EntityString, target: TargetString):
-    source_type = validate_relative_risk_data_source(builder, risk, target)
-    relative_risk_data = load_relative_risk_data(builder, risk, target, source_type)
-    validate_relative_risk_rebin_source(builder, risk, target, relative_risk_data)
-    relative_risk_data = rebin_relative_risk_data(builder, risk, relative_risk_data)
-
-    if get_distribution_type(builder, risk) in [
-        "dichotomous",
-        "ordered_polytomous",
-        "unordered_polytomous",
-    ]:
-        relative_risk_data = pivot_categorical(relative_risk_data)
-        # Check if any values for relative risk are below expected boundary of 1.0
-        category_columns = [c for c in relative_risk_data.columns if "cat" in c]
-        if not relative_risk_data[
-            (relative_risk_data[category_columns] < 1.0).any(axis=1)
-        ].empty:
-            logger.warning(
-                f"WARNING: Some data values are below the expected boundary of 1.0 for {risk}.relative_risk"
-            )
-
-    else:
-        relative_risk_data = relative_risk_data.drop(columns=["parameter"])
-
-    return relative_risk_data
-
-
-def load_relative_risk_data(
-    builder, risk: EntityString, target: TargetString, source_type: str
-):
-    relative_risk_source = builder.configuration[f"effect_of_{risk.name}_on_{target.name}"][
-        target.measure
-    ]
-
-    if source_type == "data":
-        relative_risk_data = builder.data.load(f"{risk}.relative_risk")
-        correct_target = (relative_risk_data["affected_entity"] == target.name) & (
-            relative_risk_data["affected_measure"] == target.measure
-        )
-        relative_risk_data = relative_risk_data[correct_target].drop(
-            columns=["affected_entity", "affected_measure"]
-        )
-
-    elif source_type == "relative risk value":
-        relative_risk_data = _make_relative_risk_data(
-            builder, float(relative_risk_source["relative_risk"])
-        )
-
-    else:  # distribution
-        parameters = {
-            k: v for k, v in relative_risk_source.to_dict().items() if v is not None
-        }
-        random_state = np.random.RandomState(
-            builder.randomness.get_seed(
-                f"effect_of_{risk.name}_on_{target.name}.{target.measure}"
-            )
-        )
-        cat1_value = generate_relative_risk_from_distribution(random_state, parameters)
-        relative_risk_data = _make_relative_risk_data(builder, cat1_value)
-
-    return relative_risk_data
-
-
-def generate_relative_risk_from_distribution(
-    random_state: np.random.RandomState, parameters: dict
-) -> Union[float, pd.Series, np.ndarray]:
-    first = pd.Series(list(parameters.values())[0])
-    length = len(first)
-    index = first.index
-
-    for v in parameters.values():
-        if length != len(pd.Series(v)) or not index.equals(pd.Series(v).index):
-            raise ValueError(
-                "If specifying vectorized parameters, all parameters "
-                "must be the same length and have the same index."
-            )
-
-    if "mean" in parameters:  # normal distribution
-        rr_value = random_state.normal(parameters["mean"], parameters["se"])
-    elif "log_mean" in parameters:  # log distribution
-        log_value = parameters["log_mean"] + parameters["log_se"] * random_state.randn()
-        if parameters["tau_squared"]:
-            log_value += random_state.normal(0, parameters["tau_squared"])
-        rr_value = np.exp(log_value)
-    else:
-        raise NotImplementedError(
-            f"Only normal distributions (supplying mean and se) and log distributions "
-            f"(supplying log_mean, log_se, and tau_squared) are currently supported."
-        )
-
-    rr_value = np.maximum(1, rr_value)
-
-    return rr_value
-
-
-def _make_relative_risk_data(builder, cat1_value: float) -> pd.DataFrame:
-    cat1 = builder.data.load("population.demographic_dimensions")
-    cat1["parameter"] = "cat1"
-    cat1["value"] = cat1_value
-    cat2 = cat1.copy()
-    cat2["parameter"] = "cat2"
-    cat2["value"] = 1
-    return pd.concat([cat1, cat2], ignore_index=True)
-
-
 def rebin_relative_risk_data(
     builder, risk: EntityString, relative_risk_data: pd.DataFrame
 ) -> pd.DataFrame:
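The removed `generate_relative_risk_from_distribution` drew a relative risk from either a normal distribution (`mean`, `se`) or a log distribution (`log_mean`, `log_se`, `tau_squared`), clipping the result at the boundary of 1. A standalone sketch of the log-distribution path, with hypothetical parameter values:

```python
import numpy as np

# Hypothetical parameters; the removed code read them from configuration.
log_mean, log_se, tau_squared = 0.4, 0.1, 0.05

random_state = np.random.RandomState(12345)

# Draw on the log scale, add between-study variation if tau_squared is
# nonzero, exponentiate, and clip at the expected RR boundary of 1.
log_value = log_mean + log_se * random_state.randn()
if tau_squared:
    log_value += random_state.normal(0, tau_squared)
rr_value = np.maximum(1, np.exp(log_value))
```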
@@ -287,9 +85,13 @@ def rebin_relative_risk_data(
     for the matching rr = [rr1, rr2, rr3, 1], rebinned rr for the rebinned cat1 should be:
     (0.1 *rr1 + 0.2 * rr2 + 0.3* rr3) / (0.1+0.2+0.3)
     """
-    rebin_exposed_categories = set(builder.configuration[risk.name]["rebinned_exposed"])
+    if not risk in builder.configuration.to_dict():
+        return relative_risk_data
+
+    rebin_exposed_categories = set(builder.configuration[risk]["rebinned_exposed"])
 
     if rebin_exposed_categories:
+        # todo make sure this works
         exposure_data = load_exposure_data(builder, risk)
         relative_risk_data = _rebin_relative_risk_data(
             relative_risk_data, exposure_data, rebin_exposed_categories
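A worked instance of the docstring's rebinning formula: with exposures [0.1, 0.2, 0.3, 0.4] for cat1 through cat4 and hypothetical relative risks rr1 = 3.0, rr2 = 2.0, rr3 = 1.5, the rebinned cat1 relative risk is the exposure-weighted average over the rebinned categories:

```python
import numpy as np

# Exposures for cat1..cat3 (cat4 is the unexposed remainder) and
# hypothetical relative risks rr1..rr3.
exposure = np.array([0.1, 0.2, 0.3])
rr = np.array([3.0, 2.0, 1.5])

rebinned_rr_cat1 = (exposure * rr).sum() / exposure.sum()
# (0.1*3.0 + 0.2*2.0 + 0.3*1.5) / (0.1 + 0.2 + 0.3) = 1.15 / 0.6 ≈ 1.917
```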
@@ -319,78 +121,16 @@ def _rebin_relative_risk_data(
     return relative_risk_data.drop(columns=["value_x", "value_y"])
 
 
-def get_exposure_effect(builder, risk: EntityString):
-    distribution_type = get_distribution_type(builder, risk)
-    risk_exposure = builder.value.get_value(f"{risk.name}.exposure")
-
-    if distribution_type in ["normal", "lognormal", "ensemble"]:
-        tmred = builder.data.load(f"{risk}.tmred")
-        tmrel = 0.5 * (tmred["min"] + tmred["max"])
-        scale = builder.data.load(f"{risk}.relative_risk_scalar")
-
-        def exposure_effect(rates, rr):
-            exposure = risk_exposure(rr.index)
-            relative_risk = np.maximum(rr.values ** ((exposure - tmrel) / scale), 1)
-            return rates * relative_risk
-
-    else:
-
-        def exposure_effect(rates, rr: pd.DataFrame) -> pd.Series:
-            index_columns = ["index", risk.name]
-
-            exposure = risk_exposure(rr.index).reset_index()
-            exposure.columns = index_columns
-            exposure = exposure.set_index(index_columns)
-
-            relative_risk = rr.stack().reset_index()
-            relative_risk.columns = index_columns + ["value"]
-            relative_risk = relative_risk.set_index(index_columns)
-
-            effect = relative_risk.loc[exposure.index, "value"].droplevel(risk.name)
-            affected_rates = rates * effect
-            return affected_rates
-
-    return exposure_effect
-
-
-##################################################
-# Population attributable fraction data handlers #
-##################################################
-
-
-def get_population_attributable_fraction_data(
-    builder, risk: EntityString, target: TargetString
-):
-    exposure_source = builder.configuration[f"{risk.name}"]["exposure"]
-    rr_source_type = validate_relative_risk_data_source(builder, risk, target)
-
-    if exposure_source == "data" and rr_source_type == "data" and risk.type == "risk_factor":
-        paf_data = builder.data.load(f"{risk}.population_attributable_fraction")
-        correct_target = (paf_data["affected_entity"] == target.name) & (
-            paf_data["affected_measure"] == target.measure
-        )
-        paf_data = paf_data[correct_target].drop(
-            columns=["affected_entity", "affected_measure"]
-        )
-    else:
-        key_cols = ["sex", "age_start", "age_end", "year_start", "year_end"]
-        exposure_data = get_exposure_data(builder, risk).set_index(key_cols)
-        relative_risk_data = get_relative_risk_data(builder, risk, target).set_index(key_cols)
-        mean_rr = (exposure_data * relative_risk_data).sum(axis=1)
-        paf_data = ((mean_rr - 1) / mean_rr).reset_index().rename(columns={0: "value"})
-    return paf_data
-
-
 ##############
 # Validators #
 ##############
 
 
-def validate_distribution_data_source(builder, risk: EntityString):
+def validate_distribution_data_source(builder: Builder, risk: EntityString) -> None:
     """Checks that the exposure distribution specification is valid."""
-    exposure_type = builder.configuration[risk.name]["exposure"]
-    rebin = builder.configuration[risk.name]["rebinned_exposed"]
-    category_thresholds = builder.configuration[risk.name]["category_thresholds"]
+    exposure_type = builder.configuration[risk]["data_sources"]["exposure"]
+    rebin = builder.configuration[risk]["rebinned_exposed"]
+    category_thresholds = builder.configuration[risk]["category_thresholds"]
 
     if risk.type == "alternative_risk_factor":
         if exposure_type != "data" or rebin:
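The removed fallback branch of `get_population_attributable_fraction_data` computed the PAF from exposure and relative risk directly: the mean relative risk is the exposure-weighted sum over categories, and PAF = (mean_rr - 1) / mean_rr. A worked sketch with hypothetical dichotomous values:

```python
import numpy as np

# Hypothetical dichotomous exposure and relative risks.
exposure = np.array([0.25, 0.75])  # cat1 (exposed), cat2 (unexposed)
rr = np.array([2.0, 1.0])

mean_rr = (exposure * rr).sum()  # 0.25 * 2.0 + 0.75 * 1.0 = 1.25
paf = (mean_rr - 1) / mean_rr    # 0.25 / 1.25 = 0.2
```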
@@ -401,29 +141,20 @@ def validate_distribution_data_source(builder, risk: EntityString):
         if not category_thresholds:
             raise ValueError("Must specify category thresholds to use alternative risks.")
 
-    elif risk.type in ["risk_factor", "coverage_gap"]:
-        if isinstance(exposure_type, (int, float)) and not 0 <= exposure_type <= 1:
-            raise ValueError(f"Exposure should be in the range [0, 1]")
-        elif isinstance(exposure_type, str) and exposure_type.split(".")[0] not in [
-            "covariate",
-            "data",
-        ]:
-            raise ValueError(
-                f"Exposure must be specified as 'data', an integer or float value, "
-                f"or as a string in the format covariate.covariate_name"
-            )
-        else:
-            pass  # All good
-    else:
+    elif risk.type not in ["risk_factor", "coverage_gap"]:
         raise ValueError(f"Unknown risk type {risk.type} for risk {risk.name}")
 
 
 def validate_relative_risk_data_source(builder, risk: EntityString, target: TargetString):
-    source_key = f"effect_of_{risk.name}_on_{target.name}"
-    relative_risk_source = builder.configuration[source_key][target.measure]
+    from vivarium_public_health.risks import RiskEffect
+
+    source_key = RiskEffect.get_name(risk, target)
+    source_config = builder.configuration[source_key]
 
     provided_keys = set(
-        k for k, v in relative_risk_source.to_dict().items() if isinstance(v, (int, float))
+        k
+        for k, v in source_config["distribution_args"].to_dict().items()
+        if isinstance(v, (int, float))
     )
 
     source_map = {
@@ -442,24 +173,24 @@ def validate_relative_risk_data_source(builder, risk: EntityString, target: Targ
     source_type = [k for k, v in source_map.items() if provided_keys == v][0]
 
     if source_type == "relative risk value":
-        if not 1 <= relative_risk_source["relative_risk"] <= 100:
+        if not 1 <= source_type <= 100:
             raise ValueError(
-                f"If specifying a single value for relative risk, it should be in the "
-                f"range [1, 100]. You provided {relative_risk_source['relative_risk']} for {source_key}."
+                "If specifying a single value for relative risk, it should be in the range [1, 100]. "
+                f"You provided {source_type} for {source_key}."
             )
     elif source_type == "normal distribution":
-        if relative_risk_source["mean"] <= 0 or relative_risk_source["se"] <= 0:
+        if source_config["mean"] <= 0 or source_config["se"] <= 0:
             raise ValueError(
                 f"To specify parameters for a normal distribution for a risk effect, you must provide"
                 f"both mean and se above 0. This is not the case for {source_key}."
             )
     elif source_type == "log distribution":
-        if relative_risk_source["log_mean"] <= 0 or relative_risk_source["log_se"] <= 0:
+        if source_config["log_mean"] <= 0 or source_config["log_se"] <= 0:
             raise ValueError(
                 f"To specify parameters for a log distribution for a risk effect, you must provide"
                 f"both log_mean and log_se above 0. This is not the case for {source_key}."
             )
-        if relative_risk_source["tau_squared"] < 0:
+        if source_config["tau_squared"] < 0:
             raise ValueError(
                 f"To specify parameters for a log distribution for a risk effect, you must provide"
                 f"tau_squared >= 0. This is not the case for {source_key}."
@@ -478,13 +209,18 @@ def validate_relative_risk_rebin_source(
             f"Subsetting {risk} relative risk data to {target.name} {target.measure} "
             "returned an empty DataFrame. Check your artifact."
         )
-    validate_rebin_source(builder, risk, data)
+    if risk in builder.configuration.to_dict():
+        validate_rebin_source(builder, risk, data)
+
+
+def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame) -> None:
 
+    if not isinstance(data, pd.DataFrame):
+        return
 
-def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame):
-    rebin_exposed_categories = set(builder.configuration[risk.name]["rebinned_exposed"])
+    rebin_exposed_categories = set(builder.configuration[risk]["rebinned_exposed"])
 
-    if rebin_exposed_categories and builder.configuration[risk.name]["category_thresholds"]:
+    if rebin_exposed_categories and builder.configuration[risk]["category_thresholds"]:
         raise ValueError(
             f"Rebinning and category thresholds are mutually exclusive. "
             f"You provided both for {risk.name}."
@@ -494,20 +230,23 @@ def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame):
         f"{risk}.distribution"
     ):
         raise ValueError(
-            f"Rebinning is only supported for polytomous risks. You provided rebinning exposed categories"
-            f'for {risk.name}, which is of type {builder.data.load(f"{risk}.distribution")}.'
+            f"Rebinning is only supported for polytomous risks. You provided "
+            f"rebinning exposed categoriesfor {risk.name}, which is of "
+            f"type {builder.data.load(f'{risk}.distribution')}."
         )
 
     invalid_cats = rebin_exposed_categories.difference(set(data.parameter))
     if invalid_cats:
         raise ValueError(
-            f"The following provided categories for the rebinned exposed category of {risk.name} "
-            f"are not found in the exposure data: {invalid_cats}."
+            f"The following provided categories for the rebinned exposed "
+            f"category of {risk.name} are not found in the exposure data: "
+            f"{invalid_cats}."
         )
 
     if rebin_exposed_categories == set(data.parameter):
         raise ValueError(
-            f"The provided categories for the rebinned exposed category of {risk.name} comprise all "
-            f"categories for the exposure data. At least one category must be left out of the provided "
-            f"categories to be rebinned into the unexposed category."
+            f"The provided categories for the rebinned exposed "
+            f"category of {risk.name} comprise all categories for the exposure data. "
+            f"At least one category must be left out of the provided categories "
+            f"to be rebinned into the unexposed category."
         )