vivarium-public-health 2.3.2__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. vivarium_public_health/_version.py +1 -1
  2. vivarium_public_health/disease/model.py +23 -21
  3. vivarium_public_health/disease/models.py +1 -0
  4. vivarium_public_health/disease/special_disease.py +40 -41
  5. vivarium_public_health/disease/state.py +42 -125
  6. vivarium_public_health/disease/transition.py +70 -27
  7. vivarium_public_health/mslt/delay.py +1 -0
  8. vivarium_public_health/mslt/disease.py +1 -0
  9. vivarium_public_health/mslt/intervention.py +1 -0
  10. vivarium_public_health/mslt/magic_wand_components.py +1 -0
  11. vivarium_public_health/mslt/observer.py +1 -0
  12. vivarium_public_health/mslt/population.py +1 -0
  13. vivarium_public_health/plugins/parser.py +61 -31
  14. vivarium_public_health/population/add_new_birth_cohorts.py +2 -3
  15. vivarium_public_health/population/base_population.py +2 -1
  16. vivarium_public_health/population/mortality.py +83 -80
  17. vivarium_public_health/{metrics → results}/__init__.py +2 -0
  18. vivarium_public_health/results/columns.py +22 -0
  19. vivarium_public_health/results/disability.py +187 -0
  20. vivarium_public_health/results/disease.py +222 -0
  21. vivarium_public_health/results/mortality.py +186 -0
  22. vivarium_public_health/results/observer.py +78 -0
  23. vivarium_public_health/results/risk.py +138 -0
  24. vivarium_public_health/results/simple_cause.py +18 -0
  25. vivarium_public_health/{metrics → results}/stratification.py +10 -8
  26. vivarium_public_health/risks/__init__.py +1 -2
  27. vivarium_public_health/risks/base_risk.py +134 -29
  28. vivarium_public_health/risks/data_transformations.py +65 -326
  29. vivarium_public_health/risks/distributions.py +315 -145
  30. vivarium_public_health/risks/effect.py +376 -75
  31. vivarium_public_health/risks/implementations/low_birth_weight_and_short_gestation.py +61 -89
  32. vivarium_public_health/treatment/magic_wand.py +1 -0
  33. vivarium_public_health/treatment/scale_up.py +1 -0
  34. vivarium_public_health/treatment/therapeutic_inertia.py +1 -0
  35. vivarium_public_health/utilities.py +17 -2
  36. {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/METADATA +13 -3
  37. vivarium_public_health-3.0.0.dist-info/RECORD +49 -0
  38. {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/WHEEL +1 -1
  39. vivarium_public_health/metrics/disability.py +0 -118
  40. vivarium_public_health/metrics/disease.py +0 -136
  41. vivarium_public_health/metrics/mortality.py +0 -144
  42. vivarium_public_health/metrics/risk.py +0 -110
  43. vivarium_public_health/testing/__init__.py +0 -0
  44. vivarium_public_health/testing/mock_artifact.py +0 -145
  45. vivarium_public_health/testing/utils.py +0 -71
  46. vivarium_public_health-2.3.2.dist-info/RECORD +0 -49
  47. {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/LICENSE.txt +0 -0
  48. {vivarium_public_health-2.3.2.dist-info → vivarium_public_health-3.0.0.dist-info}/top_level.txt +0 -0
@@ -8,11 +8,9 @@ risk data and performing any necessary data transformations.
8
8
 
9
9
  """
10
10
 
11
- from typing import Union
12
-
13
11
  import numpy as np
14
12
  import pandas as pd
15
- from loguru import logger
13
+ from vivarium.framework.engine import Builder
16
14
 
17
15
  from vivarium_public_health.utilities import EntityString, TargetString
18
16
 
@@ -21,12 +19,24 @@ from vivarium_public_health.utilities import EntityString, TargetString
21
19
  #############
22
20
 
23
21
 
24
- def pivot_categorical(data: pd.DataFrame) -> pd.DataFrame:
22
+ def pivot_categorical(
23
+ builder: Builder,
24
+ risk: EntityString,
25
+ data: pd.DataFrame,
26
+ pivot_column: str = "parameter",
27
+ reset_index: bool = True,
28
+ ) -> pd.DataFrame:
25
29
  """Pivots data that is long on categories to be wide."""
26
- key_cols = ["sex", "age_start", "age_end", "year_start", "year_end"]
27
- key_cols = [k for k in key_cols if k in data.columns]
28
- data = data.pivot_table(index=key_cols, columns="parameter", values="value").reset_index()
30
+ # todo remove dependency on artifact manager having exactly one value column
31
+ value_column = builder.data.value_columns()(f"{risk}.exposure")[0]
32
+ index_cols = [
33
+ column for column in data.columns if column not in [value_column, pivot_column]
34
+ ]
35
+ data = data.pivot_table(index=index_cols, columns=pivot_column, values=value_column)
36
+ if reset_index:
37
+ data = data.reset_index()
29
38
  data.columns.name = None
39
+
30
40
  return data
31
41
 
32
42
 
@@ -35,14 +45,8 @@ def pivot_categorical(data: pd.DataFrame) -> pd.DataFrame:
35
45
  ##########################
36
46
 
37
47
 
38
- def get_distribution_data(builder, risk: EntityString):
39
- validate_distribution_data_source(builder, risk)
40
- data = load_distribution_data(builder, risk)
41
- return data
42
-
43
-
44
- def get_exposure_post_processor(builder, risk: EntityString):
45
- thresholds = builder.configuration[risk.name]["category_thresholds"]
48
+ def get_exposure_post_processor(builder, risk: str):
49
+ thresholds = builder.configuration[risk]["category_thresholds"]
46
50
 
47
51
  if thresholds:
48
52
  thresholds = [-np.inf] + thresholds + [np.inf]
@@ -59,111 +63,10 @@ def get_exposure_post_processor(builder, risk: EntityString):
59
63
  return post_processor
60
64
 
61
65
 
62
- def load_distribution_data(builder, risk: EntityString):
63
- exposure_data = get_exposure_data(builder, risk)
64
-
65
- data = {
66
- "distribution_type": get_distribution_type(builder, risk),
67
- "exposure": exposure_data,
68
- "exposure_standard_deviation": get_exposure_standard_deviation_data(builder, risk),
69
- "weights": get_exposure_distribution_weights(builder, risk),
70
- }
71
- return data
72
-
73
-
74
- def get_distribution_type(builder, risk: EntityString):
75
- risk_config = builder.configuration[risk.name]
76
-
77
- if risk_config["exposure"] == "data" and not risk_config["rebinned_exposed"]:
78
- distribution_type = builder.data.load(f"{risk}.distribution")
79
- else:
80
- distribution_type = "dichotomous"
81
-
82
- return distribution_type
83
-
84
-
85
- def get_exposure_data(builder, risk: EntityString):
86
- exposure_data = load_exposure_data(builder, risk)
87
- exposure_data = rebin_exposure_data(builder, risk, exposure_data)
88
-
89
- if get_distribution_type(builder, risk) in [
90
- "dichotomous",
91
- "ordered_polytomous",
92
- "unordered_polytomous",
93
- "lbwsg",
94
- ]:
95
- exposure_data = pivot_categorical(exposure_data)
96
-
97
- return exposure_data
98
-
99
-
100
- def load_exposure_data(builder, risk: EntityString):
101
- risk_config = builder.configuration[risk.name]
102
- exposure_source = risk_config["exposure"]
103
-
104
- if exposure_source == "data":
105
- exposure_data = builder.data.load(f"{risk}.exposure")
106
- else:
107
- if isinstance(exposure_source, str): # Build from covariate
108
- cat1 = builder.data.load(f"{exposure_source}.estimate")
109
- # TODO: Generate a draw.
110
- cat1 = cat1[cat1["parameter"] == "mean_value"]
111
- cat1["parameter"] = "cat1"
112
- else: # We have a numerical value
113
- cat1 = builder.data.load("population.demographic_dimensions")
114
- cat1["parameter"] = "cat1"
115
- cat1["value"] = float(exposure_source)
116
- cat2 = cat1.copy()
117
- cat2["parameter"] = "cat2"
118
- cat2["value"] = 1 - cat2["value"]
119
- exposure_data = pd.concat([cat1, cat2], ignore_index=True)
120
-
121
- return exposure_data
122
-
123
-
124
- def get_exposure_standard_deviation_data(builder, risk: EntityString):
125
- distribution_type = get_distribution_type(builder, risk)
126
- if distribution_type in ["normal", "lognormal", "ensemble"]:
127
- exposure_sd = builder.data.load(f"{risk}.exposure_standard_deviation")
128
- else:
129
- exposure_sd = None
130
- return exposure_sd
131
-
132
-
133
- def get_exposure_distribution_weights(builder, risk: EntityString):
134
- distribution_type = get_distribution_type(builder, risk)
135
- if distribution_type == "ensemble":
136
- weights = builder.data.load(f"{risk}.exposure_distribution_weights")
137
- weights = pivot_categorical(weights)
138
- if "glnorm" in weights.columns:
139
- if np.any(weights["glnorm"]):
140
- raise NotImplementedError("glnorm distribution is not supported")
141
- weights = weights.drop(columns=["glnorm"])
142
- else:
143
- weights = None
144
- return weights
145
-
146
-
147
- def rebin_exposure_data(builder, risk: EntityString, exposure_data: pd.DataFrame):
148
- validate_rebin_source(builder, risk, exposure_data)
149
- rebin_exposed_categories = set(builder.configuration[risk.name]["rebinned_exposed"])
150
-
151
- if rebin_exposed_categories:
152
- exposure_data = _rebin_exposure_data(exposure_data, rebin_exposed_categories)
153
-
154
- return exposure_data
155
-
156
-
157
- def _rebin_exposure_data(
158
- exposure_data: pd.DataFrame, rebin_exposed_categories: set
159
- ) -> pd.DataFrame:
160
- exposure_data["parameter"] = exposure_data["parameter"].map(
161
- lambda p: "cat1" if p in rebin_exposed_categories else "cat2"
162
- )
163
- return (
164
- exposure_data.groupby(list(exposure_data.columns.difference(["value"])))
165
- .sum()
166
- .reset_index()
66
+ def load_exposure_data(builder: Builder, risk: EntityString) -> pd.DataFrame:
67
+ risk_component = builder.components.get_component(risk)
68
+ return risk_component.get_data(
69
+ builder, builder.configuration[risk_component.name]["data_sources"]["exposure"]
167
70
  )
168
71
 
169
72
 
@@ -172,111 +75,6 @@ def _rebin_exposure_data(
172
75
  ###############################
173
76
 
174
77
 
175
- def get_relative_risk_data(builder, risk: EntityString, target: TargetString):
176
- source_type = validate_relative_risk_data_source(builder, risk, target)
177
- relative_risk_data = load_relative_risk_data(builder, risk, target, source_type)
178
- validate_relative_risk_rebin_source(builder, risk, target, relative_risk_data)
179
- relative_risk_data = rebin_relative_risk_data(builder, risk, relative_risk_data)
180
-
181
- if get_distribution_type(builder, risk) in [
182
- "dichotomous",
183
- "ordered_polytomous",
184
- "unordered_polytomous",
185
- ]:
186
- relative_risk_data = pivot_categorical(relative_risk_data)
187
- # Check if any values for relative risk are below expected boundary of 1.0
188
- category_columns = [c for c in relative_risk_data.columns if "cat" in c]
189
- if not relative_risk_data[
190
- (relative_risk_data[category_columns] < 1.0).any(axis=1)
191
- ].empty:
192
- logger.warning(
193
- f"WARNING: Some data values are below the expected boundary of 1.0 for {risk}.relative_risk"
194
- )
195
-
196
- else:
197
- relative_risk_data = relative_risk_data.drop(columns=["parameter"])
198
-
199
- return relative_risk_data
200
-
201
-
202
- def load_relative_risk_data(
203
- builder, risk: EntityString, target: TargetString, source_type: str
204
- ):
205
- relative_risk_source = builder.configuration[f"effect_of_{risk.name}_on_{target.name}"][
206
- target.measure
207
- ]
208
-
209
- if source_type == "data":
210
- relative_risk_data = builder.data.load(f"{risk}.relative_risk")
211
- correct_target = (relative_risk_data["affected_entity"] == target.name) & (
212
- relative_risk_data["affected_measure"] == target.measure
213
- )
214
- relative_risk_data = relative_risk_data[correct_target].drop(
215
- columns=["affected_entity", "affected_measure"]
216
- )
217
-
218
- elif source_type == "relative risk value":
219
- relative_risk_data = _make_relative_risk_data(
220
- builder, float(relative_risk_source["relative_risk"])
221
- )
222
-
223
- else: # distribution
224
- parameters = {
225
- k: v for k, v in relative_risk_source.to_dict().items() if v is not None
226
- }
227
- random_state = np.random.RandomState(
228
- builder.randomness.get_seed(
229
- f"effect_of_{risk.name}_on_{target.name}.{target.measure}"
230
- )
231
- )
232
- cat1_value = generate_relative_risk_from_distribution(random_state, parameters)
233
- relative_risk_data = _make_relative_risk_data(builder, cat1_value)
234
-
235
- return relative_risk_data
236
-
237
-
238
- def generate_relative_risk_from_distribution(
239
- random_state: np.random.RandomState, parameters: dict
240
- ) -> Union[float, pd.Series, np.ndarray]:
241
- first = pd.Series(list(parameters.values())[0])
242
- length = len(first)
243
- index = first.index
244
-
245
- for v in parameters.values():
246
- if length != len(pd.Series(v)) or not index.equals(pd.Series(v).index):
247
- raise ValueError(
248
- "If specifying vectorized parameters, all parameters "
249
- "must be the same length and have the same index."
250
- )
251
-
252
- if "mean" in parameters: # normal distribution
253
- rr_value = random_state.normal(parameters["mean"], parameters["se"])
254
- elif "log_mean" in parameters: # log distribution
255
- log_value = parameters["log_mean"] + parameters["log_se"] * random_state.randn()
256
- if parameters["tau_squared"]:
257
- log_value += random_state.normal(0, parameters["tau_squared"])
258
- rr_value = np.exp(log_value)
259
- else:
260
- raise NotImplementedError(
261
- f"Only normal distributions (supplying mean and se) and log distributions "
262
- f"(supplying log_mean, log_se, and tau_squared) are currently supported."
263
- )
264
-
265
- rr_value = np.maximum(1, rr_value)
266
-
267
- return rr_value
268
-
269
-
270
- def _make_relative_risk_data(builder, cat1_value: float) -> pd.DataFrame:
271
- cat1 = builder.data.load("population.demographic_dimensions")
272
- cat1["parameter"] = "cat1"
273
- cat1["value"] = cat1_value
274
- cat2 = cat1.copy()
275
- cat2["parameter"] = "cat2"
276
- cat2["value"] = 1
277
- return pd.concat([cat1, cat2], ignore_index=True)
278
-
279
-
280
78
  def rebin_relative_risk_data(
281
79
  builder, risk: EntityString, relative_risk_data: pd.DataFrame
282
80
  ) -> pd.DataFrame:
@@ -287,9 +85,13 @@ def rebin_relative_risk_data(
287
85
  for the matching rr = [rr1, rr2, rr3, 1], rebinned rr for the rebinned cat1 should be:
288
86
  (0.1 *rr1 + 0.2 * rr2 + 0.3* rr3) / (0.1+0.2+0.3)
289
87
  """
290
- rebin_exposed_categories = set(builder.configuration[risk.name]["rebinned_exposed"])
88
+ if not risk in builder.configuration.to_dict():
89
+ return relative_risk_data
90
+
91
+ rebin_exposed_categories = set(builder.configuration[risk]["rebinned_exposed"])
291
92
 
292
93
  if rebin_exposed_categories:
94
+ # todo make sure this works
293
95
  exposure_data = load_exposure_data(builder, risk)
294
96
  relative_risk_data = _rebin_relative_risk_data(
295
97
  relative_risk_data, exposure_data, rebin_exposed_categories
@@ -319,78 +121,16 @@ def _rebin_relative_risk_data(
319
121
  return relative_risk_data.drop(columns=["value_x", "value_y"])
320
122
 
321
123
 
322
- def get_exposure_effect(builder, risk: EntityString):
323
- distribution_type = get_distribution_type(builder, risk)
324
- risk_exposure = builder.value.get_value(f"{risk.name}.exposure")
325
-
326
- if distribution_type in ["normal", "lognormal", "ensemble"]:
327
- tmred = builder.data.load(f"{risk}.tmred")
328
- tmrel = 0.5 * (tmred["min"] + tmred["max"])
329
- scale = builder.data.load(f"{risk}.relative_risk_scalar")
330
-
331
- def exposure_effect(rates, rr):
332
- exposure = risk_exposure(rr.index)
333
- relative_risk = np.maximum(rr.values ** ((exposure - tmrel) / scale), 1)
334
- return rates * relative_risk
335
-
336
- else:
337
-
338
- def exposure_effect(rates, rr: pd.DataFrame) -> pd.Series:
339
- index_columns = ["index", risk.name]
340
-
341
- exposure = risk_exposure(rr.index).reset_index()
342
- exposure.columns = index_columns
343
- exposure = exposure.set_index(index_columns)
344
-
345
- relative_risk = rr.stack().reset_index()
346
- relative_risk.columns = index_columns + ["value"]
347
- relative_risk = relative_risk.set_index(index_columns)
348
-
349
- effect = relative_risk.loc[exposure.index, "value"].droplevel(risk.name)
350
- affected_rates = rates * effect
351
- return affected_rates
352
-
353
- return exposure_effect
354
-
355
-
356
- ##################################################
357
- # Population attributable fraction data handlers #
358
- ##################################################
359
-
360
-
361
- def get_population_attributable_fraction_data(
362
- builder, risk: EntityString, target: TargetString
363
- ):
364
- exposure_source = builder.configuration[f"{risk.name}"]["exposure"]
365
- rr_source_type = validate_relative_risk_data_source(builder, risk, target)
366
-
367
- if exposure_source == "data" and rr_source_type == "data" and risk.type == "risk_factor":
368
- paf_data = builder.data.load(f"{risk}.population_attributable_fraction")
369
- correct_target = (paf_data["affected_entity"] == target.name) & (
370
- paf_data["affected_measure"] == target.measure
371
- )
372
- paf_data = paf_data[correct_target].drop(
373
- columns=["affected_entity", "affected_measure"]
374
- )
375
- else:
376
- key_cols = ["sex", "age_start", "age_end", "year_start", "year_end"]
377
- exposure_data = get_exposure_data(builder, risk).set_index(key_cols)
378
- relative_risk_data = get_relative_risk_data(builder, risk, target).set_index(key_cols)
379
- mean_rr = (exposure_data * relative_risk_data).sum(axis=1)
380
- paf_data = ((mean_rr - 1) / mean_rr).reset_index().rename(columns={0: "value"})
381
- return paf_data
382
-
383
-
384
124
  ##############
385
125
  # Validators #
386
126
  ##############
387
127
 
388
128
 
389
- def validate_distribution_data_source(builder, risk: EntityString):
129
+ def validate_distribution_data_source(builder: Builder, risk: EntityString) -> None:
390
130
  """Checks that the exposure distribution specification is valid."""
391
- exposure_type = builder.configuration[risk.name]["exposure"]
392
- rebin = builder.configuration[risk.name]["rebinned_exposed"]
393
- category_thresholds = builder.configuration[risk.name]["category_thresholds"]
131
+ exposure_type = builder.configuration[risk]["data_sources"]["exposure"]
132
+ rebin = builder.configuration[risk]["rebinned_exposed"]
133
+ category_thresholds = builder.configuration[risk]["category_thresholds"]
394
134
 
395
135
  if risk.type == "alternative_risk_factor":
396
136
  if exposure_type != "data" or rebin:
@@ -401,29 +141,20 @@ def validate_distribution_data_source(builder, risk: EntityString):
401
141
  if not category_thresholds:
402
142
  raise ValueError("Must specify category thresholds to use alternative risks.")
403
143
 
404
- elif risk.type in ["risk_factor", "coverage_gap"]:
405
- if isinstance(exposure_type, (int, float)) and not 0 <= exposure_type <= 1:
406
- raise ValueError(f"Exposure should be in the range [0, 1]")
407
- elif isinstance(exposure_type, str) and exposure_type.split(".")[0] not in [
408
- "covariate",
409
- "data",
410
- ]:
411
- raise ValueError(
412
- f"Exposure must be specified as 'data', an integer or float value, "
413
- f"or as a string in the format covariate.covariate_name"
414
- )
415
- else:
416
- pass # All good
417
- else:
144
+ elif risk.type not in ["risk_factor", "coverage_gap"]:
418
145
  raise ValueError(f"Unknown risk type {risk.type} for risk {risk.name}")
419
146
 
420
147
 
421
148
  def validate_relative_risk_data_source(builder, risk: EntityString, target: TargetString):
422
- source_key = f"effect_of_{risk.name}_on_{target.name}"
423
- relative_risk_source = builder.configuration[source_key][target.measure]
149
+ from vivarium_public_health.risks import RiskEffect
150
+
151
+ source_key = RiskEffect.get_name(risk, target)
152
+ source_config = builder.configuration[source_key]
424
153
 
425
154
  provided_keys = set(
426
- k for k, v in relative_risk_source.to_dict().items() if isinstance(v, (int, float))
155
+ k
156
+ for k, v in source_config["distribution_args"].to_dict().items()
157
+ if isinstance(v, (int, float))
427
158
  )
428
159
 
429
160
  source_map = {
@@ -442,24 +173,24 @@ def validate_relative_risk_data_source(builder, risk: EntityString, target: Targ
442
173
  source_type = [k for k, v in source_map.items() if provided_keys == v][0]
443
174
 
444
175
  if source_type == "relative risk value":
445
- if not 1 <= relative_risk_source["relative_risk"] <= 100:
176
+ if not 1 <= source_type <= 100:
446
177
  raise ValueError(
447
- f"If specifying a single value for relative risk, it should be in the "
448
- f"range [1, 100]. You provided {relative_risk_source['relative_risk']} for {source_key}."
178
+ "If specifying a single value for relative risk, it should be in the range [1, 100]. "
179
+ f"You provided {source_type} for {source_key}."
449
180
  )
450
181
  elif source_type == "normal distribution":
451
- if relative_risk_source["mean"] <= 0 or relative_risk_source["se"] <= 0:
182
+ if source_config["mean"] <= 0 or source_config["se"] <= 0:
452
183
  raise ValueError(
453
184
  f"To specify parameters for a normal distribution for a risk effect, you must provide"
454
185
  f"both mean and se above 0. This is not the case for {source_key}."
455
186
  )
456
187
  elif source_type == "log distribution":
457
- if relative_risk_source["log_mean"] <= 0 or relative_risk_source["log_se"] <= 0:
188
+ if source_config["log_mean"] <= 0 or source_config["log_se"] <= 0:
458
189
  raise ValueError(
459
190
  f"To specify parameters for a log distribution for a risk effect, you must provide"
460
191
  f"both log_mean and log_se above 0. This is not the case for {source_key}."
461
192
  )
462
- if relative_risk_source["tau_squared"] < 0:
193
+ if source_config["tau_squared"] < 0:
463
194
  raise ValueError(
464
195
  f"To specify parameters for a log distribution for a risk effect, you must provide"
465
196
  f"tau_squared >= 0. This is not the case for {source_key}."
@@ -478,13 +209,18 @@ def validate_relative_risk_rebin_source(
478
209
  f"Subsetting {risk} relative risk data to {target.name} {target.measure} "
479
210
  "returned an empty DataFrame. Check your artifact."
480
211
  )
481
- validate_rebin_source(builder, risk, data)
212
+ if risk in builder.configuration.to_dict():
213
+ validate_rebin_source(builder, risk, data)
214
+
215
+
216
+ def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame) -> None:
482
217
 
218
+ if not isinstance(data, pd.DataFrame):
219
+ return
483
220
 
484
- def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame):
485
- rebin_exposed_categories = set(builder.configuration[risk.name]["rebinned_exposed"])
221
+ rebin_exposed_categories = set(builder.configuration[risk]["rebinned_exposed"])
486
222
 
487
- if rebin_exposed_categories and builder.configuration[risk.name]["category_thresholds"]:
223
+ if rebin_exposed_categories and builder.configuration[risk]["category_thresholds"]:
488
224
  raise ValueError(
489
225
  f"Rebinning and category thresholds are mutually exclusive. "
490
226
  f"You provided both for {risk.name}."
@@ -494,20 +230,23 @@ def validate_rebin_source(builder, risk: EntityString, data: pd.DataFrame):
494
230
  f"{risk}.distribution"
495
231
  ):
496
232
  raise ValueError(
497
- f"Rebinning is only supported for polytomous risks. You provided rebinning exposed categories"
498
- f'for {risk.name}, which is of type {builder.data.load(f"{risk}.distribution")}.'
233
+ f"Rebinning is only supported for polytomous risks. You provided "
234
+ f"rebinning exposed categoriesfor {risk.name}, which is of "
235
+ f"type {builder.data.load(f'{risk}.distribution')}."
499
236
  )
500
237
 
501
238
  invalid_cats = rebin_exposed_categories.difference(set(data.parameter))
502
239
  if invalid_cats:
503
240
  raise ValueError(
504
- f"The following provided categories for the rebinned exposed category of {risk.name} "
505
- f"are not found in the exposure data: {invalid_cats}."
241
+ f"The following provided categories for the rebinned exposed "
242
+ f"category of {risk.name} are not found in the exposure data: "
243
+ f"{invalid_cats}."
506
244
  )
507
245
 
508
246
  if rebin_exposed_categories == set(data.parameter):
509
247
  raise ValueError(
510
- f"The provided categories for the rebinned exposed category of {risk.name} comprise all "
511
- f"categories for the exposure data. At least one category must be left out of the provided "
512
- f"categories to be rebinned into the unexposed category."
248
+ f"The provided categories for the rebinned exposed category of "
249
+ f"{risk.name} comprise all categories for the exposure data. "
250
+ f"At least one category must be left out of the provided categories "
251
+ f"to be rebinned into the unexposed category."
513
252
  )