PyPI - vivarium-public-health - Versions diffs - 3.0.2__py3-none-any.whl → 3.0.4__py3-none-any.whl - Mend

vivarium-public-health 3.0.2__py3-none-any.whl → 3.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

vivarium_public_health/results/observer.py CHANGED Viewed

@@ -8,16 +8,19 @@ from vivarium_public_health.results.columns import COLUMNS
 class PublicHealthObserver(Observer):
-    """A convenience class for typical public health observers. It provides
-    an entry point for registering the most common observation type
-    as well as standardized results formatting methods to overwrite as necessary.
+    """A convenience class for typical public health observers.
+    It exposes a method for registering the most common observation type
+    (adding observation) as well as methods for formatting public health results
+    in a standardized way (to be overwritten as necessary).
     """
     def register_adding_observation(
         self,
         builder: Builder,
-        name,
-        pop_filter,
+        name: str,
+        pop_filter: str,
         when: str = "collect_metrics",
         requires_columns: List[str] = [],
         requires_values: List[str] = [],
@@ -25,7 +28,42 @@ class PublicHealthObserver(Observer):
         excluded_stratifications: List[str] = [],
         aggregator_sources: Optional[List[str]] = None,
         aggregator: Callable[[pd.DataFrame], Union[float, pd.Series]] = len,
-    ):
+    ) -> None:
+        """Registers an adding observation to the results system.
+        An "adding" observation is one that adds/sums new results to existing
+        result values. It is the most common type of observation used in public
+        health models.
+        Parameters
+        ----------
+        builder
+            The builder object.
+        name
+            Name of the observation. It will also be the name of the output results
+            file for this particular observation.
+        pop_filter
+            A Pandas query filter string to filter the population down to the
+            simulants who should be considered for the observation.
+        when
+            Name of the lifecycle phase the observation should happen. Valid values are:
+            "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics".
+        requires_columns
+            List of the state table columns that are required by either the `pop_filter`
+            or the `aggregator`.
+        requires_values
+            List of the value pipelines that are required by either the `pop_filter`
+            or the `aggregator`.
+        additional_stratifications
+            List of additional stratification names by which to stratify this
+            observation.
+        excluded_stratifications
+            List of default stratification names to remove from this observation.
+        aggregator_sources
+            List of population view columns to be used in the `aggregator`.
+        aggregator
+            Function that computes the quantity for this observation.
+        """
         builder.results.register_adding_observation(
             name=name,
             pop_filter=pop_filter,
@@ -42,6 +80,27 @@ class PublicHealthObserver(Observer):
     def format_results(self, measure: str, results: pd.DataFrame) -> pd.DataFrame:
         """Top-level results formatter that calls standard sub-methods to be
         overwritten as necessary.
+        Public health observations typically require four columns in addition to
+        any stratifications and results columns: 'measure', 'entity_type', 'entity',
+        and 'sub_entity'. This method provides a standardized way to format
+        results by providing five sub-methods to be overwritten as necessary:
+        - format()
+        - get_measure_column()
+        - get_entity_type_column()
+        - get_entity_column()
+        - get_sub_entity_column()
+        Parameters
+        ----------
+        measure
+            The measure name.
+        results
+            The raw results.
+        Returns
+        -------
+            The formatted results.
         """
         results = self.format(measure, results)
@@ -63,16 +122,92 @@ class PublicHealthObserver(Observer):
         return results[ordered_columns]
     def format(self, measure: str, results: pd.DataFrame) -> pd.DataFrame:
+        """Format results.
+        This method should be overwritten in subclasses to provide custom formatting
+        for the results.
+        Parameters
+        ----------
+        measure
+            The measure name.
+        results
+            The raw results.
+        Returns
+        -------
+            The formatted results.
+        """
         return results
     def get_measure_column(self, measure: str, results: pd.DataFrame) -> pd.Series:
+        """Get the 'measure' column.
+        This method should be overwritten in subclasses to provide the 'measure' column.
+        Parameters
+        ----------
+        measure
+            The measure name.
+        results
+            The raw results.
+        Returns
+        -------
+            The 'measure' column values.
+        """
         return pd.Series(measure, index=results.index)
     def get_entity_type_column(self, measure: str, results: pd.DataFrame) -> pd.Series:
+        """Get the 'entity_type' column.
+        This method should be overwritten in subclasses to provide the 'entity_type' column.
+        Parameters
+        ----------
+        measure
+            The measure name.
+        results
+            The raw results.
+        Returns
+        -------
+            The 'entity_type' column values.
+        """
         return pd.Series(None, index=results.index)
     def get_entity_column(self, measure: str, results: pd.DataFrame) -> pd.Series:
+        """Get the 'entity' column.
+        This method should be overwritten in subclasses to provide the 'entity' column.
+        Parameters
+        ----------
+        measure
+            The measure name.
+        results
+            The raw results.
+        Returns
+        -------
+            The 'entity' column values.
+        """
         return pd.Series(None, index=results.index)
     def get_sub_entity_column(self, measure: str, results: pd.DataFrame) -> pd.Series:
+        """Get the 'sub_entity' column.
+        This method should be overwritten in subclasses to provide the 'sub_entity' column.
+        Parameters
+        ----------
+        measure
+            The measure name.
+        results
+            The raw results.
+        Returns
+        -------
+            The 'sub_entity' column values.
+        """
         return pd.Series(None, index=results.index)

vivarium_public_health/results/risk.py CHANGED Viewed

@@ -40,6 +40,18 @@ class CategoricalRiskObserver(PublicHealthObserver):
                         - "sex"
                     include:
                         - "sample_stratification"
+    Attributes
+    ----------
+    risk
+        The name of the risk factor.
+    exposure_pipeline_name
+        The name of the pipeline that produces the risk factor exposure.
+    step_size
+        The time step size of the simulation.
+    categories
+        The categories of the risk factor.
     """
     ##############
@@ -48,8 +60,7 @@ class CategoricalRiskObserver(PublicHealthObserver):
     @property
     def configuration_defaults(self) -> Dict[str, Any]:
-        """
-        A dictionary containing the defaults for any configurations managed by
+        """A dictionary containing the defaults for any configurations managed by
         this component.
         """
         return {
@@ -62,6 +73,7 @@ class CategoricalRiskObserver(PublicHealthObserver):
     @property
     def columns_required(self) -> Optional[List[str]]:
+        """The columns required by this observer."""
         return ["alive"]
     #####################
@@ -69,11 +81,12 @@ class CategoricalRiskObserver(PublicHealthObserver):
     #####################
     def __init__(self, risk: str) -> None:
-        """
+        """Constructor for this observer.
         Parameters
         ----------
-        risk: name of a risk
+        risk
+            The name of the risk being observed
         """
         super().__init__()
         self.risk = risk
@@ -84,13 +97,37 @@ class CategoricalRiskObserver(PublicHealthObserver):
     #################
     def setup(self, builder: Builder) -> None:
+        """Set up the observer."""
         self.step_size = builder.time.step_size()
         self.categories = builder.data.load(f"risk_factor.{self.risk}.categories")
     def get_configuration(self, builder: Builder) -> LayeredConfigTree:
+        """Get the stratification configuration for this observer.
+        Parameters
+        ----------
+        builder
+            The builder object for the simulation.
+        Returns
+        -------
+            The stratification configuration for this observer.
+        """
         return builder.configuration.stratification[self.risk]
     def register_observations(self, builder: Builder) -> None:
+        """Register a stratification and observation.
+        Notes
+        -----
+        While it's typical for all stratification registrations to be encapsulated
+        in a single class (i.e. the
+        :class:`ResultsStratifier <vivarium_public_health.results.stratification.ResultsStratifier>`),
+        this observer registers an additional one. While it could be registered
+        in the ``ResultsStratifier`` as well, it is specific to this observer and
+        so it is registered here while we have easy access to the required categories
+        and value names.
+        """
         builder.results.register_stratification(
             f"{self.risk}",
             list(self.categories.keys()),
@@ -113,6 +150,7 @@ class CategoricalRiskObserver(PublicHealthObserver):
     ###############
     def aggregate_risk_category_person_time(self, x: pd.DataFrame) -> float:
+        """Aggregate the person time for this time step."""
         return len(x) * to_years(self.step_size())
     ##############################
@@ -120,19 +158,42 @@ class CategoricalRiskObserver(PublicHealthObserver):
     ##############################
     def format(self, measure: str, results: pd.DataFrame) -> pd.DataFrame:
+        """Rename the appropriate column to 'sub_entity'.
+        The primary thing this method does is rename the risk column
+        to 'sub_entity'. We do this here instead of the 'get_sub_entity_column'
+        method simply because we do not want the risk column at all. If we keep
+        it here and then return it as the sub-entity column later, the final
+        results would have both.
+        Parameters
+        ----------
+        measure
+            The measure.
+        results
+            The results to format.
+        Returns
+        -------
+            The formatted results.
+        """
         results = results.reset_index()
         results.rename(columns={self.risk: COLUMNS.SUB_ENTITY}, inplace=True)
         return results
     def get_measure_column(self, measure: str, results: pd.DataFrame) -> pd.Series:
+        """Get the 'measure' column values."""
         return pd.Series("person_time", index=results.index)
     def get_entity_type_column(self, measure: str, results: pd.DataFrame) -> pd.Series:
+        """Get the 'entity_type' column values."""
         return pd.Series("rei", index=results.index)
     def get_entity_column(self, measure: str, results: pd.DataFrame) -> pd.Series:
+        """Get the 'entity' column values."""
         return pd.Series(self.risk, index=results.index)
     def get_sub_entity_column(self, measure: str, results: pd.DataFrame) -> pd.Series:
+        """Get the 'sub_entity' column values."""
         # The sub-entity col was created in the 'format' method
         return results[COLUMNS.SUB_ENTITY]

vivarium_public_health/results/simple_cause.py CHANGED Viewed

@@ -4,15 +4,21 @@ from dataclasses import dataclass
 @dataclass
 class SimpleCause:
     """A simple dataclass to represent the bare minimum information needed
-    for observers, e.g. 'all_causes' as a cause of disability. It also
-    includes a class method to convert a provided disease state into a
+    for observers, e.g. 'all_causes' as a cause of disability.
+    It also includes a class method to convert a provided disease state into a
     ``SimpleCause`` instance.
     """
     state_id: str
+    """The state_id of the cause."""
     model: str
+    """The model of the cause."""
     cause_type: str
+    """The cause type of the cause."""
     @classmethod
     def create_from_disease_state(cls, disease_state: type) -> "SimpleCause":
+        """Create a SimpleCause instance from a disease state."""
         return cls(disease_state.state_id, disease_state.model, disease_state.cause_type)

vivarium_public_health/results/stratification.py CHANGED Viewed

@@ -5,9 +5,8 @@ Results Stratifier
 This module contains tools for stratifying observed quantities
 by specified characteristics through the vivarium results interface.
-"""
-from __future__ import annotations
+"""
 import pandas as pd
 from vivarium import Component
@@ -15,6 +14,22 @@ from vivarium.framework.engine import Builder
 class ResultsStratifier(Component):
+    """A component for registering common public health stratifications.
+    The purpose of this component is to encapsulate all common public health
+    stratification registrations in one place. This is not enforced, however,
+    and stratification registrations can be done in any component.
+    Attributes
+    ----------
+    age_bins
+        The age bins for stratifying by age.
+    start_year
+        The start year of the simulation.
+    end_year
+        The end year of the simulation.
+    """
     #####################
     # Lifecycle methods #
     #####################
@@ -32,6 +47,7 @@ class ResultsStratifier(Component):
     #################
     def register_stratifications(self, builder: Builder) -> None:
+        """Register stratifications for the simulation."""
         builder.results.register_stratification(
             "age_group",
             self.age_bins["age_group_name"].to_list(),
@@ -80,18 +96,17 @@ class ResultsStratifier(Component):
     # Mappers #
     ###########
-    def map_age_groups(self, pop: pd.DataFrame) -> pd.Series[str]:
-        """Map age with age group name strings
+    def map_age_groups(self, pop: pd.DataFrame) -> pd.Series:
+        """Map age with age group name strings.
         Parameters
         ----------
         pop
-            A pd.DataFrame with one column, an age to be mapped to an age group name string
+            A table with one column, an age to be mapped to an age group name string.
         Returns
-        ------
-        pandas.Series
-            A pd.Series with age group name string corresponding to the pop passed into the function
+        -------
+            The age group name strings corresponding to the pop passed into the function.
         """
         bins = self.age_bins["age_start"].to_list() + [self.age_bins["age_end"].iloc[-1]]
         labels = self.age_bins["age_group_name"].to_list()
@@ -99,23 +114,33 @@ class ResultsStratifier(Component):
         return age_group
     @staticmethod
-    def map_year(pop: pd.DataFrame) -> pd.Series[str]:
-        """Map datetime with year
+    def map_year(pop: pd.DataFrame) -> pd.Series:
+        """Map datetime with year.
         Parameters
         ----------
         pop
-            A pd.DataFrame with one column, a datetime to be mapped to year
+            A table with one column, a datetime to be mapped to year.
         Returns
-        ------
-        pandas.Series
-            A pd.Series with years corresponding to the pop passed into the function
+        -------
+            The years corresponding to the pop passed into the function.
         """
         return pop.squeeze(axis=1).dt.year.apply(str)
     @staticmethod
     def get_age_bins(builder: Builder) -> pd.DataFrame:
+        """Get the age bins for stratifying by age.
+        Parameters
+        ----------
+        builder
+            The builder object for the simulation.
+        Returns
+        -------
+            The age bins for stratifying by age.
+        """
         raw_age_bins = builder.data.load("population.age_bins")
         age_start = builder.configuration.population.initialization_age_min
         exit_age = builder.configuration.population.untracking_age

vivarium_public_health/risks/base_risk.py CHANGED Viewed

@@ -30,8 +30,9 @@ from vivarium_public_health.utilities import EntityString, get_lookup_columns
 class Risk(Component):
-    """A model for a risk factor defined by either a continuous or a categorical
-    value. For example,
+    """A model for a risk factor defined by either a continuous or a categorical value.
+    For example,
     #. high systolic blood pressure as a risk where the SBP is not dichotomized
        into hypotension and normal but is treated as the actual SBP
@@ -138,9 +139,10 @@ class Risk(Component):
     def __init__(self, risk: str):
         """
         Parameters
         ----------
-        risk :
+        risk
             the type and name of a risk, specified as "type.name". Type is singular.
         """
         super().__init__()
@@ -171,8 +173,7 @@ class Risk(Component):
         self.exposure = self.get_exposure_pipeline(builder)
     def get_distribution_type(self, builder: Builder) -> str:
-        """
-        Get the distribution type for the risk from the configuration.
+        """Get the distribution type for the risk from the configuration.
         If the configured distribution type is not one of the supported types,
         it is assumed to be a data source and the data is retrieved using the
@@ -180,13 +181,12 @@ class Risk(Component):
         Parameters
         ----------
-        builder : Builder
-            the builder object
+        builder
+            The builder object.
         Returns
         -------
-        str
-            the distribution type
+            The distribution type.
         """
         if self.configuration is None:
             self.configuration = self.get_configuration(builder)
@@ -207,24 +207,22 @@ class Risk(Component):
         return distribution_type
     def get_exposure_distribution(self, builder: Builder) -> RiskExposureDistribution:
-        """
-        Creates and sets up the exposure distribution component for the Risk
+        """Creates and sets up the exposure distribution component for the Risk
         based on its distribution type.
         Parameters
         ----------
-        builder : Builder
-            the builder object
+        builder
+            The builder object.
         Returns
         -------
-        RiskExposureDistribution
-            the exposure distribution
+            The exposure distribution.
         Raises
         ------
         NotImplementedError
-            if the distribution type is not supported
+            If the distribution type is not supported.
         """
         try:
             exposure_distribution = self.exposure_distributions[self.distribution_type](

vivarium_public_health/risks/data_transformations.py CHANGED Viewed

@@ -78,7 +78,9 @@ def load_exposure_data(builder: Builder, risk: EntityString) -> pd.DataFrame:
 def rebin_relative_risk_data(
     builder, risk: EntityString, relative_risk_data: pd.DataFrame
 ) -> pd.DataFrame:
-    """When the polytomous risk is rebinned, matching relative risk needs to be rebinned.
+    """Rebin relative risk data if necessary.
+    When the polytomous risk is rebinned, matching relative risk needs to be rebinned.
     After rebinning, rr for both exposed and unexposed categories should be the weighted sum of relative risk
     of the component categories where weights are relative proportions of exposure of those categories.
     For example, if cat1, cat2, cat3 are exposed categories and cat4 is unexposed with exposure [0.1,0.2,0.3,0.4],

vivarium_public_health/risks/distributions.py CHANGED Viewed

@@ -461,7 +461,6 @@ def clip(q):
     This is bound up in the GBD risk factor PAF calculation process.
     We'll clip the distribution tails so we don't get NaNs back from the
     distribution calls
     """
     Q_LOWER_BOUND = 0.0011
     Q_UPPER_BOUND = 0.998

vivarium_public_health/risks/effect.py CHANGED Viewed

@@ -17,8 +17,6 @@ import scipy
 from layered_config_tree import ConfigurationError
 from vivarium import Component
 from vivarium.framework.engine import Builder
-from vivarium.framework.event import Event
-from vivarium.framework.population import SimulantData
 from vivarium_public_health.risks import Risk
 from vivarium_public_health.risks.data_transformations import (
@@ -30,10 +28,12 @@ from vivarium_public_health.utilities import EntityString, TargetString, get_loo
 class RiskEffect(Component):
-    """A component to model the impact of a risk factor on the target rate of
-    some affected entity. This component can source data either from
-    builder.data or from parameters supplied in the configuration.
-    For a risk named 'risk' that affects 'affected_risk' and 'affected_cause',
+    """A component to model the effect of a risk factor on an affected entity's target rate.
+    This component can source data either from builder.data or from parameters
+    supplied in the configuration.
+    For a risk named 'risk' that affects 'affected_risk' and 'affected_cause',
     the configuration would look like:
     .. code-block:: yaml
@@ -59,10 +59,7 @@ class RiskEffect(Component):
     @property
     def configuration_defaults(self) -> Dict[str, Any]:
-        """
-        A dictionary containing the defaults for any configurations managed by
-        this component.
-        """
+        """Default values for any configurations managed by this component."""
         return {
             self.name: {
                 "data_sources": {
@@ -89,13 +86,14 @@ class RiskEffect(Component):
     def __init__(self, risk: str, target: str):
         """
         Parameters
         ----------
-        risk :
+        risk
             Type and name of risk factor, supplied in the form
             "risk_type.risk_name" where risk_type should be singular (e.g.,
             risk_factor instead of risk_factors).
-        target :
+        target
             Type, name, and target rate of entity to be affected by risk factor,
             supplied in the form "entity_type.entity_name.measure"
             where entity_type should be singular (e.g., cause instead of causes).
@@ -210,7 +208,9 @@ class RiskEffect(Component):
     def rebin_relative_risk_data(
         self, builder, relative_risk_data: pd.DataFrame
     ) -> pd.DataFrame:
-        """When the polytomous risk is rebinned, matching relative risk needs to be rebinned.
+        """Rebin relative risk data.
+        When the polytomous risk is rebinned, matching relative risk needs to be rebinned.
         After rebinning, rr for both exposed and unexposed categories should be the weighted sum of relative risk
         of the component categories where weights are relative proportions of exposure of those categories.
         For example, if cat1, cat2, cat3 are exposed categories and cat4 is unexposed with exposure [0.1,0.2,0.3,0.4],
@@ -319,18 +319,22 @@ class RiskEffect(Component):
 class NonLogLinearRiskEffect(RiskEffect):
-    """A component to model the impact of an exposure-parametrized risk factor on
-    the target rate of some affected entity. This component will
-    1) read TMRED data from the artifact and define the TMREL
-    2) calculate the relative risk at TMREL by linearly interpolating over
-       relative risk data defined in the configuration
-    3) divide relative risk data from configuration by RR at TMREL
-       and clip to be greater than 1
-    4) build a LookupTable which returns the exposure and RR of the left and right edges
-       of the RR bin containing a simulant's exposure
-    5) use this LookupTable to modify the target pipeline by linearly interpolating
-       a simulant's RR value and multiplying it by the intended target rate
+    """A component to model the exposure-parametrized effect of a risk factor.
+    More specifically, this models the effect of the risk factor on the target rate of
+    some affected entity.
+    This component:
+    1) reads TMRED data from the artifact and defines the TMREL
+    2) calculates the relative risk at TMREL by linearly interpolating over
+    relative risk data defined in the configuration
+    3) divides relative risk data from configuration by RR at TMREL
+    and clips to be greater than 1
+    4) builds a LookupTable which returns the exposure and RR of the left and right edges
+    of the RR bin containing a simulant's exposure
+    5) uses this LookupTable to modify the target pipeline by linearly interpolating
+    a simulant's RR value and multiplying it by the intended target rate
     """
     ##############
@@ -339,10 +343,7 @@ class NonLogLinearRiskEffect(RiskEffect):
     @property
     def configuration_defaults(self) -> Dict[str, Any]:
-        """
-        A dictionary containing the defaults for any configurations managed by
-        this component.
-        """
+        """Default values for any configurations managed by this component."""
         return {
             self.name: {
                 "data_sources": {
@@ -485,6 +486,7 @@ class NonLogLinearRiskEffect(RiskEffect):
     ##############
     def validate_rr_data(self, rr_data: pd.DataFrame) -> None:
+        """Validate the relative risk data."""
         # check that rr_data has numeric parameter data
         parameter_data_is_numeric = rr_data["parameter"].dtype.kind in "biufc"
         if not parameter_data_is_numeric:

vivarium-public-health 3.0.2__py3-none-any.whl → 3.0.4__py3-none-any.whl

vivarium-public-health 3.0.2__py3-none-any.whl → 3.0.4__py3-none-any.whl