PyPI - stats-misc - Versions diffs - 0.3.5__py3-none-any.whl - Mend

stats-misc 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

stats_misc/__init__.py +1 -0
stats_misc/_version.py +1 -0
stats_misc/constants.py +491 -0
stats_misc/example_data/__init__.py +0 -0
stats_misc/example_data/example_datasets/endometrial.tsv.gz +0 -0
stats_misc/example_data/example_datasets/sex2.tsv.gz +0 -0
stats_misc/example_data/examples.py +325 -0
stats_misc/intervals.py +387 -0
stats_misc/machine_learning/__init__.py +1 -0
stats_misc/machine_learning/firthlogist.py +631 -0
stats_misc/machine_learning/sklearn_utils.py +239 -0
stats_misc/machine_learning/sksurv_utils.py +482 -0
stats_misc/machine_learning/validation.py +811 -0
stats_misc/meta_analysis.py +601 -0
stats_misc/resampling.py +1153 -0
stats_misc/tests.py +264 -0
stats_misc/utils/__init__.py +1 -0
stats_misc/utils/general.py +459 -0
stats_misc/utils/helpers.py +195 -0
stats_misc-0.3.5.dist-info/METADATA +143 -0
stats_misc-0.3.5.dist-info/RECORD +24 -0
stats_misc-0.3.5.dist-info/WHEEL +5 -0
stats_misc-0.3.5.dist-info/licenses/LICENSE +17 -0
stats_misc-0.3.5.dist-info/top_level.txt +1 -0

stats_misc/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from ._version import __version__

stats_misc/_version.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = '0.3.5'

stats_misc/constants.py ADDED Viewed

@@ -0,0 +1,491 @@
+'''
+The constants for the stats-misc module.
+'''
+# specifying some constants
+from typing import (
+    Any, List, Type, Union, Tuple, Optional, Dict, Set, Callable,
+)
+import warnings
+import pandas as pd
+# import numpy as np
+import inspect
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Module-level string constant. NOTE(review): its consumers are not visible in
# this module -- presumably a key under which a class name is stored; confirm.
CLASS_NAME = "__CLASS_NAME"
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Type hinting
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class SklearnClass:
    '''
    Empty placeholder class, listed under the module's `Type hinting` section.

    It defines no attributes or behaviour of its own.
    '''
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# checking inputs
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class EmptyError(Exception):
    '''
    Custom exception type with no behaviour beyond `Exception`.

    NOTE(review): the name suggests it signals an unexpectedly empty object;
    no code in this module raises it, so confirm against its callers.
    '''
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class InputValidationError(Exception):
    '''
    Raised by the input-checking helpers in this module (`is_type`,
    `are_columns_in_df`, `string_join_delimiter` and `same_len`) when an
    argument fails validation.
    '''
+# # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# class MergeError(Exception):
+#     pass
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def get_param_name(param:Any) -> Union[str,None]:
    '''
    Gets the name of `param` or otherwise return a None.

    The lookup is performed in the local variables of the frame two levels
    above this function, i.e. the caller of the function which called
    `get_param_name`. It is therefore meant to be called from a helper (such
    as `is_type`) that was itself called by the function owning the variable.

    Parameters
    ----------
    param : `any`
        The object whose variable name should be looked up. Objects are
        matched on identity (`is`), not on equality.

    Returns
    -------
    name : `str` or `None`
        The first local name bound to `param`, or `None` when no local name
        refers to it, when fewer than two caller frames exist, or when the
        interpreter does not provide frame objects.
    '''
    frame = inspect.currentframe()
    try:
        # step up twice, stopping early if the call stack is too shallow
        # (or if `currentframe` returned None on this interpreter).
        for _ in range(2):
            if frame is None:
                return None
            frame = frame.f_back
        if frame is None:
            return None
        for name, value in frame.f_locals.items():
            if value is param:
                return name
        return None
    finally:
        # drop the frame reference to avoid keeping a reference cycle alive.
        del frame
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def is_type(param: Any, types: Union[Tuple[Type], Type],
            param_name: Union[str, None]=None) -> bool:
    """
    Validate that `param` is an instance of one of the supplied types.

    Parameters
    ----------
    param : `any`
        Object to test.
    types : `type` or `tuple` [`type`]
        Either a single type, or a tuple of types to test against.
    param_name : `str` or `None`, default `NoneType`
        Name to report in the error message. When `None` the name is looked
        up with `get_param_name`. Supplying a name triggers a
        `DeprecationWarning`, but only when the type check fails.

    Returns
    -------
    results : `bool`
        True if the parameter is an instance of any of the given types.

    Raises
    ------
    InputValidationError
        If `param` is not an instance of any of `types`.
    """
    # nothing to report when the check passes
    if isinstance(param, types):
        return True
    if param_name is not None:
        warnings.warn('`param_name` will be depricated.',
                      DeprecationWarning,
                      stacklevel=2,
                      )
    else:
        # must be called directly from this function: `get_param_name`
        # inspects the locals two frames up, i.e. those of our caller.
        param_name = get_param_name(param)
    error_text = (
        f"Expected any of [{types}], "
        f"got {type(param)}; Please see parameter: `{param_name}`."
    )
    raise InputValidationError(error_text)
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def is_df(df: Any) -> bool:
    """
    Validate that `df` is a `pandas.DataFrame`.

    Parameters
    ----------
    df : `any`
        Object to test.

    Returns
    -------
    results : `bool`
        True if the df is a pd.DataFrame.

    Raises
    ------
    InputValidationError
        Raised by `is_type` if `df` is not a pd.DataFrame.
    """
    # no `param_name` is passed, so `is_type` resolves the reported name from
    # this function's locals, where the object is bound to `df`.
    outcome = is_type(df, pd.DataFrame)
    return outcome
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def are_columns_in_df(
    df: pd.DataFrame, expected_columns: Union[List[str], str],
    warning: bool=False) -> bool:
    """
    Check that every expected column is present in a pandas.DataFrame.

    Parameters
    ----------
    df : `pandas.DataFrame`
        The table whose `columns` are inspected.
    expected_columns : `str` or `list` [`str`]
        Either a single column name or a list of column names to test. Any
        value which is not a `list` is treated as one single column label.
    warning : `bool`, default False
        raises a warning instead of an error.

    Returns
    -------
    results : `bool`
        True if all expected_columns are in the df. False if columns are
        missing and `warning` is set.

    Raises
    ------
    InputValidationError
        If columns are missing and `warning` is False.
    """
    # normalise the request to a set of column labels
    if isinstance(expected_columns, list):
        wanted: Set[str] = set(expected_columns)
    else:
        wanted = {expected_columns}
    absent = wanted - set(df.columns)
    # all columns found
    if not absent:
        return True
    text = (
        "The following columns are missing from the pandas.DataFrame: {}"
    ).format(absent)
    if warning == False:
        raise InputValidationError(text)
    warnings.warn(text)
    return False
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def check_limits(value:Union[float, int],
                min_value:Union[float, int]=0.0,
                max_value:Union[float, int]=1.0,
                closed_interval:bool=False,
                ) -> bool:
    '''
    Checks whether `value` falls within the min and max value, optionally
    including the end points (closed interval). Will raise a ValueError
    otherwise.

    Parameters
    ----------
    value : `float` or `int`
        The value to check.
    min_value : `float` or `int`, default 0.0
        The lower limit.
    max_value : `float` or `int`, default 1.0
        The upper limit.
    closed_interval : `bool`, default False
        Set to `True` to include the limits in the evaluation. So if True
        and the value and max_value are both 1.0 this will `not` raise an Error.

    Returns
    -------
    Eval : `bool`
        True if the value falls within the min and max.

    Raises
    ------
    InputValidationError
        Raised by `is_type` if an argument has an unexpected type.
    ValueError
        If `value` falls outside the requested interval. This includes a
        `value` (or limit) which is NaN, as NaN does not lie in any interval.
    '''
    # check input
    is_type(value, (int, float), 'value')
    is_type(min_value, (int, float), 'min_value')
    is_type(max_value, (int, float), 'max_value')
    is_type(closed_interval, bool, 'closed_interval')
    # evaluate limits -- test for membership instead of for violation, so
    # that NaN (for which every comparison is False) is rejected.
    if closed_interval:
        inside = min_value <= value <= max_value
        limits = 'including'
    else:
        inside = min_value < value < max_value
        limits = 'excluding'
    if not inside:
        raise ValueError('The value should be within the range: {} and {}, '
                         '{} the limit values. The current value '
                         '{}.'.format(min_value, max_value, limits, value))
    # return
    return True
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def string_join_delimiter(s_list:List[str], delimiter:str=', ',
                     final_delimiter:str=', or ',
                     ) -> str:
    '''
    Takes a list of strings and joins the list entries by `delimiter`, with
    the final entry separated by `final_delimiter`.

    Parameters
    ----------
    s_list : `list` [`str`]
        A list of strings, should contain at least three entries.
    delimiter : `str`, default `, `
        The delimiter to use on all except the final string.
    final_delimiter : `str` default `, or `
        The delimiter to use on the final string.

    Returns
    -------
    string : `str`
        The final delimited string.

    Raises
    ------
    InputValidationError
        If an argument has an unexpected type (raised by `is_type`), or if
        `s_list` contains fewer than three entries.
    ValueError
        If `s_list` contains entries which are not strings.

    Examples
    --------
    >>> SIDES=['both', 'above', 'below']
    >>> string_join_delimiter(SIDES)
    'both, above, or below'
    '''
    is_type(s_list, list, 's_list')
    is_type(delimiter, str, 'delimiter')
    is_type(final_delimiter, str, 'final_delimiter')
    if len(s_list) < 3:
        raise InputValidationError('`s_list` should contain at least 3 entries '
                                   'not: {}.'.format(str(len(s_list)))
                                )
    # the exception used to be created without being raised, which let
    # non-string entries fall through to an unrelated TypeError in `join`.
    if not all(isinstance(s, str) for s in s_list):
        raise ValueError('`s_list` should exclusively contain string values.')
    # concatenate the string
    final_string = delimiter.join(s_list[:-1])+final_delimiter+s_list[-1]
    # return
    return final_string
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def _update_kwargs(update_dict:Dict[Any, Any], **kwargs:Optional[Any],
            ) -> Dict[Any, Any]:
    '''
    Combine arbitrary keyword arguments with the entries of `update_dict`
    into a new dictionary. Keys present in both are resolved in favour of
    `update_dict`.

    Parameters
    ----------
    update_dict : `dict`
        A dictionary with key - value pairs that should be combined with any
        of the supplied kwargs.
    kwargs : `any`
        Arbitrary keyword arguments.

    Returns
    -------
    kwargs : `dict` [`any`, `any`]
        A new dictionary holding the kwargs first, followed by the entries of
        update_dict, where duplicate entries from update_dict overwrite those
        in kwargs.

    Examples
    --------
    The function is particularly useful to overwrite `kwargs` that are
    supplied to a nested function, say

    >>> _update_kwargs(update_dict={'c': 'black'}, c='red',
    ...                alpha = 0.5)
    {'c': 'black', 'alpha': 0.5}
    '''
    # entries unpacked later win, hence `update_dict` goes last
    return {**kwargs, **update_dict}
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def same_len(object1: Any, object2: Any,
             object_names:Union[List[str], None]=None,
             ) -> bool:
    """
    Confirm that two objects have the same length.

    Arguments
    ---------
    object1, object2 : `any`
        Any type of object which supports `len`.
    object_names : `list` [`str`] or `None`, default `NoneType`
        The names of the two objects, used in the error message. Defaults to
        `object1` and `object2`.

    Returns
    -------
    bool
        True if both lengths are equal.

    Raises
    ------
    ValueError
        If `object_names` is supplied but does not contain two entries.
    InputValidationError
        If the lengths of the two objects differ.
    """
    size_first, size_second = len(object1), len(object2)
    # settle on the labels to report
    if object_names is None:
        object_names = ['object1', 'object2']
    elif len(object_names) !=2:
        raise ValueError('`object_names` should be `NoneType` or contain '
                         'two strings')
    # the actual test
    if size_first == size_second:
        return True
    raise InputValidationError(
        "The length of `{0}`: {1}, does not match the length "
        "of `{2}`: {3}.".format(object_names[0], size_first,
                                object_names[1], size_second)
    )
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def assign_empty_default(arguments:List[Any], empty_object:Callable[[],Any],
                         ) -> List[Any]:
    '''
    Replace every `NoneType` entry in `arguments` by a fresh object created
    with `empty_object()`.

    This helps to avoid the pitfall of using an empty mutable object as a
    default function argument, which would persist through multiple function
    calls, leading to unexpected/undesired behaviours.

    Parameters
    ----------
    arguments : `list` [`any`]
        A list of arguments which may be set to `NoneType`.
    empty_object : `Callable`
        Called without arguments, once for each `NoneType` entry. It is
        validated to be a class (an instance of `type`), for example `list`
        or `dict`; other callables are rejected.

    Returns
    -------
    new_arguments : `list` [`any`]
        A new list with `NoneType` replaced by the empty object; the other
        entries are passed through unchanged.

    Raises
    ------
    InputValidationError
        Raised by `is_type` if `arguments` is not a list or `empty_object` is
        not a class.

    Examples
    --------
    >>> assign_empty_default(['hi', None, 'hello'], empty_object=list)
    ['hi', [], 'hello']
    '''
    # check input
    is_type(arguments, list, 'arguments')
    is_type(empty_object, type, 'empty_object')
    # build the new list, creating a separate empty object per missing entry
    filled = []
    for entry in arguments:
        if entry is None:
            filled.append(empty_object())
        else:
            filled.append(entry)
    # return
    return filled
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Module names
class NamesUtilsGeneral:
    '''
    String constants for the utils.general module.
    '''
    # SIDE_* options
    SIDE_TWO = 'two'
    SIDE_LEFT = 'left'
    SIDE_RIGHT = 'right'
    SIDE_BELOW = 'below'
    SIDE_ABOVE = 'above'
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Module names
class NamesIntervals:
    '''
    String constants for the intervals module.
    '''
    INDICES = 'interval_indices'
    VALUES = 'interval_values'
    COVERAGE = 'coverage'
    # MATRIX_* names
    MATRIX_COVERAGE = 'matrix_coverage'
    MATRIX_COLUMNS = 'matrix_columns'
    MATRIX_ROWS = 'matrix_rows'
    # estimate and its standard error
    POINT = 'point_estimate'
    SE = 'standard_error'
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Module names
class NamesResampling:
    '''
    String constants for the resampling module.
    '''
    STATSFUNC = 'statsfunction'
    # names with a leading double underscore
    DATA_PRIV = '__data'
    BOOT_SAMPLE = '__boot_sample'
    JACK_SAMPLE = '__jack_sample'
    N_ESTIMATES = 'n_estimates'
    ALPHA = 'alpha'
    N_REPS = 'n_reps'
    KWARGS = 'kwargs'
    # CI_* names
    CI = 'confidence_interval'
    CI_COVERAGE = 'coverage'
    # BCA_* names
    BCA_ACCELERATION = '__acceleration'
    BCA_BIAS = '__bias'
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class NamesMetaAnalysis:
    '''
    String constants for the meta-analysis module.
    '''
    # Q statistic
    QSTAT = 'q_statistic'
    QPVAL = 'q_pvalue'
    # I-squared
    ISQR = 'i_squared'
    ISQR_CI = 'i_squared_ci'
    ISQR_CI_COV = 'i_squared_ci_coverage'
    # tau-squared and the TAU_METHOD_* identifiers
    TSQR = 'tau_squared'
    TAU_METHOD_MM = 'mm'
    TAU_METHOD_MM_IT = 'mm-it'
    TAU_METHOD_CA = 'ca'
    TAU_METHOD_DL = 'dl'
    TAU_METHOD_CA2 = 'ca2'
    TAU_METHOD_DL2 = 'dl2'
    TAU_METHOD_PM_IT = 'pm-it'
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class NamesTest:
    '''
    String constants for the test module.
    '''
    # estimate and its standard error
    POINT = 'point_estimate'
    POINT_SE = 'standard_error'
    # test results
    STATISTIC = 'test_statistic'
    PVALUE = 'p_value'
    NULL_VALUE = 'null_value'
    # sums of squares
    ESS = 'explained_sum_squares'
    RSS = 'residual_sum_squares'
    # degrees of freedom
    DF_NUM = 'df_numerator'
    DF_DENUM = 'df_denominator'
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class NamesValidation:
    '''
    String constants for the validation module.
    '''
    # CSTAT_* names
    CSTAT = 'c_statistic'
    CSTAT_LB = 'lower_bound'
    CSTAT_UB = 'upper_bound'
    CSTAT_COVERAGE = 'coverage'
    CSTAT_INTERVAL = 'confidence_interval'
    CSTAT_SE = 'standard_error'
    FALSE_POSITIVE = 'false_positive'
    SENSITIVITY = 'sensitivity'
    THRESHOLD = 'threshold'
    # CAL_* names
    CAL_SLOPE = 'calibration_slope'
    CAL_SLOPE_SE = 'calibration_slope_se'
    CAL_INTERCEPT = 'calibration_in_the_large'
    CAL_INTERCEPT_SE = 'calibration_in_the_large_se'
    CAL_TABLE = 'observed_predict_table'
    PREDICTED_RISK = 'predicted_risk'
    # RECAL_* names
    RECAL_BINOMIAL = 'binomial'
    RECAL_GAUSSIAN = 'gaussian'
    RECAL_SLOPE = 'slope'
    RECAL_INTERCEPT = 'intercept'
    RECAL_TABLE = 'table_recalibrated'
    # free-text labels
    AVG_PREDICTED_RISK = 'Average predict risk'
    AVG_OBSERVED_RISK = 'Average observed risk'
    NO_SUBJECTS = 'No. subjects'
    AVG_OBSERVED_RISK_LB = 'Lower bound observed risk'
    AVG_OBSERVED_RISK_UB = 'Upper bound observed risk'
    # CCC_* long-form labels
    CCC = 'calibration correlation coefficient'
    CCC_CI = 'confidence interval'
    CCC_COR = 'Pearson correlation coefficient'
    CCC_BIAS = 'bias correction factor'
    CCC_SCALE = 'scaling factor'
    CCC_TRANS = 'translation constant'
    # CCC_METHOD_* identifiers
    CCC_METHOD_F = 'fisher'
    CCC_METHOD_D = 'delta'
    # CCC_S_* short-form counterparts of the CCC_* labels
    CCC_S = 'ccc'
    CCC_S_CI = 'ccc_ci'
    CCC_S_COR = 'pearson_correlation'
    CCC_S_BIAS = 'bias_correction'
    CCC_S_SCALE = 'scale_factor'
    CCC_S_TRANS = 'translation_constant'
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class NamesSklearnUtils:
    '''
    String constants for the sklearn_utils module.
    '''
    # REPORT_* names
    REPORT_SORT_ASC = 'ascending'
    REPORT_SORT_DESC = 'descending'
    REPORT_RANK = 'rank_test_score'
    # SEARCH_* names
    SEARCH_MEAN = 'mean_test_score'
    SEARCH_PARAMS = 'params'
    SEARCH_AVERAGE = 'average'
    SEARCH_RANK = 'average_rank'
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# error messages
class Error_MSG:
    '''
    Error message templates, to be completed with `str.format`.

    The number of `{}` placeholders differs per template.
    '''
    # string options
    INCORRECT_STRING_INPUT = '`{}` is limited to the following values `{}`.'
    INVALID_STRING = '`{}` should be limited to `{}`.'
    # lengths
    INVALID_EXACT_LENGTH = (
        '`{}` needs to contain exactly {} elements, not {}.'
    )
    DIFF_LENGTHS = (
        '`{0}` and `{1}`, should have the same number of elements, '
        'not: {2}, and {3}, respectively.'
    )
    # numerical issues
    NON_CONVERGENCE = '`{}` did not converge.'
    FLOAT_LIMITS = '`{}` is constraint between `{}` and `{}`.'

stats_misc/example_data/__init__.py ADDED Viewed

File without changes

stats_misc/example_data/example_datasets/endometrial.tsv.gz ADDED Viewed

Binary file

stats_misc/example_data/example_datasets/sex2.tsv.gz ADDED Viewed

Binary file