stats-misc 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stats_misc/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from ._version import __version__
stats_misc/_version.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = '0.3.5'
@@ -0,0 +1,491 @@
1
+ '''
2
+ The constants for the stats-misc module.
3
+ '''
4
+ # specifying some constants
5
+ from typing import (
6
+ Any, List, Type, Union, Tuple, Optional, Dict, Set, Callable,
7
+ )
8
+ import warnings
9
+ import pandas as pd
10
+ # import numpy as np
11
+ import inspect
12
+
13
+ # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
14
# NOTE(review): presumably a lookup key for a class name; the consumer of
# this constant is not visible in this module -- confirm against callers.
CLASS_NAME = "__CLASS_NAME"
15
+
16
+ # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
17
+ # Type hinting
18
+
19
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20
class SklearnClass:
    """
    Empty placeholder class, declared in the type hinting section of this
    module. It defines no attributes or methods of its own.
    """
22
+
23
+ # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
24
+ # checking inputs
25
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
26
class EmptyError(Exception):
    """
    Custom exception type; adds nothing on top of `Exception`.
    """
28
+
29
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
30
class InputValidationError(Exception):
    """
    Exception raised by the input checking helpers in this module when an
    argument fails validation. Adds nothing on top of `Exception`.
    """
32
+
33
+ # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
34
+ # class MergeError(Exception):
35
+ # pass
36
+
37
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38
def get_param_name(param:Any) -> Union[str,None]:
    '''
    Gets the variable name bound to `param` two call frames above this
    function, or otherwise returns None.

    The direct caller (typically a checking helper such as `is_type`) is
    skipped, and the local variables of *its* caller are searched for a name
    whose value is the very same object as `param` (identity, not equality).

    Parameters
    ----------
    param : `any`
        The object whose variable name should be recovered.

    Returns
    -------
    name : `str` or `None`
        The first local name referring to `param`. `None` if no local refers
        to it, if the call stack is less than two frames deep, or if the
        interpreter does not provide frame support.
    '''
    frame = inspect.currentframe()
    try:
        # walk up two frames, bailing out as soon as the stack runs out
        # instead of failing with an AttributeError on `None.f_back`.
        for _ in range(2):
            if frame is None:
                return None
            frame = frame.f_back
        if frame is None:
            return None
        # first match wins, mirroring the insertion order of the locals
        for name, value in frame.f_locals.items():
            if value is param:
                return name
        return None
    finally:
        # holding on to a frame object can create a reference cycle
        del frame
46
+
47
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
48
def is_type(param: Any, types: Union[Tuple[Type], Type],
            param_name: Union[str, None]=None) -> bool:
    """
    Checks if a given parameter is an instance of any of the supplied types.

    Parameters
    ----------
    param : `any`
        Object to test.
    types : `type` or `tuple` [`type`]
        Either a single type, or a tuple of types to test against.
    param_name : `str` or `None`, default `NoneType`
        Name to report in the error message. When omitted the name is looked
        up with `get_param_name`. Supplying it triggers a
        `DeprecationWarning`, but only when the type check fails.

    Returns
    -------
    results : `bool`
        True if the parameter is an instance of any of the given types.

    Raises
    ------
    InputValidationError
        If `param` is not an instance of `types`.
    """
    # happy path first: nothing else to do when the type matches
    if isinstance(param, types):
        return True
    if param_name is not None:
        warnings.warn('`param_name` will be depricated.',
                      DeprecationWarning, stacklevel=2)
    else:
        # NOTE: keep this call directly in this function body; the lookup
        # counts call frames relative to `is_type`.
        param_name = get_param_name(param)
    raise InputValidationError(
        f"Expected any of [{types}], got {type(param)}; "
        f"Please see parameter: `{param_name}`."
    )
79
+
80
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
81
def is_df(df: Any) -> bool:
    """
    Checks if an object is a pd.DataFrame.

    Parameters
    ----------
    df : `any`
        The object to test.

    Returns
    -------
    results : `bool`
        True if `df` is a pd.DataFrame.

    Raises
    ------
    InputValidationError
        Raised by `is_type` if `df` is not a pd.DataFrame.
    """
    # `is_type` either raises or returns True, so getting past this line
    # means the check succeeded.
    is_type(df, pd.DataFrame)
    return True
96
+
97
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
98
def are_columns_in_df(
        df: pd.DataFrame, expected_columns: Union[List[str], str],
        warning: bool=False) -> bool:
    """
    Checks if all expected columns are present in a given pandas.DataFrame.

    Parameters
    ----------
    df : `pandas.DataFrame`
        The table whose `columns` are inspected.
    expected_columns : `str` or `list` [`str`]
        Either a single column name or a list of column names to test. Any
        value that is not a `list` is treated as one single column name.
    warning : `bool`, default False
        Raises a warning instead of an error when columns are missing.

    Returns
    -------
    results : `bool`
        True if all expected_columns are in the df. False if columns are
        missing and `warning` is set.

    Raises
    ------
    InputValidationError
        If columns are missing and `warning` is False.
    """
    # normalise the request to a set of column names
    if isinstance(expected_columns, list):
        wanted = set(expected_columns)
    else:
        wanted = {expected_columns}
    absent = wanted - set(df.columns)
    # nothing missing, nothing to report
    if not absent:
        return True
    text = (
        "The following columns are missing from the pandas.DataFrame: {}"
    ).format(absent)
    # explicit comparison kept on purpose: it decides which branch values
    # other than a real bool end up in.
    if warning == False:
        raise InputValidationError(text)
    warnings.warn(text)
    return False
139
+
140
+
141
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
142
def check_limits(value:Union[float, int],
                 min_value:Union[float, int]=0.0,
                 max_value:Union[float, int]=1.0,
                 closed_interval:bool=False,
                 ) -> bool:
    '''
    Checks whether `value` falls within `min_value` and `max_value`,
    optionally including the end points (closed interval).

    Parameters
    ----------
    value : `float` or `int`
        The value to check.
    min_value : `float` or `int`, default 0.0
        The lower limit.
    max_value : `float` or `int`, default 1.0
        The upper limit.
    closed_interval : `bool`, default False
        Set to `True` to include the limits in the evaluation. So if True
        and the value and max_value are both 1.0 this will `not` raise an
        error.

    Returns
    -------
    Eval : `bool`
        True if the value falls within the min and max.

    Raises
    ------
    InputValidationError
        Raised by `is_type` if an argument has the wrong type.
    ValueError
        If `value` lies outside the requested interval.
    '''
    # check input
    is_type(value, (int, float), 'value')
    is_type(min_value, (int, float), 'min_value')
    is_type(max_value, (int, float), 'max_value')
    is_type(closed_interval, bool, 'closed_interval')
    # `closed_interval` is a real bool from here on, so truthiness suffices
    if closed_interval:
        outside = value > max_value or value < min_value
        template = ('The value should be within the range: {} and {}, '
                    'including the limit values. The current value '
                    '{}.')
    else:
        outside = value >= max_value or value <= min_value
        template = ('The value should be within the range: {} and {}, '
                    'excluding the limit values. The current value '
                    '{}.')
    if outside:
        raise ValueError(template.format(min_value, max_value, value))
    return True
188
+
189
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
190
def string_join_delimiter(s_list:List[str], delimiter:str=', ',
                          final_delimiter:str=', or ',
                          ) -> str:
    '''
    Takes a list of strings and joins the list entries by `delimiter`, with
    the final entry separated by `final_delimiter`.

    Parameters
    ----------
    s_list : `list` [`str`]
        A list of strings, should contain at least three entries.
    delimiter : `str`, default `, `
        The delimiter to use on all except the final string.
    final_delimiter : `str` default `, or `
        The delimiter to use on the final string.

    Returns
    -------
    string : `str`
        The final delimited string.

    Raises
    ------
    InputValidationError
        If an argument has the wrong type (raised by `is_type`), or if
        `s_list` contains fewer than three entries.
    ValueError
        If `s_list` contains entries which are not strings.

    Examples
    --------
    >>> SIDES=['both', 'above', 'below']
    >>> string_join_delimiter(SIDES)
    'both, above, or below'

    '''
    is_type(s_list, list, 's_list')
    is_type(delimiter, str, 'delimiter')
    is_type(final_delimiter, str, 'final_delimiter')
    if len(s_list) < 3:
        raise InputValidationError('`s_list` should contain at least 3 entries '
                                   'not: {}.'.format(str(len(s_list)))
                                   )
    # the exception used to be constructed without being raised, which let
    # non-string entries fall through to an unrelated TypeError in `join`.
    if not all(isinstance(s, str) for s in s_list):
        raise ValueError('`s_list` should exclusively contain string values.')
    # concatenate the string
    return delimiter.join(s_list[:-1]) + final_delimiter + s_list[-1]
231
+
232
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
233
+ def _update_kwargs(update_dict:Dict[Any, Any], **kwargs:Optional[Any],
234
+ ) -> Dict[Any, Any]:
235
+ '''
236
+ This function will take any number of `kwargs` and add them to an
237
+ `update_dict`. If there are any duplicate values in the `kwargs` and the
238
+ `update_dict`, the entries in the `update_dict` will take precedence.
239
+
240
+ Parameters
241
+ ----------
242
+ update_dict : `dict`
243
+ A dictionary with key - value pairs that should be combined with any
244
+ of the supplied kwargs.
245
+ kwargs : `any`
246
+ Arbitrary keyword arguments.
247
+
248
+ Returns
249
+ -------
250
+ kwargs : `dict` [`any`, `any`]
251
+ A dictionary with the update_dict and kwargs combined, where duplicate
252
+ entries from update_dict overwrite those in kwargs.
253
+
254
+ Examples
255
+ --------
256
+ The function is particularly useful to overwrite `kwargs` that are
257
+ supplied to a nested function say
258
+
259
+ >>> _update_kwargs(update_dict={'c': 'black'}, c='red',
260
+ alpha = 0.5)
261
+ >>> {'c': 'black', 'alpha': 0.5}
262
+ '''
263
+ new_dict = {**kwargs, **update_dict}
264
+ # returns
265
+ return new_dict
266
+
267
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
268
def same_len(object1: Any, object2: Any,
             object_names:Union[List[str], None]=None,
             ) -> bool:
    """
    Check if two objects have the same length, and raise an error otherwise.

    Arguments
    ---------
    object1, object2 : `any`
        Any type of object supporting `len`.
    object_names : `list` [`str`] or `None`, default `NoneType`
        Two labels describing `object1` and `object2`, used in the error
        message. Defaults to `object1` and `object2`.

    Returns
    -------
    bool
        True if both lengths are equal.

    Raises
    ------
    ValueError
        If `object_names` is supplied but does not contain two entries.
    InputValidationError
        If the lengths of the two objects differ.
    """
    # lengths are taken first, before `object_names` is looked at
    len_first, len_second = len(object1), len(object2)
    labels = ['object1', 'object2'] if object_names is None else object_names
    if len(labels) != 2:
        raise ValueError('`object_names` should be `NoneType` or contain '
                         'two strings')
    # the actual test
    if len_first == len_second:
        return True
    raise InputValidationError(
        "The length of `{0}`: {1}, does not match the length "
        "of `{2}`: {3}.".format(labels[0], len_first,
                                labels[1], len_second)
    )
303
+
304
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
305
def assign_empty_default(arguments:List[Any], empty_object:Callable[[],Any],
                         ) -> List[Any]:
    '''
    Takes a list of `arguments` and replaces every `NoneType` entry by a
    freshly created `empty_object`.

    This function helps deal with the pitfall of assigning an empty mutable
    object as a default function argument, which would persist through
    multiple function calls, leading to unexpected/undesired behaviours.

    Parameters
    ----------
    arguments : `list` [`any`]
        A list of arguments which may be set to `NoneType`.
    empty_object : `Callable`
        Called without arguments to create each replacement. It is validated
        as a `type`, so in practice a class such as `list` or `dict`.

    Returns
    -------
    new_arguments : `list` [`any`]
        A new list with `NoneType` replaced by an empty mutable object.

    Raises
    ------
    InputValidationError
        Raised by `is_type` if `arguments` is not a `list` or `empty_object`
        is not a `type`.

    Examples
    --------
    >>> assign_empty_default(['hi', None, 'hello'], empty_object=list)
    ['hi', [], 'hello']
    '''
    # check input
    is_type(arguments, list, 'arguments')
    is_type(empty_object, type, 'empty_object')
    # every None gets its own new instance, nothing is shared between slots
    filled = []
    for item in arguments:
        filled.append(item if item is not None else empty_object())
    return filled
339
+
340
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
341
+ # Module names
342
class NamesUtilsGeneral:
    """
    String constants for utils.general.
    """
    # labels for the side of an interval or test
    SIDE_TWO = "two"
    SIDE_LEFT = "left"
    SIDE_RIGHT = "right"
    SIDE_BELOW = "below"
    SIDE_ABOVE = "above"
351
+
352
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
353
+ # Module names
354
class NamesIntervals:
    """
    String constants for the intervals module.
    """
    INDICES = "interval_indices"
    VALUES = "interval_values"
    COVERAGE = "coverage"
    MATRIX_COVERAGE = "matrix_coverage"
    MATRIX_COLUMNS = "matrix_columns"
    MATRIX_ROWS = "matrix_rows"
    POINT = "point_estimate"
    SE = "standard_error"
366
+
367
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
368
+ # Module names
369
class NamesResampling:
    """
    String constants for the resampling module.
    """
    STATSFUNC = "statsfunction"
    # names carrying a double underscore prefix
    DATA_PRIV = "__data"
    BOOT_SAMPLE = "__boot_sample"
    JACK_SAMPLE = "__jack_sample"
    N_ESTIMATES = "n_estimates"
    ALPHA = "alpha"
    N_REPS = "n_reps"
    KWARGS = "kwargs"
    CI = "confidence_interval"
    CI_COVERAGE = "coverage"
    BCA_ACCELERATION = "__acceleration"
    BCA_BIAS = "__bias"
385
+
386
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
387
class NamesMetaAnalysis:
    """
    String constants for the meta-analysis module.
    """
    QSTAT = "q_statistic"
    QPVAL = "q_pvalue"
    ISQR = "i_squared"
    ISQR_CI = "i_squared_ci"
    ISQR_CI_COV = "i_squared_ci_coverage"
    TSQR = "tau_squared"
    # identifiers for the TAU_METHOD_* options
    TAU_METHOD_MM = "mm"
    TAU_METHOD_MM_IT = "mm-it"
    TAU_METHOD_CA = "ca"
    TAU_METHOD_DL = "dl"
    TAU_METHOD_CA2 = "ca2"
    TAU_METHOD_DL2 = "dl2"
    TAU_METHOD_PM_IT = "pm-it"
404
+
405
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
406
class NamesTest:
    """
    String constants for the test module.
    """
    POINT = "point_estimate"
    POINT_SE = "standard_error"
    STATISTIC = "test_statistic"
    PVALUE = "p_value"
    NULL_VALUE = "null_value"
    ESS = "explained_sum_squares"
    RSS = "residual_sum_squares"
    DF_NUM = "df_numerator"
    DF_DENUM = "df_denominator"
419
+
420
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
421
class NamesValidation:
    """
    String constants for the validation module.
    """
    # CSTAT_* names
    CSTAT = "c_statistic"
    CSTAT_LB = "lower_bound"
    CSTAT_UB = "upper_bound"
    CSTAT_COVERAGE = "coverage"
    CSTAT_INTERVAL = "confidence_interval"
    CSTAT_SE = "standard_error"
    FALSE_POSITIVE = "false_positive"
    SENSITIVITY = "sensitivity"
    THRESHOLD = "threshold"
    # CAL_* names
    CAL_SLOPE = "calibration_slope"
    CAL_SLOPE_SE = "calibration_slope_se"
    CAL_INTERCEPT = "calibration_in_the_large"
    CAL_INTERCEPT_SE = "calibration_in_the_large_se"
    CAL_TABLE = "observed_predict_table"
    PREDICTED_RISK = "predicted_risk"
    # RECAL_* names
    RECAL_BINOMIAL = "binomial"
    RECAL_GAUSSIAN = "gaussian"
    RECAL_SLOPE = "slope"
    RECAL_INTERCEPT = "intercept"
    RECAL_TABLE = "table_recalibrated"
    # human readable labels
    AVG_PREDICTED_RISK = "Average predict risk"
    AVG_OBSERVED_RISK = "Average observed risk"
    NO_SUBJECTS = "No. subjects"
    AVG_OBSERVED_RISK_LB = "Lower bound observed risk"
    AVG_OBSERVED_RISK_UB = "Upper bound observed risk"
    # CCC_* names, long form
    CCC = "calibration correlation coefficient"
    CCC_CI = "confidence interval"
    CCC_COR = "Pearson correlation coefficient"
    CCC_BIAS = "bias correction factor"
    CCC_SCALE = "scaling factor"
    CCC_TRANS = "translation constant"
    CCC_METHOD_F = "fisher"
    CCC_METHOD_D = "delta"
    # CCC_S_* names, short form
    CCC_S = "ccc"
    CCC_S_CI = "ccc_ci"
    CCC_S_COR = "pearson_correlation"
    CCC_S_BIAS = "bias_correction"
    CCC_S_SCALE = "scale_factor"
    CCC_S_TRANS = "translation_constant"
464
+
465
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
466
class NamesSklearnUtils:
    """
    String constants for the sklearn_utils module.
    """
    REPORT_SORT_ASC = "ascending"
    REPORT_SORT_DESC = "descending"
    REPORT_RANK = "rank_test_score"
    SEARCH_MEAN = "mean_test_score"
    SEARCH_PARAMS = "params"
    SEARCH_AVERAGE = "average"
    SEARCH_RANK = "average_rank"
477
+
478
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
479
+ # error messages
480
class Error_MSG:
    """
    A collection of error message templates, to be filled in with
    `str.format`.
    """
    INCORRECT_STRING_INPUT = "`{}` is limited to the following values `{}`."
    INVALID_STRING = "`{}` should be limited to `{}`."
    INVALID_EXACT_LENGTH = "`{}` needs to contain exactly {} elements, not {}."
    # the only template using explicit positional indices
    DIFF_LENGTHS = (
        "`{0}` and `{1}`, should have the same number of elements, "
        "not: {2}, and {3}, respectively."
    )
    NON_CONVERGENCE = "`{}` did not converge."
    FLOAT_LIMITS = "`{}` is constraint between `{}` and `{}`."
491
+
File without changes