statslibx 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statslibx/__init__.py +5 -5
- statslibx/computacional.py +126 -2
- statslibx/datasets/__init__.py +27 -11
- statslibx/datasets/course_completion.csv +100001 -0
- statslibx/datasets/iris.csv +151 -0
- statslibx/datasets/penguins.csv +345 -0
- statslibx/datasets/sp500_companies.csv +504 -0
- statslibx/datasets/titanic.csv +419 -0
- statslibx/descriptive.py +74 -8
- statslibx/inferential.py +67 -6
- statslibx/utils.py +71 -13
- {statslibx-0.1.8.dist-info → statslibx-0.2.0.dist-info}/METADATA +5 -5
- statslibx-0.2.0.dist-info/RECORD +19 -0
- {statslibx-0.1.8.dist-info → statslibx-0.2.0.dist-info}/WHEEL +1 -1
- statslibx/probability.py +0 -2
- statslibx-0.1.8.dist-info/RECORD +0 -15
- {statslibx-0.1.8.dist-info → statslibx-0.2.0.dist-info}/entry_points.txt +0 -0
- {statslibx-0.1.8.dist-info → statslibx-0.2.0.dist-info}/top_level.txt +0 -0
statslibx/utils.py
CHANGED
|
@@ -11,23 +11,81 @@ from pathlib import Path
|
|
|
11
11
|
|
|
12
12
|
class UtilsStats:
|
|
13
13
|
"""
|
|
14
|
-
|
|
14
|
+
UtilsStats
|
|
15
|
+
A utility class for common statistical operations and visualization.
|
|
16
|
+
This class provides methods for data validation, basic statistical analysis,
|
|
17
|
+
and visualization of results. It also supports loading data directly from files.
|
|
18
|
+
>>> # Load data from a file
|
|
19
|
+
>>> data = utils.load_data("data.csv")
|
|
20
|
+
>>> utils.check_normality(data, column='age')
|
|
21
|
+
>>> # Analyze data from an array
|
|
22
|
+
Methods:
|
|
23
|
+
--------
|
|
24
|
+
_setup_plotting_style():
|
|
25
|
+
Configures default plotting styles for matplotlib.
|
|
15
26
|
|
|
16
|
-
|
|
17
|
-
|
|
27
|
+
set_plot_backend(backend: Literal['matplotlib', 'seaborn', 'plotly']):
|
|
28
|
+
Sets the default visualization backend.
|
|
18
29
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
30
|
+
set_default_figsize(figsize: Tuple[int, int]):
|
|
31
|
+
Sets the default figure size for plots.
|
|
32
|
+
|
|
33
|
+
set_save_fig_options(save_fig: Optional[bool] = False, fig_format: str = 'png',
|
|
34
|
+
fig_dpi: int = 300, figures_dir: str = 'figures'):
|
|
35
|
+
Configures options for saving figures.
|
|
36
|
+
|
|
37
|
+
load_data(path: Union[str, Path], **kwargs) -> pd.DataFrame:
|
|
38
|
+
Loads data from a file in various formats (CSV, Excel, JSON, etc.).
|
|
39
|
+
|
|
40
|
+
validate_dataframe(data: Union[pd.DataFrame, np.ndarray, list, str, Path]) -> pd.DataFrame:
|
|
41
|
+
Validates and converts data to a DataFrame. Also accepts file paths.
|
|
42
|
+
|
|
43
|
+
format_number(num: float, decimals: int = 6, scientific: bool = False) -> str:
|
|
44
|
+
Formats a number with specified decimal places.
|
|
45
|
+
|
|
46
|
+
check_normality(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path],
|
|
47
|
+
column: Optional[str] = None, alpha: float = 0.05) -> dict:
|
|
48
|
+
Checks if the data follows a normal distribution using the Shapiro-Wilk test.
|
|
49
|
+
|
|
50
|
+
calculate_confidence_intervals(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path],
|
|
51
|
+
column: Optional[str] = None, confidence_level: float = 0.95,
|
|
52
|
+
Calculates confidence intervals for the mean using parametric or bootstrap methods.
|
|
53
|
+
|
|
54
|
+
detect_outliers(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path],
|
|
55
|
+
column: Optional[str] = None, method: Literal['iqr', 'zscore', 'isolation_forest'] = 'iqr',
|
|
56
|
+
Detects outliers using different methods: 'iqr', 'zscore', or 'isolation_forest'.
|
|
57
|
+
|
|
58
|
+
calculate_effect_size(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path] = None,
|
|
59
|
+
Calculates the effect size between two groups using Cohen's d or Hedges' g.
|
|
60
|
+
|
|
61
|
+
plot_distribution(data: Union[pd.DataFrame, pd.Series, np.ndarray, str, Path],
|
|
62
|
+
column: Optional[str] = None, plot_type: Literal['hist', 'kde', 'box', 'violin', 'all'] = 'hist',
|
|
63
|
+
bins: int = 30, figsize: Optional[Tuple[int, int]] = None,
|
|
64
|
+
save_fig: Optional[bool] = False, filename: Optional[str] = None, **kwargs):
|
|
65
|
+
Plots the distribution of a variable using various plot types and backends.
|
|
66
|
+
|
|
67
|
+
plot_correlation_matrix(data: Union[pd.DataFrame, str, Path],
|
|
68
|
+
filename: Optional[str] = None, **kwargs):
|
|
69
|
+
Visualizes the correlation matrix using a heatmap.
|
|
70
|
+
|
|
71
|
+
plot_scatter_matrix(data: Union[pd.DataFrame, str, Path],
|
|
72
|
+
filename: Optional[str] = None, **kwargs):
|
|
73
|
+
Creates a scatter matrix (pairplot) for visualizing relationships between variables.
|
|
74
|
+
|
|
75
|
+
plot_distribution_with_ci(data: Union[pd.DataFrame, pd.Series, np.ndarray, str, Path],
|
|
76
|
+
column: Optional[str] = None, confidence_level: float = 0.95,
|
|
77
|
+
ci_method: str = 'parametric', bins: int = 30,
|
|
78
|
+
filename: Optional[str] = None, **kwargs) -> plt.Figure:
|
|
79
|
+
Plots the distribution of a variable with confidence intervals.
|
|
80
|
+
|
|
81
|
+
get_descriptive_stats(data, column=None) -> dict:
|
|
82
|
+
Returns a dictionary of descriptive statistics for the given data.
|
|
83
|
+
|
|
84
|
+
help():
|
|
85
|
+
Displays a complete help guide for the UtilsStats class.
|
|
29
86
|
"""
|
|
30
87
|
|
|
88
|
+
|
|
31
89
|
def __init__(self):
|
|
32
90
|
"""Inicializar la clase utilitaria"""
|
|
33
91
|
self._plot_backend = 'seaborn'
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: statslibx
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary: StatsLibx - Librería de estadística descriptiva
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
|
|
5
5
|
Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Classifier: Development Status :: 3 - Alpha
|
|
@@ -28,9 +28,9 @@ Requires-Dist: statsmodels>=0.13; extra == "advanced"
|
|
|
28
28
|
|
|
29
29
|
StatsLibX es un paquete de Python diseñado para proporcionar una solución sencilla, eficiente y flexible para manejar volúmenes de datos.
|
|
30
30
|
|
|
31
|
-
Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva
|
|
31
|
+
Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadística descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con múltiples funcionalidades y utilidades pensadas para el futuro.
|
|
32
32
|
|
|
33
|
-
GitHub del Proyecto: [
|
|
33
|
+
GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
|
|
34
34
|
|
|
35
35
|
## ✨ Características principales
|
|
36
36
|
|
|
@@ -55,7 +55,7 @@ stats = DescriptiveStats(data) # InferentialStats(data), UtilsStats()
|
|
|
55
55
|
|
|
56
56
|
stats.summary()
|
|
57
57
|
```
|
|
58
|
-
Para ver mas funciones: [
|
|
58
|
+
Para ver más funciones: [https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb](https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb)
|
|
59
59
|
|
|
60
60
|
## 📦 Instalación
|
|
61
61
|
```bash
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
statslibx/__init__.py,sha256=YUKUQhO1vUYvcUQmlz1ZtvU6MWNZERdAG55-trf25ZY,1500
|
|
2
|
+
statslibx/cli.py,sha256=DqXaoP85n9xgLDlFnEkeqj-HJG0_IKX0uSqxRcHbzII,1122
|
|
3
|
+
statslibx/computacional.py,sha256=z46bRUiH9a3ajxVTYE2sGO-pg20L87MdOKM3Y_Tcq44,4062
|
|
4
|
+
statslibx/descriptive.py,sha256=GrUR4QfstUeLTXdxKSZsmKaOJkDso-QH51hlwTUaubA,63513
|
|
5
|
+
statslibx/inferential.py,sha256=xiJCppezhWK4TrAARdOufuxjZcoGKsfHtRujKfuXbgg,83068
|
|
6
|
+
statslibx/io.py,sha256=v7pxpmlEMeKyfXftl3WbkUtC9FOh1pymz7MmKPPNw98,493
|
|
7
|
+
statslibx/utils.py,sha256=gWXduW8LMN1q4ZwNggmodRsT9Rcsot-S82NsQiqrjUo,69992
|
|
8
|
+
statslibx/datasets/__init__.py,sha256=wiSp4qGwpILCiaN5vVuwWgKnbdELpbi5pxnNB9Wg2nI,7282
|
|
9
|
+
statslibx/datasets/course_completion.csv,sha256=jaqyxAh4YCsYuH5OFsjvGV7KUyM_7vQt6LgnqnNAFsI,22422135
|
|
10
|
+
statslibx/datasets/iris.csv,sha256=xSdC5QMVqZ-Vajg_rt91dVUmdfZAnvD5pHB23QhHmTA,3858
|
|
11
|
+
statslibx/datasets/penguins.csv,sha256=4HY2vYr3QmAJnqL4Z44uq7813vV5lAzHb2cGHuFsBsE,13478
|
|
12
|
+
statslibx/datasets/sp500_companies.csv,sha256=WKS72YOGnAbyLR6kD95fOpIYZt5oXGjPryyFVqLRF_k,803820
|
|
13
|
+
statslibx/datasets/titanic.csv,sha256=5seOS8ybyBMBCCWhgKZrsbu06m_OWyKtD9l0YXOImXU,29474
|
|
14
|
+
statslibx/preprocessing/__init__.py,sha256=ZwdwjBodxeOry-umJ__6yUSeubpRlZg41yve366ArkY,7395
|
|
15
|
+
statslibx-0.2.0.dist-info/METADATA,sha256=w7f-3RgizY3PHUSxoBl6YuHImHz2qFyillhZk82WUfE,2993
|
|
16
|
+
statslibx-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
17
|
+
statslibx-0.2.0.dist-info/entry_points.txt,sha256=bkCY7JDWNCZFE3I4sjgJ2oGrUgoBBbCbYmWkBAymT70,49
|
|
18
|
+
statslibx-0.2.0.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
|
|
19
|
+
statslibx-0.2.0.dist-info/RECORD,,
|
statslibx/probability.py
DELETED
statslibx-0.1.8.dist-info/RECORD
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
statslibx/__init__.py,sha256=KeEoEZVPUR_PZACWoCpS_2l6luPbEee7VRlcrLgbKQQ,1490
|
|
2
|
-
statslibx/cli.py,sha256=DqXaoP85n9xgLDlFnEkeqj-HJG0_IKX0uSqxRcHbzII,1122
|
|
3
|
-
statslibx/computacional.py,sha256=Nv8wk67RUuuv15oBRu2XPp0_k7O4ZgmT51vThH2OuFk,35
|
|
4
|
-
statslibx/descriptive.py,sha256=r5D4reP1Cdzsu1tSLmf2OEaFAkGvHSd3FIYfUclEaRU,60178
|
|
5
|
-
statslibx/inferential.py,sha256=H0R6g3dJFk-53m1bKldrXObgk0SSmpcdqQg_tIgRKBI,79169
|
|
6
|
-
statslibx/io.py,sha256=v7pxpmlEMeKyfXftl3WbkUtC9FOh1pymz7MmKPPNw98,493
|
|
7
|
-
statslibx/probability.py,sha256=MUME4eXWzbdU93F-QdKwmmyd9IgZK1flFUYQHitp10o,33
|
|
8
|
-
statslibx/utils.py,sha256=iJzt0jDacaoUfjtp4dU2PFuIBEheMP9Qrq-HnLTW_Qw,66515
|
|
9
|
-
statslibx/datasets/__init__.py,sha256=GuUl_7-d6YanuDFht1dwB1bFrqjShvKh1m-iRYAbYZE,6875
|
|
10
|
-
statslibx/preprocessing/__init__.py,sha256=ZwdwjBodxeOry-umJ__6yUSeubpRlZg41yve366ArkY,7395
|
|
11
|
-
statslibx-0.1.8.dist-info/METADATA,sha256=uyhAd0xghADIfVee7WzDp76nLA2snjqQcNayio_UrIc,2835
|
|
12
|
-
statslibx-0.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
statslibx-0.1.8.dist-info/entry_points.txt,sha256=bkCY7JDWNCZFE3I4sjgJ2oGrUgoBBbCbYmWkBAymT70,49
|
|
14
|
-
statslibx-0.1.8.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
|
|
15
|
-
statslibx-0.1.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|