statslibx 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statslibx/__init__.py +2 -2
- statslibx/datasets/__init__.py +4 -3
- statslibx/descriptive.py +10 -63
- statslibx/inferential.py +7 -49
- {statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/METADATA +1 -2
- {statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/RECORD +10 -9
- {statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/top_level.txt +1 -0
- tests/test1.py +14 -0
- {statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/WHEEL +0 -0
- {statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/entry_points.txt +0 -0
statslibx/__init__.py
CHANGED
statslibx/datasets/__init__.py
CHANGED
|
@@ -61,8 +61,9 @@ def load_dataset(
|
|
|
61
61
|
name: str,
|
|
62
62
|
backend: Literal["pandas", "polars"] = "pandas",
|
|
63
63
|
return_X_y: Optional[Tuple[List[str], str]] = None,
|
|
64
|
+
sep: str = ",",
|
|
64
65
|
save: Optional[bool] = False,
|
|
65
|
-
filename: Optional[str] = None
|
|
66
|
+
filename: Optional[str] = None,
|
|
66
67
|
) -> Union[pd.DataFrame, pl.DataFrame, Tuple[NDArray, NDArray]]:
|
|
67
68
|
"""
|
|
68
69
|
Carga un dataset interno del paquete.
|
|
@@ -101,7 +102,7 @@ def load_dataset(
|
|
|
101
102
|
data_bytes = pkgutil.get_data("statslibx.datasets", name)
|
|
102
103
|
if data_bytes is not None:
|
|
103
104
|
df = (
|
|
104
|
-
pd.read_csv(io.BytesIO(data_bytes))
|
|
105
|
+
pd.read_csv(io.BytesIO(data_bytes), sep=sep)
|
|
105
106
|
if backend == "pandas"
|
|
106
107
|
else pl.read_csv(io.BytesIO(data_bytes))
|
|
107
108
|
)
|
|
@@ -112,7 +113,7 @@ def load_dataset(
|
|
|
112
113
|
if df is None:
|
|
113
114
|
try:
|
|
114
115
|
df = (
|
|
115
|
-
pd.read_csv(name)
|
|
116
|
+
pd.read_csv(name, sep=sep)
|
|
116
117
|
if backend == "pandas"
|
|
117
118
|
else pl.read_csv(name)
|
|
118
119
|
)
|
statslibx/descriptive.py
CHANGED
|
@@ -3,7 +3,6 @@ import pandas as pd
|
|
|
3
3
|
import polars as pl
|
|
4
4
|
from typing import Optional, Union, Literal, List
|
|
5
5
|
from datetime import datetime
|
|
6
|
-
import flet as ft
|
|
7
6
|
import os
|
|
8
7
|
import matplotlib.pyplot as plt
|
|
9
8
|
import seaborn as sns
|
|
@@ -81,10 +80,6 @@ class DescriptiveStats:
|
|
|
81
80
|
"""
|
|
82
81
|
|
|
83
82
|
def __init__(self, data: Union[pd.DataFrame, np.ndarray],
|
|
84
|
-
sep: str = None,
|
|
85
|
-
decimal: str = None,
|
|
86
|
-
thousand: str = None,
|
|
87
|
-
backend: Literal['pandas', 'polars'] = 'pandas',
|
|
88
83
|
lang: Literal['es-ES', 'en-US'] = 'es-ES'):
|
|
89
84
|
"""
|
|
90
85
|
# Initialize DataFrame
|
|
@@ -92,9 +87,6 @@ class DescriptiveStats:
|
|
|
92
87
|
## **Parameters:**
|
|
93
88
|
|
|
94
89
|
- **data** : Data to analyze
|
|
95
|
-
- **sep** : Column separator
|
|
96
|
-
- **decimal** : Decimal separator
|
|
97
|
-
- **thousand** : Thousand separator
|
|
98
90
|
- **backend** : 'pandas' or 'polars' for processing
|
|
99
91
|
(Proximamente estara habilitado polars para big data)
|
|
100
92
|
|
|
@@ -104,72 +96,26 @@ class DescriptiveStats:
|
|
|
104
96
|
stats = DescriptiveStats(data)
|
|
105
97
|
``
|
|
106
98
|
"""
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
99
|
+
if isinstance(data, pd.DataFrame):
|
|
100
|
+
self.data = data
|
|
101
|
+
elif isinstance(data, np.ndarray):
|
|
102
|
+
self.data = pd.DataFrame(data)
|
|
103
|
+
else:
|
|
112
104
|
raise TypeError(
|
|
113
|
-
"
|
|
105
|
+
"Data must be a pandas.DataFrame or numpy.ndarray."
|
|
114
106
|
)
|
|
115
107
|
|
|
116
|
-
|
|
117
108
|
if isinstance(data, np.ndarray):
|
|
118
109
|
if data.ndim == 1:
|
|
119
110
|
data = pd.DataFrame({'var': data})
|
|
120
111
|
else:
|
|
121
|
-
data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])]
|
|
112
|
+
data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])],
|
|
113
|
+
sep=self.sep) \
|
|
122
114
|
if isinstance(data, pd.DataFrame) else pl.DataFrame(data, )
|
|
123
|
-
|
|
124
|
-
self.data = data
|
|
125
|
-
self.backend = backend
|
|
115
|
+
|
|
126
116
|
self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
|
|
127
|
-
self.sep = sep
|
|
128
|
-
self.decimal = decimal
|
|
129
|
-
self.thousand = thousand
|
|
130
117
|
self.lang = lang
|
|
131
|
-
|
|
132
|
-
@classmethod
|
|
133
|
-
def from_file(self, path: str):
|
|
134
|
-
"""
|
|
135
|
-
Carga automática de archivos y devuelve instancia de Intelligence.
|
|
136
|
-
Soporta CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
|
|
137
|
-
Automatic file upload and returns Intelligence instance.
|
|
138
|
-
Supports CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
|
|
139
|
-
|
|
140
|
-
Parametros / Parameters:
|
|
141
|
-
------------------------
|
|
142
|
-
path : str
|
|
143
|
-
Ruta del archivo
|
|
144
|
-
File path
|
|
145
|
-
"""
|
|
146
|
-
if not os.path.exists(path):
|
|
147
|
-
raise FileNotFoundError(f"Archivo no encontrado / File not found: {path}")
|
|
148
|
-
|
|
149
|
-
ext = os.path.splitext(path)[1].lower()
|
|
150
|
-
|
|
151
|
-
if ext == ".csv":
|
|
152
|
-
df = pd.read_csv(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
|
|
153
|
-
|
|
154
|
-
elif ext in [".xlsx", ".xls"]:
|
|
155
|
-
df = pd.read_excel(path, decimal=self.decimal, thousand=self.thousand)
|
|
156
|
-
|
|
157
|
-
elif ext in [".txt", ".tsv"]:
|
|
158
|
-
df = pd.read_table(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
|
|
159
|
-
|
|
160
|
-
elif ext == ".json":
|
|
161
|
-
df = pd.read_json(path)
|
|
162
118
|
|
|
163
|
-
elif ext == ".parquet":
|
|
164
|
-
df = pd.read_parquet(path)
|
|
165
|
-
|
|
166
|
-
elif ext == ".feather":
|
|
167
|
-
df = pd.read_feather(path)
|
|
168
|
-
|
|
169
|
-
else:
|
|
170
|
-
raise ValueError(f"Formato no soportado / Unsupported format: {ext}")
|
|
171
|
-
|
|
172
|
-
return DescriptiveStats(df)
|
|
173
119
|
|
|
174
120
|
# ============= MÉTODOS UNIVARIADOS =============
|
|
175
121
|
|
|
@@ -1262,3 +1208,4 @@ class LinearRegressionResult:
|
|
|
1262
1208
|
plt.ylabel("Residuos")
|
|
1263
1209
|
plt.title("Residuos vs Predicciones")
|
|
1264
1210
|
plt.show()
|
|
1211
|
+
|
statslibx/inferential.py
CHANGED
|
@@ -69,8 +69,6 @@ class InferentialStats:
|
|
|
69
69
|
"""
|
|
70
70
|
|
|
71
71
|
def __init__(self, data: Union[pd.DataFrame, np.ndarray],
|
|
72
|
-
backend: Literal['pandas', 'polars'] = 'pandas',
|
|
73
|
-
sep: str = None, decimal: str = None, thousand: str = None,
|
|
74
72
|
lang: Literal['es-ES', 'en-US'] = 'es-ES'):
|
|
75
73
|
"""
|
|
76
74
|
Initialize DataFrame
|
|
@@ -79,17 +77,16 @@ class InferentialStats:
|
|
|
79
77
|
-----------
|
|
80
78
|
data : DataFrame o ndarray
|
|
81
79
|
Data to analyze
|
|
82
|
-
backend : str
|
|
83
|
-
'pandas' or 'polars' for processing
|
|
84
80
|
"""
|
|
85
81
|
|
|
86
|
-
if isinstance(data,
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
82
|
+
if isinstance(data, pd.DataFrame):
|
|
83
|
+
self.data = data
|
|
84
|
+
elif isinstance(data, np.ndarray):
|
|
85
|
+
self.data = pd.DataFrame(data)
|
|
86
|
+
else:
|
|
90
87
|
raise TypeError(
|
|
91
|
-
"
|
|
92
|
-
)
|
|
88
|
+
"Data must be a pandas.DataFrame or numpy.ndarray."
|
|
89
|
+
)
|
|
93
90
|
|
|
94
91
|
if isinstance(data, np.ndarray):
|
|
95
92
|
if data.ndim == 1:
|
|
@@ -98,48 +95,9 @@ class InferentialStats:
|
|
|
98
95
|
data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])])
|
|
99
96
|
|
|
100
97
|
self.data = data
|
|
101
|
-
self.backend = backend
|
|
102
98
|
self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
|
|
103
|
-
self.sep = sep
|
|
104
|
-
self.decimal = decimal
|
|
105
|
-
self.thousand = thousand
|
|
106
99
|
self.lang = lang
|
|
107
100
|
|
|
108
|
-
@classmethod
|
|
109
|
-
def from_file(path: str):
|
|
110
|
-
"""
|
|
111
|
-
Carga automática de archivos y devuelve instancia de Intelligence.
|
|
112
|
-
Soporta CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
|
|
113
|
-
"""
|
|
114
|
-
|
|
115
|
-
if not os.path.exists(path):
|
|
116
|
-
raise FileNotFoundError(f"Archivo no encontrado / File not found: {path}")
|
|
117
|
-
|
|
118
|
-
ext = os.path.splitext(path)[1].lower()
|
|
119
|
-
|
|
120
|
-
if ext == ".csv":
|
|
121
|
-
df = pd.read_csv(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
|
|
122
|
-
|
|
123
|
-
elif ext in [".xlsx", ".xls"]:
|
|
124
|
-
df = pd.read_excel(path, decimal=self.decimal, thousand=self.thousand)
|
|
125
|
-
|
|
126
|
-
elif ext in [".txt", ".tsv"]:
|
|
127
|
-
df = pd.read_table(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
|
|
128
|
-
|
|
129
|
-
elif ext == ".json":
|
|
130
|
-
df = pd.read_json(path)
|
|
131
|
-
|
|
132
|
-
elif ext == ".parquet":
|
|
133
|
-
df = pd.read_parquet(path)
|
|
134
|
-
|
|
135
|
-
elif ext == ".feather":
|
|
136
|
-
df = pd.read_feather(path)
|
|
137
|
-
|
|
138
|
-
else:
|
|
139
|
-
raise ValueError(f"Formato no soportado: {ext}")
|
|
140
|
-
|
|
141
|
-
return InferentialStats(df)
|
|
142
|
-
|
|
143
101
|
# ============= INTERVALOS DE CONFIANZA =============
|
|
144
102
|
|
|
145
103
|
def confidence_interval(self, column: str, confidence: float = 0.95,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: statslibx
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
|
|
5
5
|
Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -16,7 +16,6 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
16
16
|
Requires-Python: >=3.8
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
Requires-Dist: pandas>=1.5
|
|
19
|
-
Requires-Dist: polars>=0.20
|
|
20
19
|
Provides-Extra: viz
|
|
21
20
|
Requires-Dist: seaborn>=0.11; extra == "viz"
|
|
22
21
|
Requires-Dist: plotly>=5.0; extra == "viz"
|
|
@@ -1,19 +1,20 @@
|
|
|
1
|
-
statslibx/__init__.py,sha256=
|
|
1
|
+
statslibx/__init__.py,sha256=82KG6z_wJZf_ZF8jpViRvtzn4qV9uEZd8a3sRUucKLE,1500
|
|
2
2
|
statslibx/cli.py,sha256=DqXaoP85n9xgLDlFnEkeqj-HJG0_IKX0uSqxRcHbzII,1122
|
|
3
3
|
statslibx/computacional.py,sha256=z46bRUiH9a3ajxVTYE2sGO-pg20L87MdOKM3Y_Tcq44,4062
|
|
4
|
-
statslibx/descriptive.py,sha256=
|
|
5
|
-
statslibx/inferential.py,sha256=
|
|
4
|
+
statslibx/descriptive.py,sha256=QLIzPB-pEC2BXCIUsjpDyU7peHAs6fRduPukj1gA160,61671
|
|
5
|
+
statslibx/inferential.py,sha256=_mUzX-Uo2Y55zVTZbQnIRloqKcHjh40djLW1J12HQPU,81617
|
|
6
6
|
statslibx/io.py,sha256=v7pxpmlEMeKyfXftl3WbkUtC9FOh1pymz7MmKPPNw98,493
|
|
7
7
|
statslibx/utils.py,sha256=gWXduW8LMN1q4ZwNggmodRsT9Rcsot-S82NsQiqrjUo,69992
|
|
8
|
-
statslibx/datasets/__init__.py,sha256=
|
|
8
|
+
statslibx/datasets/__init__.py,sha256=KI1N2ByjWpmr9F9_1CDDHEnZ-kDJEKmZON7_4E6Jf_4,7322
|
|
9
9
|
statslibx/datasets/course_completion.csv,sha256=jaqyxAh4YCsYuH5OFsjvGV7KUyM_7vQt6LgnqnNAFsI,22422135
|
|
10
10
|
statslibx/datasets/iris.csv,sha256=xSdC5QMVqZ-Vajg_rt91dVUmdfZAnvD5pHB23QhHmTA,3858
|
|
11
11
|
statslibx/datasets/penguins.csv,sha256=4HY2vYr3QmAJnqL4Z44uq7813vV5lAzHb2cGHuFsBsE,13478
|
|
12
12
|
statslibx/datasets/sp500_companies.csv,sha256=WKS72YOGnAbyLR6kD95fOpIYZt5oXGjPryyFVqLRF_k,803820
|
|
13
13
|
statslibx/datasets/titanic.csv,sha256=5seOS8ybyBMBCCWhgKZrsbu06m_OWyKtD9l0YXOImXU,29474
|
|
14
14
|
statslibx/preprocessing/__init__.py,sha256=ZwdwjBodxeOry-umJ__6yUSeubpRlZg41yve366ArkY,7395
|
|
15
|
-
|
|
16
|
-
statslibx-0.2.
|
|
17
|
-
statslibx-0.2.
|
|
18
|
-
statslibx-0.2.
|
|
19
|
-
statslibx-0.2.
|
|
15
|
+
tests/test1.py,sha256=zGaLe9cKLCLrgNbjo-WeDGIjdH4bODtm1_juOn96Mtk,306
|
|
16
|
+
statslibx-0.2.1.dist-info/METADATA,sha256=mNVj_Qo9pROrznPaOkCvWBH7ypw_0j0p9WdCWHgFt5o,2964
|
|
17
|
+
statslibx-0.2.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
18
|
+
statslibx-0.2.1.dist-info/entry_points.txt,sha256=bkCY7JDWNCZFE3I4sjgJ2oGrUgoBBbCbYmWkBAymT70,49
|
|
19
|
+
statslibx-0.2.1.dist-info/top_level.txt,sha256=Mz7hCT3d_WEbs8d6hWac4m3fkI4RlxUkXnHYt967KG8,16
|
|
20
|
+
statslibx-0.2.1.dist-info/RECORD,,
|
tests/test1.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from statslibx import load_dataset, DescriptiveStats, InferentialStats
|
|
2
|
+
import pandas as pd
|
|
3
|
+
# df = pd.read_csv(r"tests\bank (1).csv", sep=";")
|
|
4
|
+
|
|
5
|
+
df = load_dataset(r"tests\bank (1).csv", sep=";")
|
|
6
|
+
stats = DescriptiveStats(df)
|
|
7
|
+
print(stats.data)
|
|
8
|
+
|
|
9
|
+
infer = InferentialStats(df)
|
|
10
|
+
print(infer.data)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
File without changes
|
|
File without changes
|