statslibx 0.2.8__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statslibx-0.2.9/PKG-INFO +329 -0
- statslibx-0.2.9/README.md +293 -0
- statslibx-0.2.9/pyproject.toml +58 -0
- statslibx-0.2.9/statslibx/__init__.py +81 -0
- statslibx-0.2.9/statslibx/_stats_utils.py +172 -0
- statslibx-0.2.9/statslibx/backend.py +327 -0
- statslibx-0.2.9/statslibx/cli.py +265 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx/computational.py +1135 -1059
- statslibx-0.2.9/statslibx/datasets/__init__.py +266 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx/descriptive.py +1276 -1200
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx/inferential.py +1577 -1497
- statslibx-0.2.9/statslibx/preprocessing/__init__.py +521 -0
- statslibx-0.2.9/statslibx/py.typed +1 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx/utils.py +1371 -1406
- statslibx-0.2.9/statslibx/viewx/__init__.py +16 -0
- statslibx-0.2.9/statslibx/viewx/adapters.py +72 -0
- statslibx-0.2.9/statslibx.egg-info/PKG-INFO +329 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx.egg-info/SOURCES.txt +6 -1
- statslibx-0.2.9/statslibx.egg-info/requires.txt +27 -0
- statslibx-0.2.9/statslibx.egg-info/top_level.txt +1 -0
- statslibx-0.2.9/tests/test_smoke.py +170 -0
- statslibx-0.2.8/PKG-INFO +0 -125
- statslibx-0.2.8/README.md +0 -90
- statslibx-0.2.8/pyproject.toml +0 -56
- statslibx-0.2.8/statslibx/__init__.py +0 -52
- statslibx-0.2.8/statslibx/cli.py +0 -241
- statslibx-0.2.8/statslibx/datasets/__init__.py +0 -280
- statslibx-0.2.8/statslibx/preprocessing/__init__.py +0 -298
- statslibx-0.2.8/statslibx/viewx/__init__.py +0 -14
- statslibx-0.2.8/statslibx.egg-info/PKG-INFO +0 -125
- statslibx-0.2.8/statslibx.egg-info/requires.txt +0 -17
- statslibx-0.2.8/statslibx.egg-info/top_level.txt +0 -5
- {statslibx-0.2.8 → statslibx-0.2.9}/MANIFEST.in +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/setup.cfg +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx/datasets/Cocoa_Bubbles_Investment_Nigeria_Ghana_1980_2023.xlsx +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx/datasets/course_completion.csv +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx/datasets/iris.csv +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx/datasets/penguins.csv +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx/datasets/sp500_companies.csv +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx/datasets/titanic.csv +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx.egg-info/dependency_links.txt +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/statslibx.egg-info/entry_points.txt +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/tests/test1.py +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/tests/test2.py +0 -0
- {statslibx-0.2.8 → statslibx-0.2.9}/tests/test3.py +0 -0
statslibx-0.2.9/PKG-INFO
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: statslibx
|
|
3
|
+
Version: 0.2.9
|
|
4
|
+
Summary: Statistical library for descriptive, inferential, and computational analysis
|
|
5
|
+
Author: Emmanuel Ascendra
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/GhostAnalyst30/StatsLibX
|
|
8
|
+
Project-URL: Documentation, https://ghostanalyst30.github.io/StatsLibX/
|
|
9
|
+
Keywords: statistics,data-science,pandas,polars
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: numpy>=1.21
|
|
15
|
+
Requires-Dist: pandas>=1.3
|
|
16
|
+
Requires-Dist: polars>=0.20
|
|
17
|
+
Requires-Dist: scipy>=1.7
|
|
18
|
+
Requires-Dist: matplotlib>=3.4
|
|
19
|
+
Requires-Dist: seaborn>=0.11
|
|
20
|
+
Requires-Dist: plotly>=5.0
|
|
21
|
+
Requires-Dist: sympy>=1.9
|
|
22
|
+
Provides-Extra: viewx
|
|
23
|
+
Requires-Dist: viewx>=0.2.3; extra == "viewx"
|
|
24
|
+
Provides-Extra: statsmodels
|
|
25
|
+
Requires-Dist: statsmodels>=0.13; extra == "statsmodels"
|
|
26
|
+
Provides-Extra: sklearn
|
|
27
|
+
Requires-Dist: scikit-learn>=1.0; extra == "sklearn"
|
|
28
|
+
Provides-Extra: excel
|
|
29
|
+
Requires-Dist: openpyxl>=3.0; extra == "excel"
|
|
30
|
+
Provides-Extra: all
|
|
31
|
+
Requires-Dist: polars>=0.20; extra == "all"
|
|
32
|
+
Requires-Dist: viewx>=0.2.3; extra == "all"
|
|
33
|
+
Requires-Dist: statsmodels>=0.13; extra == "all"
|
|
34
|
+
Requires-Dist: scikit-learn>=1.0; extra == "all"
|
|
35
|
+
Requires-Dist: openpyxl>=3.0; extra == "all"
|
|
36
|
+
|
|
37
|
+
<p align="center">
|
|
38
|
+
<img src="https://raw.githubusercontent.com/GhostAnalyst30/StatsLibX/main/StatsLibX.png" alt="StatsLibX" width="420"/>
|
|
39
|
+
</p>
|
|
40
|
+
|
|
41
|
+
<h1 align="center">StatsLibX</h1>
|
|
42
|
+
|
|
43
|
+
<p align="center">
|
|
44
|
+
<strong>Estadística descriptiva, inferencial y computacional para Python — con pandas, polars y ViewX.</strong>
|
|
45
|
+
</p>
|
|
46
|
+
|
|
47
|
+
<p align="center">
|
|
48
|
+
<a href="https://pypi.org/project/statslibx/"><img src="https://img.shields.io/pypi/v/statslibx?label=PyPI&color=7c6af7" alt="PyPI version"/></a>
|
|
49
|
+
<a href="https://pypi.org/project/statslibx/"><img src="https://img.shields.io/pypi/pyversions/statslibx?label=Python&color=4fd1c5" alt="Python versions"/></a>
|
|
50
|
+
<a href="https://github.com/GhostAnalyst30/StatsLibX/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License MIT"/></a>
|
|
51
|
+
<a href="https://github.com/GhostAnalyst30/StatsLibX"><img src="https://img.shields.io/github/stars/GhostAnalyst30/StatsLibX?style=social" alt="GitHub stars"/></a>
|
|
52
|
+
</p>
|
|
53
|
+
|
|
54
|
+
<p align="center">
|
|
55
|
+
<a href="https://ghostanalyst30.github.io/StatsLibX/">Documentación</a> ·
|
|
56
|
+
<a href="https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb">Notebook API</a> ·
|
|
57
|
+
<a href="https://github.com/GhostAnalyst30/StatsLibX/issues">Issues</a> ·
|
|
58
|
+
<a href="https://ghostanalyst30.github.io/ViewX/">ViewX</a>
|
|
59
|
+
</p>
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
**StatsLibX** es una librería de Python moderna para análisis estadístico y ciencia de datos. Ofrece una API clara basada en clases, soporte dual **pandas / polars**, datasets embebidos, preprocesamiento, estadística computacional y un puente de reportes con **ViewX**.
|
|
64
|
+
|
|
65
|
+
> **Versión actual:** `0.2.9` · **Autor:** Emmanuel Ascendra
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Novedades en v0.2.9
|
|
70
|
+
|
|
71
|
+
| Área | Cambio |
|
|
72
|
+
|------|--------|
|
|
73
|
+
| **Arquitectura** | Capa `Backend` unificada en todos los módulos de dominio |
|
|
74
|
+
| **Polars** | `load_dataset(backend="polars")` y constructores compatibles con `pl.DataFrame` |
|
|
75
|
+
| **API** | `DescriptiveStats.from_file()`, `InferentialStats.from_file()`, `ComputationalStats.help()` |
|
|
76
|
+
| **Preprocessing** | `clean_data()` ampliado (escalado, outliers, transforms) y `change_dtypes()` con polars |
|
|
77
|
+
| **ViewX** | `to_report_data()` — serializa resultados statslibx para `Report` / `HTML` |
|
|
78
|
+
| **Packaging** | `pyproject.toml`, extras opcionales, CLI `statslibx`, marcador `py.typed` |
|
|
79
|
+
| **Docs web** | Sitio Next.js v0.2.9, playground Pyodide alineado con la API real |
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Instalación
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
pip install statslibx
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Extras opcionales
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
# ViewX (reportes HTML, slides, matrices)
|
|
93
|
+
pip install statslibx[viewx]
|
|
94
|
+
|
|
95
|
+
# Regresión avanzada (statsmodels / sklearn)
|
|
96
|
+
pip install statslibx[statsmodels,sklearn]
|
|
97
|
+
|
|
98
|
+
# Excel + todo incluido
|
|
99
|
+
pip install statslibx[excel]
|
|
100
|
+
pip install statslibx[all]
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
| Extra | Paquetes |
|
|
104
|
+
|-------|----------|
|
|
105
|
+
| `viewx` | viewx ≥ 0.2.3 |
|
|
106
|
+
| `statsmodels` | statsmodels ≥ 0.13 |
|
|
107
|
+
| `sklearn` | scikit-learn ≥ 1.0 |
|
|
108
|
+
| `excel` | openpyxl ≥ 3.0 |
|
|
109
|
+
| `all` | Todos los anteriores |
|
|
110
|
+
|
|
111
|
+
**Requisitos:** Python ≥ 3.9 · numpy · pandas · polars · scipy · matplotlib · seaborn · plotly · sympy
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Inicio rápido
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
import statslibx as slx
|
|
119
|
+
from statslibx import DescriptiveStats, InferentialStats, ComputationalStats, Preprocessing
|
|
120
|
+
from statslibx.datasets import load_iris, generate_dataset
|
|
121
|
+
|
|
122
|
+
print(f"StatsLibX v{slx.__version__}")
|
|
123
|
+
|
|
124
|
+
# Cargar dataset embebido (iris, penguins, titanic)
|
|
125
|
+
iris = load_iris()
|
|
126
|
+
print(iris.head())
|
|
127
|
+
|
|
128
|
+
# Estadística descriptiva
|
|
129
|
+
ds = DescriptiveStats(iris)
|
|
130
|
+
print(ds.mean("sepal_length"))
|
|
131
|
+
print(ds.summary())
|
|
132
|
+
|
|
133
|
+
# Prueba inferencial
|
|
134
|
+
inf = InferentialStats(iris)
|
|
135
|
+
print(inf.t_test_1sample("sepal_length", popmean=5.8))
|
|
136
|
+
|
|
137
|
+
# Desde archivo
|
|
138
|
+
stats = DescriptiveStats.from_file("mi_datos.csv")
|
|
139
|
+
|
|
140
|
+
# Datos sintéticos
|
|
141
|
+
schema = {
|
|
142
|
+
"age": {"dist": "normal", "mean": 35, "std": 10, "type": "int"},
|
|
143
|
+
"group": {"dist": "categorical", "choices": ["A", "B", "C"]},
|
|
144
|
+
}
|
|
145
|
+
df = generate_dataset(n_rows=500, schema=schema, seed=42)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Motores de datos (pandas / polars)
|
|
149
|
+
|
|
150
|
+
Todas las clases que reciben DataFrames soportan el parámetro `backend`:
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from statslibx import DescriptiveStats, InferentialStats, ComputationalStats, Preprocessing
|
|
154
|
+
|
|
155
|
+
df = load_iris()
|
|
156
|
+
|
|
157
|
+
# Auto-detecta: pandas DataFrame → pandas, polars DataFrame → polars
|
|
158
|
+
DescriptiveStats(df)
|
|
159
|
+
InferentialStats(df)
|
|
160
|
+
ComputationalStats(df)
|
|
161
|
+
Preprocessing(df)
|
|
162
|
+
|
|
163
|
+
# Forzar motor polars (convierte pandas → polars internamente)
|
|
164
|
+
DescriptiveStats(df, backend="polars")
|
|
165
|
+
InferentialStats(df, backend="polars")
|
|
166
|
+
ComputationalStats(df, backend="polars")
|
|
167
|
+
Preprocessing(df, backend="polars")
|
|
168
|
+
|
|
169
|
+
# Forzar motor pandas (convierte polars → pandas)
|
|
170
|
+
# InferentialStats(pl_df, backend="pandas")
|
|
171
|
+
|
|
172
|
+
# Desde archivo
|
|
173
|
+
DescriptiveStats.from_file("datos.csv", backend="polars")
|
|
174
|
+
InferentialStats.from_file("datos.csv", backend="polars")
|
|
175
|
+
ComputationalStats.from_file("datos.csv", backend="polars")
|
|
176
|
+
Preprocessing.from_file("datos.csv", backend="polars")
|
|
177
|
+
|
|
178
|
+
# Inspeccionar motor activo
|
|
179
|
+
stats = DescriptiveStats(df, backend="polars")
|
|
180
|
+
print(stats.backend) # "polars"
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Carga directa con polars:
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
from statslibx.datasets import load_dataset
|
|
187
|
+
|
|
188
|
+
df = load_dataset("iris.csv", backend="polars") # requiere pip install polars
|
|
189
|
+
stats = DescriptiveStats(df)
|
|
190
|
+
print(stats.backend) # "polars" (auto-detectado)
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## Módulos
|
|
196
|
+
|
|
197
|
+
| Clase / Módulo | Descripción |
|
|
198
|
+
|----------------|-------------|
|
|
199
|
+
| **`DescriptiveStats`** | Media, mediana, varianza, correlación, regresión lineal, outliers, resúmenes |
|
|
200
|
+
| **`InferentialStats`** | t-tests, ANOVA, chi-cuadrado, intervalos de confianza, normalidad |
|
|
201
|
+
| **`ComputationalStats`** | Regresión polinomial, bootstrap, k-means, interpolación, correlación |
|
|
202
|
+
| **`Preprocessing`** | Limpieza, nulos, escalado, outliers, calidad de datos, dtypes |
|
|
203
|
+
| **`UtilsStats`** | Carga de archivos, visualización (matplotlib / seaborn / plotly), effect size |
|
|
204
|
+
| **`datasets`** | `load_dataset`, `load_iris`, `load_penguins`, `generate_dataset` |
|
|
205
|
+
| **`Backend`** | Abstracción pandas / polars (`statslibx.backend`) |
|
|
206
|
+
| **`viewx`** | `HTML`, `Slides`, `Report`, `DataMatrix`, `to_report_data` |
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## Integración ViewX
|
|
211
|
+
|
|
212
|
+
StatsLibX se conecta con [ViewX](https://ghostanalyst30.github.io/ViewX/) para generar reportes y visualizaciones a partir de resultados estadísticos.
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
from statslibx import DescriptiveStats, Report, to_report_data
from statslibx.datasets import load_iris
|
|
216
|
+
|
|
217
|
+
df = load_iris()
|
|
218
|
+
summary = DescriptiveStats(df).summary()
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
<p align="center">
|
|
222
|
+
<img src="https://raw.githubusercontent.com/GhostAnalyst30/ViewX/main/images_for_git/DashBoard_Example.png" alt="ViewX example" width="600"/>
|
|
223
|
+
</p>
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
pip install statslibx[viewx]
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## CLI — Terminal
|
|
232
|
+
|
|
233
|
+
StatsLibX incluye una interfaz de línea de comandos para explorar CSV sin escribir código.
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
statslibx data iris.csv --summary --types --missing
|
|
237
|
+
statslibx describe iris.csv --numeric
|
|
238
|
+
statslibx describe iris.csv --categorical
|
|
239
|
+
statslibx quality iris.csv --verbose
|
|
240
|
+
statslibx preview iris.csv -n 10
|
|
241
|
+
statslibx info iris.csv --detailed
|
|
242
|
+
statslibx --help
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
---
|
|
246
|
+
|
|
247
|
+
## Estadística computacional
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
from statslibx import ComputationalStats
|
|
251
|
+
|
|
252
|
+
cs = ComputationalStats(df, seed=42)
|
|
253
|
+
|
|
254
|
+
# Regresión con términos de interacción
|
|
255
|
+
model = cs.regression(X=["age", "score"], y="income", interaction_terms=True)
|
|
256
|
+
print(model.get_formula())
|
|
257
|
+
print(model.summary())
|
|
258
|
+
|
|
259
|
+
# Bootstrap
|
|
260
|
+
boot = cs.bootstrapping("income", n_samples=1000, statistic="mean")
|
|
261
|
+
print(boot.summary())
|
|
262
|
+
|
|
263
|
+
# Clustering
|
|
264
|
+
kmeans = cs.k_means(k=3)
|
|
265
|
+
elbow = cs.elbow_method(max_k=10)
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
270
|
+
## Preprocesamiento
|
|
271
|
+
|
|
272
|
+
```python
|
|
273
|
+
pp = Preprocessing(df)
|
|
274
|
+
|
|
275
|
+
pp.data_quality()
|
|
276
|
+
pp.clean_data(
|
|
277
|
+
drop_duplicates=True,
|
|
278
|
+
handle_missing=True,
|
|
279
|
+
missing_strategy="median",
|
|
280
|
+
scale=True,
|
|
281
|
+
scaling_method="standard",
|
|
282
|
+
remove_outliers=True,
|
|
283
|
+
)
|
|
284
|
+
pp.preview_data(n=5)
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## Documentación
|
|
290
|
+
|
|
291
|
+
| Recurso | Enlace |
|
|
292
|
+
|---------|--------|
|
|
293
|
+
| Documentación estática | [GitHub Pages](https://ghostanalyst30.github.io/StatsLibX/) · [Vercel](https://statslibx.vercel.app/) |
|
|
294
|
+
| Notebook completo (181 celdas) | [how_use_statslibx.ipynb](https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb) |
|
|
295
|
+
| Repositorio | [github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX) |
|
|
296
|
+
| ViewX | [ViewX Page](https://viewx.vercel.app/) |
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## Estructura del paquete
|
|
301
|
+
|
|
302
|
+
```
|
|
303
|
+
statslibx/
|
|
304
|
+
├── descriptive.py # DescriptiveStats, DescriptiveSummary, LinearRegressionResult
|
|
305
|
+
├── inferential.py # InferentialStats, TestResult
|
|
306
|
+
├── computational.py # ComputationalStats, RegressionResult, BootstrappingResult
|
|
307
|
+
├── preprocessing/ # Preprocessing
|
|
308
|
+
├── datasets/ # iris, penguins, titanic + generate_dataset
|
|
309
|
+
├── utils.py # UtilsStats (I/O, plots, outliers)
|
|
310
|
+
├── backend.py # Backend pandas / polars
|
|
311
|
+
├── viewx/ # Puente ViewX + to_report_data
|
|
312
|
+
├── cli.py # statslibx CLI
|
|
313
|
+
└── py.typed # PEP 561 typed package
|
|
314
|
+
```
|
|
315
|
+
---
|
|
316
|
+
|
|
317
|
+
## Contribuciones
|
|
318
|
+
|
|
319
|
+
¡Todas las mejoras e ideas son bienvenidas!
|
|
320
|
+
|
|
321
|
+
Abre un [issue](https://github.com/GhostAnalyst30/StatsLibX/issues) o un pull request en GitHub.
|
|
322
|
+
|
|
323
|
+
**Contacto:** [ascendraemmanuel@gmail.com](mailto:ascendraemmanuel@gmail.com)
|
|
324
|
+
|
|
325
|
+
---
|
|
326
|
+
|
|
327
|
+
<p align="center">
|
|
328
|
+
Desarrollado por <strong>Emmanuel Ascendra</strong> · StatsLibX v0.2.9 · MIT License
|
|
329
|
+
</p>
|
|
statslibx-0.2.9/README.md
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="https://raw.githubusercontent.com/GhostAnalyst30/StatsLibX/main/StatsLibX.png" alt="StatsLibX" width="420"/>
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">StatsLibX</h1>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<strong>Estadística descriptiva, inferencial y computacional para Python — con pandas, polars y ViewX.</strong>
|
|
9
|
+
</p>
|
|
10
|
+
|
|
11
|
+
<p align="center">
|
|
12
|
+
<a href="https://pypi.org/project/statslibx/"><img src="https://img.shields.io/pypi/v/statslibx?label=PyPI&color=7c6af7" alt="PyPI version"/></a>
|
|
13
|
+
<a href="https://pypi.org/project/statslibx/"><img src="https://img.shields.io/pypi/pyversions/statslibx?label=Python&color=4fd1c5" alt="Python versions"/></a>
|
|
14
|
+
<a href="https://github.com/GhostAnalyst30/StatsLibX/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License MIT"/></a>
|
|
15
|
+
<a href="https://github.com/GhostAnalyst30/StatsLibX"><img src="https://img.shields.io/github/stars/GhostAnalyst30/StatsLibX?style=social" alt="GitHub stars"/></a>
|
|
16
|
+
</p>
|
|
17
|
+
|
|
18
|
+
<p align="center">
|
|
19
|
+
<a href="https://ghostanalyst30.github.io/StatsLibX/">Documentación</a> ·
|
|
20
|
+
<a href="https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb">Notebook API</a> ·
|
|
21
|
+
<a href="https://github.com/GhostAnalyst30/StatsLibX/issues">Issues</a> ·
|
|
22
|
+
<a href="https://ghostanalyst30.github.io/ViewX/">ViewX</a>
|
|
23
|
+
</p>
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
**StatsLibX** es una librería de Python moderna para análisis estadístico y ciencia de datos. Ofrece una API clara basada en clases, soporte dual **pandas / polars**, datasets embebidos, preprocesamiento, estadística computacional y un puente de reportes con **ViewX**.
|
|
28
|
+
|
|
29
|
+
> **Versión actual:** `0.2.9` · **Autor:** Emmanuel Ascendra
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Novedades en v0.2.9
|
|
34
|
+
|
|
35
|
+
| Área | Cambio |
|
|
36
|
+
|------|--------|
|
|
37
|
+
| **Arquitectura** | Capa `Backend` unificada en todos los módulos de dominio |
|
|
38
|
+
| **Polars** | `load_dataset(backend="polars")` y constructores compatibles con `pl.DataFrame` |
|
|
39
|
+
| **API** | `DescriptiveStats.from_file()`, `InferentialStats.from_file()`, `ComputationalStats.help()` |
|
|
40
|
+
| **Preprocessing** | `clean_data()` ampliado (escalado, outliers, transforms) y `change_dtypes()` con polars |
|
|
41
|
+
| **ViewX** | `to_report_data()` — serializa resultados statslibx para `Report` / `HTML` |
|
|
42
|
+
| **Packaging** | `pyproject.toml`, extras opcionales, CLI `statslibx`, marcador `py.typed` |
|
|
43
|
+
| **Docs web** | Sitio Next.js v0.2.9, playground Pyodide alineado con la API real |
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Instalación
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install statslibx
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Extras opcionales
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# ViewX (reportes HTML, slides, matrices)
|
|
57
|
+
pip install statslibx[viewx]
|
|
58
|
+
|
|
59
|
+
# Regresión avanzada (statsmodels / sklearn)
|
|
60
|
+
pip install statslibx[statsmodels,sklearn]
|
|
61
|
+
|
|
62
|
+
# Excel + todo incluido
|
|
63
|
+
pip install statslibx[excel]
|
|
64
|
+
pip install statslibx[all]
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
| Extra | Paquetes |
|
|
68
|
+
|-------|----------|
|
|
69
|
+
| `viewx` | viewx ≥ 0.2.3 |
|
|
70
|
+
| `statsmodels` | statsmodels ≥ 0.13 |
|
|
71
|
+
| `sklearn` | scikit-learn ≥ 1.0 |
|
|
72
|
+
| `excel` | openpyxl ≥ 3.0 |
|
|
73
|
+
| `all` | Todos los anteriores |
|
|
74
|
+
|
|
75
|
+
**Requisitos:** Python ≥ 3.9 · numpy · pandas · polars · scipy · matplotlib · seaborn · plotly · sympy
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Inicio rápido
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import statslibx as slx
|
|
83
|
+
from statslibx import DescriptiveStats, InferentialStats, ComputationalStats, Preprocessing
|
|
84
|
+
from statslibx.datasets import load_iris, generate_dataset
|
|
85
|
+
|
|
86
|
+
print(f"StatsLibX v{slx.__version__}")
|
|
87
|
+
|
|
88
|
+
# Cargar dataset embebido (iris, penguins, titanic)
|
|
89
|
+
iris = load_iris()
|
|
90
|
+
print(iris.head())
|
|
91
|
+
|
|
92
|
+
# Estadística descriptiva
|
|
93
|
+
ds = DescriptiveStats(iris)
|
|
94
|
+
print(ds.mean("sepal_length"))
|
|
95
|
+
print(ds.summary())
|
|
96
|
+
|
|
97
|
+
# Prueba inferencial
|
|
98
|
+
inf = InferentialStats(iris)
|
|
99
|
+
print(inf.t_test_1sample("sepal_length", popmean=5.8))
|
|
100
|
+
|
|
101
|
+
# Desde archivo
|
|
102
|
+
stats = DescriptiveStats.from_file("mi_datos.csv")
|
|
103
|
+
|
|
104
|
+
# Datos sintéticos
|
|
105
|
+
schema = {
|
|
106
|
+
"age": {"dist": "normal", "mean": 35, "std": 10, "type": "int"},
|
|
107
|
+
"group": {"dist": "categorical", "choices": ["A", "B", "C"]},
|
|
108
|
+
}
|
|
109
|
+
df = generate_dataset(n_rows=500, schema=schema, seed=42)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Motores de datos (pandas / polars)
|
|
113
|
+
|
|
114
|
+
Todas las clases que reciben DataFrames soportan el parámetro `backend`:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from statslibx import DescriptiveStats, InferentialStats, ComputationalStats, Preprocessing
|
|
118
|
+
|
|
119
|
+
df = load_iris()
|
|
120
|
+
|
|
121
|
+
# Auto-detecta: pandas DataFrame → pandas, polars DataFrame → polars
|
|
122
|
+
DescriptiveStats(df)
|
|
123
|
+
InferentialStats(df)
|
|
124
|
+
ComputationalStats(df)
|
|
125
|
+
Preprocessing(df)
|
|
126
|
+
|
|
127
|
+
# Forzar motor polars (convierte pandas → polars internamente)
|
|
128
|
+
DescriptiveStats(df, backend="polars")
|
|
129
|
+
InferentialStats(df, backend="polars")
|
|
130
|
+
ComputationalStats(df, backend="polars")
|
|
131
|
+
Preprocessing(df, backend="polars")
|
|
132
|
+
|
|
133
|
+
# Forzar motor pandas (convierte polars → pandas)
|
|
134
|
+
# InferentialStats(pl_df, backend="pandas")
|
|
135
|
+
|
|
136
|
+
# Desde archivo
|
|
137
|
+
DescriptiveStats.from_file("datos.csv", backend="polars")
|
|
138
|
+
InferentialStats.from_file("datos.csv", backend="polars")
|
|
139
|
+
ComputationalStats.from_file("datos.csv", backend="polars")
|
|
140
|
+
Preprocessing.from_file("datos.csv", backend="polars")
|
|
141
|
+
|
|
142
|
+
# Inspeccionar motor activo
|
|
143
|
+
stats = DescriptiveStats(df, backend="polars")
|
|
144
|
+
print(stats.backend) # "polars"
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Carga directa con polars:
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from statslibx.datasets import load_dataset
|
|
151
|
+
|
|
152
|
+
df = load_dataset("iris.csv", backend="polars") # requiere pip install polars
|
|
153
|
+
stats = DescriptiveStats(df)
|
|
154
|
+
print(stats.backend) # "polars" (auto-detectado)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Módulos
|
|
160
|
+
|
|
161
|
+
| Clase / Módulo | Descripción |
|
|
162
|
+
|----------------|-------------|
|
|
163
|
+
| **`DescriptiveStats`** | Media, mediana, varianza, correlación, regresión lineal, outliers, resúmenes |
|
|
164
|
+
| **`InferentialStats`** | t-tests, ANOVA, chi-cuadrado, intervalos de confianza, normalidad |
|
|
165
|
+
| **`ComputationalStats`** | Regresión polinomial, bootstrap, k-means, interpolación, correlación |
|
|
166
|
+
| **`Preprocessing`** | Limpieza, nulos, escalado, outliers, calidad de datos, dtypes |
|
|
167
|
+
| **`UtilsStats`** | Carga de archivos, visualización (matplotlib / seaborn / plotly), effect size |
|
|
168
|
+
| **`datasets`** | `load_dataset`, `load_iris`, `load_penguins`, `generate_dataset` |
|
|
169
|
+
| **`Backend`** | Abstracción pandas / polars (`statslibx.backend`) |
|
|
170
|
+
| **`viewx`** | `HTML`, `Slides`, `Report`, `DataMatrix`, `to_report_data` |
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Integración ViewX
|
|
175
|
+
|
|
176
|
+
StatsLibX se conecta con [ViewX](https://ghostanalyst30.github.io/ViewX/) para generar reportes y visualizaciones a partir de resultados estadísticos.
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from statslibx import DescriptiveStats, Report, to_report_data
from statslibx.datasets import load_iris
|
|
180
|
+
|
|
181
|
+
df = load_iris()
|
|
182
|
+
summary = DescriptiveStats(df).summary()
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
<p align="center">
|
|
186
|
+
<img src="https://raw.githubusercontent.com/GhostAnalyst30/ViewX/main/images_for_git/DashBoard_Example.png" alt="ViewX example" width="600"/>
|
|
187
|
+
</p>
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
pip install statslibx[viewx]
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## CLI — Terminal
|
|
196
|
+
|
|
197
|
+
StatsLibX incluye una interfaz de línea de comandos para explorar CSV sin escribir código.
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
statslibx data iris.csv --summary --types --missing
|
|
201
|
+
statslibx describe iris.csv --numeric
|
|
202
|
+
statslibx describe iris.csv --categorical
|
|
203
|
+
statslibx quality iris.csv --verbose
|
|
204
|
+
statslibx preview iris.csv -n 10
|
|
205
|
+
statslibx info iris.csv --detailed
|
|
206
|
+
statslibx --help
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## Estadística computacional
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
from statslibx import ComputationalStats
|
|
215
|
+
|
|
216
|
+
cs = ComputationalStats(df, seed=42)
|
|
217
|
+
|
|
218
|
+
# Regresión con términos de interacción
|
|
219
|
+
model = cs.regression(X=["age", "score"], y="income", interaction_terms=True)
|
|
220
|
+
print(model.get_formula())
|
|
221
|
+
print(model.summary())
|
|
222
|
+
|
|
223
|
+
# Bootstrap
|
|
224
|
+
boot = cs.bootstrapping("income", n_samples=1000, statistic="mean")
|
|
225
|
+
print(boot.summary())
|
|
226
|
+
|
|
227
|
+
# Clustering
|
|
228
|
+
kmeans = cs.k_means(k=3)
|
|
229
|
+
elbow = cs.elbow_method(max_k=10)
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Preprocesamiento
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
pp = Preprocessing(df)
|
|
238
|
+
|
|
239
|
+
pp.data_quality()
|
|
240
|
+
pp.clean_data(
|
|
241
|
+
drop_duplicates=True,
|
|
242
|
+
handle_missing=True,
|
|
243
|
+
missing_strategy="median",
|
|
244
|
+
scale=True,
|
|
245
|
+
scaling_method="standard",
|
|
246
|
+
remove_outliers=True,
|
|
247
|
+
)
|
|
248
|
+
pp.preview_data(n=5)
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
## Documentación
|
|
254
|
+
|
|
255
|
+
| Recurso | Enlace |
|
|
256
|
+
|---------|--------|
|
|
257
|
+
| Documentación estática | [GitHub Pages](https://ghostanalyst30.github.io/StatsLibX/) · [Vercel](https://statslibx.vercel.app/) |
|
|
258
|
+
| Notebook completo (181 celdas) | [how_use_statslibx.ipynb](https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb) |
|
|
259
|
+
| Repositorio | [github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX) |
|
|
260
|
+
| ViewX | [ViewX Page](https://viewx.vercel.app/) |
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
## Estructura del paquete
|
|
265
|
+
|
|
266
|
+
```
|
|
267
|
+
statslibx/
|
|
268
|
+
├── descriptive.py # DescriptiveStats, DescriptiveSummary, LinearRegressionResult
|
|
269
|
+
├── inferential.py # InferentialStats, TestResult
|
|
270
|
+
├── computational.py # ComputationalStats, RegressionResult, BootstrappingResult
|
|
271
|
+
├── preprocessing/ # Preprocessing
|
|
272
|
+
├── datasets/ # iris, penguins, titanic + generate_dataset
|
|
273
|
+
├── utils.py # UtilsStats (I/O, plots, outliers)
|
|
274
|
+
├── backend.py # Backend pandas / polars
|
|
275
|
+
├── viewx/ # Puente ViewX + to_report_data
|
|
276
|
+
├── cli.py # statslibx CLI
|
|
277
|
+
└── py.typed # PEP 561 typed package
|
|
278
|
+
```
|
|
279
|
+
---
|
|
280
|
+
|
|
281
|
+
## Contribuciones
|
|
282
|
+
|
|
283
|
+
¡Todas las mejoras e ideas son bienvenidas!
|
|
284
|
+
|
|
285
|
+
Abre un [issue](https://github.com/GhostAnalyst30/StatsLibX/issues) o un pull request en GitHub.
|
|
286
|
+
|
|
287
|
+
**Contacto:** [ascendraemmanuel@gmail.com](mailto:ascendraemmanuel@gmail.com)
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
<p align="center">
|
|
292
|
+
Desarrollado por <strong>Emmanuel Ascendra</strong> · StatsLibX v0.2.9 · MIT License
|
|
293
|
+
</p>
|
|
statslibx-0.2.9/pyproject.toml
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "statslibx"
|
|
7
|
+
version = "0.2.9"
|
|
8
|
+
description = "Statistical library for descriptive, inferential, and computational analysis"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Emmanuel Ascendra" }]
|
|
13
|
+
keywords = ["statistics", "data-science", "pandas", "polars"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
17
|
+
]
|
|
18
|
+
dependencies = [
|
|
19
|
+
"numpy>=1.21",
|
|
20
|
+
"pandas>=1.3",
|
|
21
|
+
"polars>=0.20",
|
|
22
|
+
"scipy>=1.7",
|
|
23
|
+
"matplotlib>=3.4",
|
|
24
|
+
"seaborn>=0.11",
|
|
25
|
+
"plotly>=5.0",
|
|
26
|
+
"sympy>=1.9",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
viewx = ["viewx>=0.2.3"]
|
|
31
|
+
statsmodels = ["statsmodels>=0.13"]
|
|
32
|
+
sklearn = ["scikit-learn>=1.0"]
|
|
33
|
+
excel = ["openpyxl>=3.0"]
|
|
34
|
+
all = [
|
|
35
|
+
"polars>=0.20",
|
|
36
|
+
"viewx>=0.2.3",
|
|
37
|
+
"statsmodels>=0.13",
|
|
38
|
+
"scikit-learn>=1.0",
|
|
39
|
+
"openpyxl>=3.0",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.scripts]
|
|
43
|
+
statslibx = "statslibx.cli:main"
|
|
44
|
+
|
|
45
|
+
[project.urls]
|
|
46
|
+
Homepage = "https://github.com/GhostAnalyst30/StatsLibX"
|
|
47
|
+
Documentation = "https://ghostanalyst30.github.io/StatsLibX/"
|
|
48
|
+
|
|
49
|
+
[tool.setuptools.packages.find]
|
|
50
|
+
where = ["."]
|
|
51
|
+
include = ["statslibx*"]
|
|
52
|
+
|
|
53
|
+
[tool.setuptools.package-data]
|
|
54
|
+
statslibx = ["py.typed", "datasets/*.csv"]
|
|
55
|
+
|
|
56
|
+
[tool.pytest.ini_options]
|
|
57
|
+
testpaths = ["tests"]
|
|
58
|
+
python_files = ["test_*.py"]
|