statslibx 0.1.8__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statslibx-0.2.0/MANIFEST.in +1 -0
- {statslibx-0.1.8 → statslibx-0.2.0}/PKG-INFO +5 -5
- {statslibx-0.1.8 → statslibx-0.2.0}/README.md +3 -3
- {statslibx-0.1.8 → statslibx-0.2.0}/pyproject.toml +5 -2
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx/__init__.py +5 -5
- statslibx-0.2.0/statslibx/computacional.py +126 -0
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx/datasets/__init__.py +27 -11
- statslibx-0.2.0/statslibx/datasets/course_completion.csv +100001 -0
- statslibx-0.2.0/statslibx/datasets/iris.csv +151 -0
- statslibx-0.2.0/statslibx/datasets/penguins.csv +345 -0
- statslibx-0.2.0/statslibx/datasets/sp500_companies.csv +504 -0
- statslibx-0.2.0/statslibx/datasets/titanic.csv +419 -0
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx/descriptive.py +74 -8
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx/inferential.py +67 -6
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx/utils.py +71 -13
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx.egg-info/PKG-INFO +5 -5
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx.egg-info/SOURCES.txt +6 -1
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx.egg-info/top_level.txt +1 -0
- statslibx-0.1.8/statslibx/computacional.py +0 -2
- statslibx-0.1.8/statslibx/probability.py +0 -2
- {statslibx-0.1.8 → statslibx-0.2.0}/setup.cfg +0 -0
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx/cli.py +0 -0
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx/io.py +0 -0
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx/preprocessing/__init__.py +0 -0
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx.egg-info/dependency_links.txt +0 -0
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx.egg-info/entry_points.txt +0 -0
- {statslibx-0.1.8 → statslibx-0.2.0}/statslibx.egg-info/requires.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
recursive-include statslibx/datasets *.csv
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: statslibx
|
|
3
|
-
Version: 0.1.8
|
|
4
|
-
Summary: StatsLibx - Librería de estadística descriptiva
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
|
|
5
5
|
Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Classifier: Development Status :: 3 - Alpha
|
|
@@ -28,9 +28,9 @@ Requires-Dist: statsmodels>=0.13; extra == "advanced"
|
|
|
28
28
|
|
|
29
29
|
StatsLibX es un paquete de Python diseñado para proporcionar una solución sencilla, eficiente y flexible para manejar volumenes de datos.
|
|
30
30
|
|
|
31
|
-
Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva
|
|
31
|
+
Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
|
|
32
32
|
|
|
33
|
-
GitHub del Proyecto: [
|
|
33
|
+
GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
|
|
34
34
|
|
|
35
35
|
## ✨ Características principales
|
|
36
36
|
|
|
@@ -55,7 +55,7 @@ stats = DescriptiveStats(data) # InferentialStats(data), UtilsStats()
|
|
|
55
55
|
|
|
56
56
|
stats.summary()
|
|
57
57
|
```
|
|
58
|
-
Para ver mas funciones: [
|
|
58
|
+
Para ver mas funciones: [https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb](https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb)
|
|
59
59
|
|
|
60
60
|
## 📦 Instalación
|
|
61
61
|
```bash
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
StatsLibX es un paquete de Python diseñado para proporcionar una solución sencilla, eficiente y flexible para manejar volumenes de datos.
|
|
4
4
|
|
|
5
|
-
Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva
|
|
5
|
+
Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
|
|
6
6
|
|
|
7
|
-
GitHub del Proyecto: [
|
|
7
|
+
GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
|
|
8
8
|
|
|
9
9
|
## ✨ Características principales
|
|
10
10
|
|
|
@@ -29,7 +29,7 @@ stats = DescriptiveStats(data) # InferentialStats(data), UtilsStats()
|
|
|
29
29
|
|
|
30
30
|
stats.summary()
|
|
31
31
|
```
|
|
32
|
-
Para ver mas funciones: [
|
|
32
|
+
Para ver mas funciones: [https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb](https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb)
|
|
33
33
|
|
|
34
34
|
## 📦 Instalación
|
|
35
35
|
```bash
|
|
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "statslibx"
|
|
7
|
-
version = "0.1.8"
|
|
8
|
-
description = "StatsLibx - Librería de estadística descriptiva
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "StatsLibx - Librería de estadística descriptiva, inferencial y computacional"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.8"
|
|
11
11
|
license = { text = "MIT" }
|
|
@@ -38,5 +38,8 @@ advanced = ["scikit-learn>=1.0", "statsmodels>=0.13"]
|
|
|
38
38
|
[project.scripts]
|
|
39
39
|
statslibx = "statslibx.cli:main"
|
|
40
40
|
|
|
41
|
+
[tool.setuptools]
|
|
42
|
+
include-package-data = true
|
|
43
|
+
|
|
41
44
|
[tool.setuptools.packages.find]
|
|
42
45
|
where = ["."]
|
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
"""
|
|
2
2
|
StatsLibx - Librería de Estadística para Python
|
|
3
3
|
Autor: Emmanuel Ascendra
|
|
4
|
-
Versión: 0.
|
|
4
|
+
Versión: 0.2.0
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
__version__ = "0.
|
|
7
|
+
__version__ = "0.2.0"
|
|
8
8
|
__author__ = "Emmanuel Ascendra"
|
|
9
9
|
|
|
10
10
|
# Importar las clases principales
|
|
11
11
|
from .descriptive import DescriptiveStats, DescriptiveSummary
|
|
12
12
|
from .inferential import InferentialStats, TestResult
|
|
13
|
-
from .
|
|
14
|
-
from .computacional import ComputacionalStats
|
|
13
|
+
from .computacional import ComputationalStats
|
|
15
14
|
from .utils import UtilsStats
|
|
16
15
|
from .preprocessing import Preprocessing
|
|
17
16
|
from .datasets import load_dataset, generate_dataset
|
|
@@ -22,7 +21,7 @@ __all__ = [
|
|
|
22
21
|
'DescriptiveStats',
|
|
23
22
|
'InferentialStats',
|
|
24
23
|
'ProbabilityStats',
|
|
25
|
-
'
|
|
24
|
+
'ComputationalStats',
|
|
26
25
|
'UtilsStats',
|
|
27
26
|
'Preprocessing',
|
|
28
27
|
'load_dataset',
|
|
@@ -38,6 +37,7 @@ def welcome():
|
|
|
38
37
|
print(f"\nClases disponibles:")
|
|
39
38
|
print(f" - DescriptiveStats: Estadística descriptiva")
|
|
40
39
|
print(f" - InferentialStats: Estadística inferencial")
|
|
40
|
+
print(f" - ComputacionalStats: En desarrollo")
|
|
41
41
|
print(f" - UtilsStats: Utilidades Extras")
|
|
42
42
|
print(f"\nMódulos disponibles:")
|
|
43
43
|
print(f" - Datasets: Carga de Datasets")
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from typing import Union, Optional, Literal
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import polars as pl
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
class ComputationalStats:
|
|
8
|
+
"""
|
|
9
|
+
Class for computational statistics
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
def __init__(self, data: Union[pd.DataFrame, np.ndarray],
|
|
13
|
+
sep: str = None,
|
|
14
|
+
decimal: str = None,
|
|
15
|
+
thousand: str = None,
|
|
16
|
+
backend: Literal['pandas', 'polars'] = 'pandas'):
|
|
17
|
+
"""
|
|
18
|
+
# Initialize DataFrame
|
|
19
|
+
|
|
20
|
+
## **Parameters:**
|
|
21
|
+
|
|
22
|
+
- **data** : Data to analyze
|
|
23
|
+
- **sep** : Column separator
|
|
24
|
+
- **decimal** : Decimal separator
|
|
25
|
+
- **thousand** : Thousand separator
|
|
26
|
+
- **backend** : 'pandas' or 'polars' for processing
|
|
27
|
+
(Proximamente estara habilitado polars para big data)
|
|
28
|
+
|
|
29
|
+
**Examples:**
|
|
30
|
+
|
|
31
|
+
``Example 1:
|
|
32
|
+
stats = DescriptiveStats(data)
|
|
33
|
+
``
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
if isinstance(data, str) and os.path.exists(data):
|
|
37
|
+
data = ComputationalStats.from_file(data).data
|
|
38
|
+
|
|
39
|
+
if isinstance(data, pl.DataFrame):
|
|
40
|
+
raise TypeError(
|
|
41
|
+
"Polars aún no soportado. Use pandas.DataFrame."
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
if isinstance(data, np.ndarray):
|
|
46
|
+
if data.ndim == 1:
|
|
47
|
+
data = pd.DataFrame({'var': data})
|
|
48
|
+
else:
|
|
49
|
+
data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])]) \
|
|
50
|
+
if isinstance(data, pd.DataFrame) else pl.DataFrame(data, )
|
|
51
|
+
|
|
52
|
+
self.data = data
|
|
53
|
+
self.backend = backend
|
|
54
|
+
self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
|
|
55
|
+
self.sep = sep
|
|
56
|
+
self.decimal = decimal
|
|
57
|
+
self.thousand = thousand
|
|
58
|
+
|
|
59
|
+
@classmethod
|
|
60
|
+
def from_file(self, path: str):
|
|
61
|
+
"""
|
|
62
|
+
Carga automática de archivos y devuelve instancia de Intelligence.
|
|
63
|
+
Soporta CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
|
|
64
|
+
Automatic file upload and returns Intelligence instance.
|
|
65
|
+
Supports CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
|
|
66
|
+
|
|
67
|
+
Parametros / Parameters:
|
|
68
|
+
------------------------
|
|
69
|
+
path : str
|
|
70
|
+
Ruta del archivo
|
|
71
|
+
File path
|
|
72
|
+
"""
|
|
73
|
+
if not os.path.exists(path):
|
|
74
|
+
raise FileNotFoundError(f"Archivo no encontrado / File not found: {path}")
|
|
75
|
+
|
|
76
|
+
ext = os.path.splitext(path)[1].lower()
|
|
77
|
+
|
|
78
|
+
if ext == ".csv":
|
|
79
|
+
df = pd.read_csv(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
|
|
80
|
+
|
|
81
|
+
elif ext in [".xlsx", ".xls"]:
|
|
82
|
+
df = pd.read_excel(path, decimal=self.decimal, thousand=self.thousand)
|
|
83
|
+
|
|
84
|
+
elif ext in [".txt", ".tsv"]:
|
|
85
|
+
df = pd.read_table(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
|
|
86
|
+
|
|
87
|
+
elif ext == ".json":
|
|
88
|
+
df = pd.read_json(path)
|
|
89
|
+
|
|
90
|
+
elif ext == ".parquet":
|
|
91
|
+
df = pd.read_parquet(path)
|
|
92
|
+
|
|
93
|
+
elif ext == ".feather":
|
|
94
|
+
df = pd.read_feather(path)
|
|
95
|
+
|
|
96
|
+
else:
|
|
97
|
+
raise ValueError(f"Formato no soportado / Unsupported format: {ext}")
|
|
98
|
+
|
|
99
|
+
return ComputationalStats(df)
|
|
100
|
+
|
|
101
|
+
def monte_carlo(self, function, n: int = 100, return_simulations: bool = False, **kwargs) -> pd.DataFrame:
|
|
102
|
+
"""
|
|
103
|
+
Realiza simulaciones de Monte Carlo para una función y devuelve un DataFrame con las simulaciones y sus resultados.
|
|
104
|
+
"""
|
|
105
|
+
samples = []
|
|
106
|
+
|
|
107
|
+
for _ in range(n):
|
|
108
|
+
sample = function(**kwargs)
|
|
109
|
+
samples.append(float(sample))
|
|
110
|
+
|
|
111
|
+
mean = sum(samples) / n
|
|
112
|
+
variance = sum((x - mean)**2 for x in samples) / n
|
|
113
|
+
std = variance**0.5
|
|
114
|
+
|
|
115
|
+
if return_simulations:
|
|
116
|
+
return {
|
|
117
|
+
"mean": float(mean),
|
|
118
|
+
"std": float(std),
|
|
119
|
+
"samples": samples
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
else:
|
|
123
|
+
return {
|
|
124
|
+
"mean": float(mean),
|
|
125
|
+
"std": float(std)
|
|
126
|
+
}
|
|
@@ -48,10 +48,21 @@ def _X_y(
|
|
|
48
48
|
)
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
import io
|
|
52
|
+
import pkgutil
|
|
53
|
+
import pandas as pd
|
|
54
|
+
import polars as pl
|
|
55
|
+
from typing import Literal, Optional, Tuple, List, Union
|
|
56
|
+
from numpy.typing import NDArray
|
|
57
|
+
|
|
58
|
+
_SUPPORTED_BACKENDS = {"pandas", "polars"}
|
|
59
|
+
|
|
51
60
|
def load_dataset(
|
|
52
61
|
name: str,
|
|
53
62
|
backend: Literal["pandas", "polars"] = "pandas",
|
|
54
|
-
return_X_y: Optional[Tuple[List[str], str]] = None
|
|
63
|
+
return_X_y: Optional[Tuple[List[str], str]] = None,
|
|
64
|
+
save: Optional[bool] = False,
|
|
65
|
+
filename: Optional[str] = None
|
|
55
66
|
) -> Union[pd.DataFrame, pl.DataFrame, Tuple[NDArray, NDArray]]:
|
|
56
67
|
"""
|
|
57
68
|
Carga un dataset interno del paquete.
|
|
@@ -76,24 +87,29 @@ def load_dataset(
|
|
|
76
87
|
-------
|
|
77
88
|
DataFrame o (X, y)
|
|
78
89
|
"""
|
|
90
|
+
|
|
79
91
|
if backend not in _SUPPORTED_BACKENDS:
|
|
80
92
|
raise ValueError(
|
|
81
93
|
f"Backend '{backend}' no soportado. "
|
|
82
94
|
f"Use uno de {_SUPPORTED_BACKENDS}."
|
|
83
95
|
)
|
|
84
96
|
|
|
85
|
-
|
|
86
|
-
data_bytes = pkgutil.get_data("statslibx.datasets", name)
|
|
97
|
+
df = None
|
|
87
98
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
99
|
+
# ---------- 1️⃣ Intentar cargar desde el paquete ----------
|
|
100
|
+
try:
|
|
101
|
+
data_bytes = pkgutil.get_data("statslibx.datasets", name)
|
|
102
|
+
if data_bytes is not None:
|
|
103
|
+
df = (
|
|
104
|
+
pd.read_csv(io.BytesIO(data_bytes))
|
|
105
|
+
if backend == "pandas"
|
|
106
|
+
else pl.read_csv(io.BytesIO(data_bytes))
|
|
107
|
+
)
|
|
108
|
+
except FileNotFoundError:
|
|
109
|
+
pass # seguimos al siguiente intento
|
|
94
110
|
|
|
95
|
-
# ---------- 2️⃣
|
|
96
|
-
|
|
111
|
+
# ---------- 2️⃣ Intentar cargar desde ruta local ----------
|
|
112
|
+
if df is None:
|
|
97
113
|
try:
|
|
98
114
|
df = (
|
|
99
115
|
pd.read_csv(name)
|