statslibx 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statslibx/__init__.py +2 -2
- statslibx/datasets/__init__.py +1 -0
- statslibx/datasets/course_completion.csv +100001 -0
- statslibx/descriptive.py +39 -0
- statslibx/inferential.py +39 -0
- statslibx/utils.py +243 -404
- {statslibx-0.1.4.dist-info → statslibx-0.1.5.dist-info}/METADATA +1 -1
- statslibx-0.1.5.dist-info/RECORD +14 -0
- statslibx-0.1.4.dist-info/RECORD +0 -13
- {statslibx-0.1.4.dist-info → statslibx-0.1.5.dist-info}/WHEEL +0 -0
- {statslibx-0.1.4.dist-info → statslibx-0.1.5.dist-info}/top_level.txt +0 -0
statslibx/descriptive.py
CHANGED
|
@@ -2,6 +2,7 @@ import numpy as np
|
|
|
2
2
|
import pandas as pd
|
|
3
3
|
from typing import Optional, Union, Literal, List
|
|
4
4
|
from datetime import datetime
|
|
5
|
+
import os
|
|
5
6
|
|
|
6
7
|
class DescriptiveStats:
|
|
7
8
|
"""
|
|
@@ -20,6 +21,10 @@ class DescriptiveStats:
|
|
|
20
21
|
backend : str
|
|
21
22
|
'pandas' o 'polars' para procesamiento
|
|
22
23
|
"""
|
|
24
|
+
|
|
25
|
+
if isinstance(data, str) and os.path.exists(data):
|
|
26
|
+
data = DescriptiveStats.from_file(data).data
|
|
27
|
+
|
|
23
28
|
if isinstance(data, np.ndarray):
|
|
24
29
|
if data.ndim == 1:
|
|
25
30
|
data = pd.DataFrame({'var': data})
|
|
@@ -29,6 +34,40 @@ class DescriptiveStats:
|
|
|
29
34
|
self.data = data
|
|
30
35
|
self.backend = backend
|
|
31
36
|
self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
|
|
37
|
+
|
|
38
|
+
@staticmethod
def from_file(path: str):
    """Load a tabular file from disk and wrap it in a DescriptiveStats.

    The pandas reader is selected from the file extension, compared
    case-insensitively. Supported extensions: .csv, .xlsx, .xls, .txt,
    .tsv, .json, .parquet and .feather.

    Parameters
    ----------
    path : str
        Location of the file to load.

    Returns
    -------
    DescriptiveStats
        Instance built from the DataFrame that was read.

    Raises
    ------
    FileNotFoundError
        If nothing exists at ``path``.
    ValueError
        If the extension is not one of the supported formats.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"Archivo no encontrado: {path}")

    _, suffix = os.path.splitext(path)
    suffix = suffix.lower()

    # Extension -> pandas reader. Every reader is called with the path only,
    # so each one runs with its pandas defaults.
    readers = {
        ".csv": pd.read_csv,
        ".xlsx": pd.read_excel,
        ".xls": pd.read_excel,
        ".txt": pd.read_table,
        ".tsv": pd.read_table,
        ".json": pd.read_json,
        ".parquet": pd.read_parquet,
        ".feather": pd.read_feather,
    }

    reader = readers.get(suffix)
    if reader is None:
        raise ValueError(f"Formato no soportado: {suffix}")

    return DescriptiveStats(reader(path))
|
|
32
71
|
|
|
33
72
|
# ============= MÉTODOS UNIVARIADOS =============
|
|
34
73
|
|
statslibx/inferential.py
CHANGED
|
@@ -4,6 +4,7 @@ import pandas as pd
|
|
|
4
4
|
from typing import Optional, Union, Literal, List, Dict, Any
|
|
5
5
|
from datetime import datetime
|
|
6
6
|
from scipy import stats
|
|
7
|
+
import os
|
|
7
8
|
|
|
8
9
|
class InferentialStats:
|
|
9
10
|
"""
|
|
@@ -15,6 +16,10 @@ class InferentialStats:
|
|
|
15
16
|
"""
|
|
16
17
|
Inicializar con DataFrame o array numpy
|
|
17
18
|
"""
|
|
19
|
+
|
|
20
|
+
if isinstance(data, str) and os.path.exists(data):
|
|
21
|
+
data = InferentialStats.from_file(data).data
|
|
22
|
+
|
|
18
23
|
if isinstance(data, np.ndarray):
|
|
19
24
|
if data.ndim == 1:
|
|
20
25
|
data = pd.DataFrame({'var': data})
|
|
@@ -24,6 +29,40 @@ class InferentialStats:
|
|
|
24
29
|
self.data = data
|
|
25
30
|
self.backend = backend
|
|
26
31
|
self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
|
|
32
|
+
|
|
33
|
+
@staticmethod
def from_file(path: str):
    """Load a tabular file from disk and wrap it in an InferentialStats.

    The pandas reader is selected from the file extension, compared
    case-insensitively. Supported extensions: .csv, .xlsx, .xls, .txt,
    .tsv, .json, .parquet and .feather.

    Parameters
    ----------
    path : str
        Location of the file to load.

    Returns
    -------
    InferentialStats
        Instance built from the DataFrame that was read.

    Raises
    ------
    FileNotFoundError
        If nothing exists at ``path``.
    ValueError
        If the extension is not one of the supported formats.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"Archivo no encontrado: {path}")

    _, suffix = os.path.splitext(path)
    suffix = suffix.lower()

    # Extension -> pandas reader. Every reader is called with the path only,
    # so each one runs with its pandas defaults.
    readers = {
        ".csv": pd.read_csv,
        ".xlsx": pd.read_excel,
        ".xls": pd.read_excel,
        ".txt": pd.read_table,
        ".tsv": pd.read_table,
        ".json": pd.read_json,
        ".parquet": pd.read_parquet,
        ".feather": pd.read_feather,
    }

    reader = readers.get(suffix)
    if reader is None:
        raise ValueError(f"Formato no soportado: {suffix}")

    return InferentialStats(reader(path))
|
|
27
66
|
|
|
28
67
|
# ============= INTERVALOS DE CONFIANZA =============
|
|
29
68
|
|