statslibx 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
statslibx/__init__.py CHANGED
@@ -1,17 +1,16 @@
  """
  StatsLibx - Librería de Estadística para Python
  Autor: Emmanuel Ascendra
- Versión: 0.1.8
+ Versión: 0.2.0
  """

- __version__ = "0.1.8"
+ __version__ = "0.2.0"
  __author__ = "Emmanuel Ascendra"

  # Importar las clases principales
  from .descriptive import DescriptiveStats, DescriptiveSummary
  from .inferential import InferentialStats, TestResult
- from .probability import ProbabilityStats
- from .computacional import ComputacionalStats
+ from .computacional import ComputationalStats
  from .utils import UtilsStats
  from .preprocessing import Preprocessing
  from .datasets import load_dataset, generate_dataset
@@ -22,7 +21,7 @@ __all__ = [
      'DescriptiveStats',
      'InferentialStats',
      'ProbabilityStats',
-     'ComputacionalStats',
+     'ComputationalStats',
      'UtilsStats',
      'Preprocessing',
      'load_dataset',
@@ -38,6 +37,7 @@ def welcome():
      print(f"\nClases disponibles:")
      print(f" - DescriptiveStats: Estadística descriptiva")
      print(f" - InferentialStats: Estadística inferencial")
+     print(f" - ComputacionalStats: En desarrollo")
      print(f" - UtilsStats: Utilidades Extras")
      print(f"\nMódulos disponibles:")
      print(f" - Datasets: Carga de Datasets")
statslibx/computacional.py CHANGED
@@ -1,2 +1,126 @@
- class ComputacionalStats:
-     pass
+ from typing import Union, Optional, Literal
+ import numpy as np
+ import pandas as pd
+ import polars as pl
+ import os
+
+ class ComputationalStats:
+     """
+     Class for computational statistics
+     """
+
+     def __init__(self, data: Union[pd.DataFrame, np.ndarray],
+                  sep: str = None,
+                  decimal: str = None,
+                  thousand: str = None,
+                  backend: Literal['pandas', 'polars'] = 'pandas'):
+         """
+         # Initialize DataFrame
+
+         ## **Parameters:**
+
+         - **data** : Data to analyze
+         - **sep** : Column separator
+         - **decimal** : Decimal separator
+         - **thousand** : Thousand separator
+         - **backend** : 'pandas' or 'polars' for processing
+           (Proximamente estara habilitado polars para big data)
+
+         **Examples:**
+
+         ``Example 1:
+         stats = DescriptiveStats(data)
+         ``
+         """
+
+         if isinstance(data, str) and os.path.exists(data):
+             data = ComputationalStats.from_file(data).data
+
+         if isinstance(data, pl.DataFrame):
+             raise TypeError(
+                 "Polars aún no soportado. Use pandas.DataFrame."
+             )
+
+
+         if isinstance(data, np.ndarray):
+             if data.ndim == 1:
+                 data = pd.DataFrame({'var': data})
+             else:
+                 data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])]) \
+                     if isinstance(data, pd.DataFrame) else pl.DataFrame(data, )
+
+         self.data = data
+         self.backend = backend
+         self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
+         self.sep = sep
+         self.decimal = decimal
+         self.thousand = thousand
+
+     @classmethod
+     def from_file(self, path: str):
+         """
+         Carga automática de archivos y devuelve instancia de Intelligence.
+         Soporta CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
+         Automatic file upload and returns Intelligence instance.
+         Supports CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
+
+         Parametros / Parameters:
+         ------------------------
+         path : str
+             Ruta del archivo
+             File path
+         """
+         if not os.path.exists(path):
+             raise FileNotFoundError(f"Archivo no encontrado / File not found: {path}")
+
+         ext = os.path.splitext(path)[1].lower()
+
+         if ext == ".csv":
+             df = pd.read_csv(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
+
+         elif ext in [".xlsx", ".xls"]:
+             df = pd.read_excel(path, decimal=self.decimal, thousand=self.thousand)
+
+         elif ext in [".txt", ".tsv"]:
+             df = pd.read_table(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
+
+         elif ext == ".json":
+             df = pd.read_json(path)
+
+         elif ext == ".parquet":
+             df = pd.read_parquet(path)
+
+         elif ext == ".feather":
+             df = pd.read_feather(path)
+
+         else:
+             raise ValueError(f"Formato no soportado / Unsupported format: {ext}")
+
+         return ComputationalStats(df)
+
+     def monte_carlo(self, function, n: int = 100, return_simulations: bool = False, **kwargs) -> pd.DataFrame:
+         """
+         Realiza simulaciones de Monte Carlo para una función y devuelve un DataFrame con las simulaciones y sus resultados.
+         """
+         samples = []
+
+         for _ in range(n):
+             sample = function(**kwargs)
+             samples.append(float(sample))
+
+         mean = sum(samples) / n
+         variance = sum((x - mean)**2 for x in samples) / n
+         std = variance**0.5
+
+         if return_simulations:
+             return {
+                 "mean": float(mean),
+                 "std": float(std),
+                 "samples": samples
+             }
+
+         else:
+             return {
+                 "mean": float(mean),
+                 "std": float(std)
+             }
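To show how the new module is meant to be driven, here is a minimal usage sketch (my own illustration, not taken from the package docs); it assumes statslibx 0.2.0 is installed and uses numpy.random for the simulated quantity. Note that monte_carlo, as written above, returns a plain dict with "mean", "std" and optionally "samples", despite its pd.DataFrame return annotation.

    import numpy as np
    from statslibx import ComputationalStats

    rng = np.random.default_rng(0)

    # A 1-D array is wrapped by the constructor into a one-column DataFrame named 'var'.
    cs = ComputationalStats(rng.normal(size=100))

    # monte_carlo calls function(**kwargs) n times and summarizes the float results.
    result = cs.monte_carlo(
        function=lambda loc, scale: rng.normal(loc, scale),
        n=1000,
        return_simulations=True,
        loc=0.0,
        scale=1.0,
    )
    print(result["mean"], result["std"], len(result["samples"]))  # ~0.0, ~1.0, 1000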
@@ -48,10 +48,21 @@ def _X_y(
  )


+ import io
+ import pkgutil
+ import pandas as pd
+ import polars as pl
+ from typing import Literal, Optional, Tuple, List, Union
+ from numpy.typing import NDArray
+
+ _SUPPORTED_BACKENDS = {"pandas", "polars"}
+
  def load_dataset(
      name: str,
      backend: Literal["pandas", "polars"] = "pandas",
-     return_X_y: Optional[Tuple[List[str], str]] = None
+     return_X_y: Optional[Tuple[List[str], str]] = None,
+     save: Optional[bool] = False,
+     filename: Optional[str] = None
  ) -> Union[pd.DataFrame, pl.DataFrame, Tuple[NDArray, NDArray]]:
      """
      Carga un dataset interno del paquete.
@@ -76,24 +87,29 @@ def load_dataset(
      -------
      DataFrame o (X, y)
      """
+
      if backend not in _SUPPORTED_BACKENDS:
          raise ValueError(
              f"Backend '{backend}' no soportado. "
              f"Use uno de {_SUPPORTED_BACKENDS}."
          )

-     # ---------- 1️⃣ Intentar cargar desde el paquete ----------
-     data_bytes = pkgutil.get_data("statslibx.datasets", name)
+     df = None

-     if data_bytes is not None:
-         df = (
-             pd.read_csv(io.BytesIO(data_bytes))
-             if backend == "pandas"
-             else pl.read_csv(io.BytesIO(data_bytes))
-         )
+     # ---------- 1️⃣ Intentar cargar desde el paquete ----------
+     try:
+         data_bytes = pkgutil.get_data("statslibx.datasets", name)
+         if data_bytes is not None:
+             df = (
+                 pd.read_csv(io.BytesIO(data_bytes))
+                 if backend == "pandas"
+                 else pl.read_csv(io.BytesIO(data_bytes))
+             )
+     except FileNotFoundError:
+         pass # seguimos al siguiente intento

-     # ---------- 2️⃣ Si no está en el paquete, buscar en ruta actual ----------
-     else:
+     # ---------- 2️⃣ Intentar cargar desde ruta local ----------
+     if df is None:
          try:
              df = (
                  pd.read_csv(name)
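For completeness, a hedged usage sketch of the updated loader; the dataset name "iris.csv" is hypothetical, and the handling of the new save and filename parameters is not visible in the excerpt above, so they are left at their defaults:

    from statslibx.datasets import load_dataset

    # Loads a CSV bundled with the package; if it is not found there,
    # load_dataset falls back to reading the name as a local path.
    df = load_dataset("iris.csv", backend="pandas")
    print(df.shape)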