statslibx 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {statslibx-0.2.2/statslibx.egg-info → statslibx-0.2.4}/PKG-INFO +12 -1
  2. {statslibx-0.2.2 → statslibx-0.2.4}/README.md +3 -0
  3. {statslibx-0.2.2 → statslibx-0.2.4}/pyproject.toml +10 -2
  4. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/__init__.py +2 -2
  5. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/cli.py +4 -3
  6. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/datasets/__init__.py +28 -38
  7. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/descriptive.py +3 -14
  8. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/inferential.py +3 -5
  9. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/preprocessing/__init__.py +106 -5
  10. {statslibx-0.2.2 → statslibx-0.2.4/statslibx.egg-info}/PKG-INFO +12 -1
  11. statslibx-0.2.4/statslibx.egg-info/requires.txt +17 -0
  12. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx.egg-info/top_level.txt +1 -0
  13. statslibx-0.2.4/tests/test1.py +30 -0
  14. statslibx-0.2.2/statslibx.egg-info/requires.txt +0 -9
  15. statslibx-0.2.2/tests/test1.py +0 -20
  16. {statslibx-0.2.2 → statslibx-0.2.4}/MANIFEST.in +0 -0
  17. {statslibx-0.2.2 → statslibx-0.2.4}/setup.cfg +0 -0
  18. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/computacional.py +0 -0
  19. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/datasets/Cocoa_Bubbles_Investment_Nigeria_Ghana_1980_2023.xlsx +0 -0
  20. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/datasets/course_completion.csv +0 -0
  21. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/datasets/iris.csv +0 -0
  22. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/datasets/penguins.csv +0 -0
  23. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/datasets/sp500_companies.csv +0 -0
  24. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/datasets/titanic.csv +0 -0
  25. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx/utils.py +0 -0
  26. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx.egg-info/SOURCES.txt +0 -0
  27. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx.egg-info/dependency_links.txt +0 -0
  28. {statslibx-0.2.2 → statslibx-0.2.4}/statslibx.egg-info/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: statslibx
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
5
5
  Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
6
6
  License: MIT
@@ -16,6 +16,14 @@ Classifier: Programming Language :: Python :: 3.12
16
16
  Requires-Python: >=3.8
17
17
  Description-Content-Type: text/markdown
18
18
  Requires-Dist: pandas>=1.5
19
+ Requires-Dist: matplotlib>=3.5
20
+ Requires-Dist: numpy>=1.23
21
+ Requires-Dist: scipy>=1.9
22
+ Requires-Dist: polars>=0.16
23
+ Requires-Dist: scikit-learn>=1.0
24
+ Requires-Dist: statsmodels>=0.13
25
+ Requires-Dist: seaborn>=0.11
26
+ Requires-Dist: plotly>=5.0
19
27
  Provides-Extra: viz
20
28
  Requires-Dist: seaborn>=0.11; extra == "viz"
21
29
  Requires-Dist: plotly>=5.0; extra == "viz"
@@ -29,6 +37,8 @@ StatsLibX es un paquete de Python diseñado para proporcionar una solución senc
29
37
 
30
38
  Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
31
39
 
40
+ Pagina Web: [StatsLibX](https://ghostanalyst30.github.io/StatsLibX/Documentation_Page/index.html)
41
+
32
42
  GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
33
43
 
34
44
  ## ✨ Características principales
@@ -63,6 +73,7 @@ pip install statslibx
63
73
 
64
74
  ## 👩‍💻 ¡Usalo en la terminal! (De forma preliminar)
65
75
  ```bash
76
+ statslibx # Informacion general de la libreria
66
77
  statslibx describe .\archive.csv # Devuelve una descripcion de la data
67
78
  statslibx quality .\archive.csv # Devuelve la calidad de los datos
68
79
  statslibx preview .\archive.csv # Devuelve una visualizacion de los datos
@@ -4,6 +4,8 @@ StatsLibX es un paquete de Python diseñado para proporcionar una solución senc
4
4
 
5
5
  Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
6
6
 
7
+ Pagina Web: [StatsLibX](https://ghostanalyst30.github.io/StatsLibX/Documentation_Page/index.html)
8
+
7
9
  GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
8
10
 
9
11
  ## ✨ Características principales
@@ -38,6 +40,7 @@ pip install statslibx
38
40
 
39
41
  ## 👩‍💻 ¡Usalo en la terminal! (De forma preliminar)
40
42
  ```bash
43
+ statslibx # Informacion general de la libreria
41
44
  statslibx describe .\archive.csv # Devuelve una descripcion de la data
42
45
  statslibx quality .\archive.csv # Devuelve la calidad de los datos
43
46
  statslibx preview .\archive.csv # Devuelve una visualizacion de los datos
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "statslibx"
7
- version = "0.2.2"
7
+ version = "0.2.4"
8
8
  description = "StatsLibx - Librería de estadística descriptiva, inferencial y computacional"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -27,7 +27,15 @@ classifiers = [
27
27
  ]
28
28
 
29
29
  dependencies = [
30
- "pandas>=1.5"
30
+ "pandas>=1.5",
31
+ "matplotlib>=3.5",
32
+ "numpy>=1.23",
33
+ "scipy>=1.9",
34
+ "polars>=0.16",
35
+ "scikit-learn>=1.0",
36
+ "statsmodels>=0.13",
37
+ "seaborn>=0.11",
38
+ "plotly>=5.0"
31
39
  ]
32
40
 
33
41
  [project.optional-dependencies]
@@ -1,10 +1,10 @@
1
1
  """
2
2
  StatsLibx - Librería de Estadística para Python
3
3
  Autor: Emmanuel Ascendra
4
- Versión: 0.2.2
4
+ Versión: 0.2.4
5
5
  """
6
6
 
7
- __version__ = "0.2.2"
7
+ __version__ = "0.2.4"
8
8
  __author__ = "Emmanuel Ascendra"
9
9
 
10
10
  # Importar las clases principales
@@ -1,5 +1,6 @@
1
1
  import argparse
2
- from statslibx.io import load_file
2
+ import statslibx as slx
3
+ from statslibx.datasets import load_dataset
3
4
  from statslibx.preprocessing import Preprocessing
4
5
 
5
6
 
@@ -27,10 +28,10 @@ def main():
27
28
  args = parser.parse_args()
28
29
 
29
30
  if not args.command:
30
- parser.print_help()
31
+ print(slx.welcome())
31
32
  return
32
33
 
33
- df = load_file(args.file)
34
+ df = load_dataset(args.file)
34
35
  pp = Preprocessing(df)
35
36
 
36
37
  if args.command == "describe":
@@ -3,7 +3,6 @@ import io
3
3
  import pkgutil
4
4
  from pathlib import Path
5
5
  import pandas as pd
6
- import polars as pl
7
6
  import numpy as np
8
7
  from numpy.typing import NDArray
9
8
 
@@ -12,10 +11,10 @@ _SUPPORTED_BACKENDS = ("pandas", "polars")
12
11
 
13
12
 
14
13
  def _validate_columns(
15
- df: Union[pd.DataFrame, pl.DataFrame],
16
- X_columns: List[str],
17
- y_column: str
18
- ) -> None:
14
+ df: pd.DataFrame, # 输入的数据框,可以是pandas或polars DataFrame
15
+ X_columns: List[str], # 特征列名列表
16
+ y_column: str # 目标列名
17
+ ) -> None: # 无返回值,函数仅用于验证
19
18
  columns = set(df.columns)
20
19
  missing = set(X_columns + [y_column]) - columns
21
20
  if missing:
@@ -23,7 +22,7 @@ def _validate_columns(
23
22
 
24
23
 
25
24
  def _X_y(
26
- df: Union[pd.DataFrame, pl.DataFrame],
25
+ df: pd.DataFrame,
27
26
  X_columns: List[str],
28
27
  y_column: str
29
28
  ) -> Tuple[NDArray, NDArray]:
@@ -37,25 +36,19 @@ def _X_y(
37
36
  y = df[y_column].to_numpy().ravel()
38
37
  return X, y
39
38
 
40
- elif isinstance(df, pl.DataFrame):
41
- X = df.select(X_columns).to_numpy()
42
- y = df.select(y_column).to_numpy().ravel()
43
- return X, y
44
-
45
39
  else:
46
40
  raise TypeError(
47
- "Backend no soportado. Use pandas.DataFrame o polars.DataFrame."
41
+ "Backend no soportado. Use pandas.DataFrame"
48
42
  )
49
43
 
50
44
 
51
45
  import io
52
46
  import pkgutil
53
47
  import pandas as pd
54
- import polars as pl
55
48
  from typing import Literal, Optional, Tuple, List, Union
56
49
  from numpy.typing import NDArray
57
50
 
58
- _SUPPORTED_BACKENDS = {"pandas", "polars"}
51
+ _SUPPORTED_BACKENDS = {"pandas"}
59
52
  _SUPPORTED_EXTENSIONS = {".csv", ".parquet", ".xlsx", ".xls", ".json"}
60
53
 
61
54
  def _read_file(
@@ -73,23 +66,15 @@ def _read_file(
73
66
  return pd.read_excel(buffer_or_path)
74
67
  if ext == ".json":
75
68
  return pd.read_json(buffer_or_path)
76
- else: # polars
77
- if ext == ".csv":
78
- return pl.read_csv(buffer_or_path)
79
- if ext == ".parquet":
80
- return pl.read_parquet(buffer_or_path)
81
- if ext == ".json":
82
- return pl.read_json(buffer_or_path)
83
69
 
84
70
  raise ValueError(f"Extensión '{ext}' no soportada para backend '{backend}'.")
85
71
 
86
-
87
72
  def load_dataset(
88
- name: str,
89
- backend: Literal["pandas", "polars"] = "pandas",
90
- return_X_y: Optional[Tuple[List[str], str]] = None,
91
- sep: str = ","
92
- ) -> Union[pd.DataFrame, pl.DataFrame, Tuple[NDArray, NDArray]]:
73
+ name: str,
74
+ backend: str = "pandas",
75
+ return_X_y: Optional[Tuple[List[str], str]] = None,
76
+ sep: str = ","
77
+ ) -> Union[pd.DataFrame, Tuple[NDArray, NDArray]]:
93
78
  """
94
79
  Carga un dataset interno del paquete.
95
80
 
@@ -99,6 +84,7 @@ def load_dataset(
99
84
  - sp500_companies.csv
100
85
  - titanic.csv
101
86
  - course_completion.csv
87
+ - Cocoa_Bubbles_Investment_Nigeria_Ghana_1980_2023.xlsx
102
88
 
103
89
  Parámetros
104
90
  ----------
@@ -120,7 +106,10 @@ def load_dataset(
120
106
  f"Use uno de {_SUPPORTED_BACKENDS}."
121
107
  )
122
108
 
123
- ext = Path(name).suffix.lower()
109
+ path = Path(name)
110
+ resource_name = path.name
111
+ ext = path.suffix.lower()
112
+
124
113
 
125
114
  if ext not in _SUPPORTED_EXTENSIONS:
126
115
  raise ValueError(
@@ -130,26 +119,26 @@ def load_dataset(
130
119
 
131
120
  df = None
132
121
 
133
- # ---------- 1️⃣ Intentar cargar desde el paquete ----------
122
+ # 1️⃣ Intentar cargar desde el paquete
134
123
  try:
135
- data_bytes = pkgutil.get_data("statslibx.datasets", name)
124
+ data_bytes = pkgutil.get_data("statslibx.datasets", resource_name)
125
+
136
126
  if data_bytes is not None:
137
127
  buffer = io.BytesIO(data_bytes)
138
128
  df = _read_file(buffer, ext, backend, sep)
139
129
  except FileNotFoundError:
140
130
  pass
141
131
 
142
- # ---------- 2️⃣ Intentar cargar desde ruta local ----------
132
+ # 2️⃣ Intentar cargar desde ruta local
143
133
  if df is None:
144
- try:
145
- df = _read_file(name, ext, backend, sep)
146
- except FileNotFoundError:
134
+ if not path.exists():
147
135
  raise FileNotFoundError(
148
136
  f"Dataset '{name}' no encontrado "
149
- f"ni en statslibx.datasets ni en la ruta actual."
137
+ f"ni en statslibx.datasets ni en la ruta local."
150
138
  )
139
+ df = _read_file(path, ext, backend, sep)
151
140
 
152
- # ---------- 3️⃣ Devolver X, y si se solicita ----------
141
+ # 3️⃣ Devolver X, y si se solicita
153
142
  if return_X_y is not None:
154
143
  X_columns, y_column = return_X_y
155
144
  return _X_y(df, X_columns, y_column)
@@ -157,12 +146,13 @@ def load_dataset(
157
146
  return df
158
147
 
159
148
 
149
+
160
150
  # =========================
161
151
  # Datasets específicos
162
152
  # =========================
163
153
 
164
154
  def load_iris(
165
- backend: Literal["pandas", "polars"] = "pandas",
155
+ backend: str = "pandas",
166
156
  return_X_y: Optional[Tuple[List[str], str]] = None
167
157
  ):
168
158
  return load_dataset(
@@ -173,7 +163,7 @@ def load_iris(
173
163
 
174
164
 
175
165
  def load_penguins(
176
- backend: Literal["pandas", "polars"] = "pandas",
166
+ backend: str = "pandas",
177
167
  return_X_y: Optional[Tuple[List[str], str]] = None
178
168
  ):
179
169
  return load_dataset(
@@ -1,13 +1,9 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
- import polars as pl
4
3
  from typing import Optional, Union, Literal, List
5
4
  from datetime import datetime
6
- import os
7
5
  import matplotlib.pyplot as plt
8
6
  import seaborn as sns
9
- import io
10
- import base64
11
7
  import plotly.express as px
12
8
 
13
9
  class DescriptiveStats:
@@ -104,16 +100,9 @@ class DescriptiveStats:
104
100
  raise TypeError(
105
101
  "Data must be a pandas.DataFrame or numpy.ndarray."
106
102
  )
107
-
108
- if isinstance(data, np.ndarray):
109
- if data.ndim == 1:
110
- data = pd.DataFrame({'var': data})
111
- else:
112
- data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])],
113
- sep=self.sep) \
114
- if isinstance(data, pd.DataFrame) else pl.DataFrame(data, )
115
-
116
- self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
103
+
104
+ self._numeric_cols = self.data.select_dtypes(include=["number"]).columns.tolist()
105
+ self._categorical_cols = self.data.select_dtypes(include=["object", "category"]).columns.tolist()
117
106
  self.lang = lang
118
107
 
119
108
 
@@ -1,11 +1,9 @@
1
1
  from dataclasses import dataclass
2
2
  import numpy as np
3
3
  import pandas as pd
4
- import polars as pl
5
- from typing import Optional, Union, Literal, List, Dict, Any, Tuple
4
+ from typing import Union, Literal, Dict, Any, Tuple
6
5
  from datetime import datetime
7
6
  from scipy import stats
8
- import os
9
7
 
10
8
  class InferentialStats:
11
9
  """
@@ -94,8 +92,8 @@ class InferentialStats:
94
92
  else:
95
93
  data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])])
96
94
 
97
- self.data = data
98
- self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
95
+ self._numeric_cols = data.select_dtypes(include=["number"]).columns.tolist()
96
+ self._categorical_cols = self.data.select_dtypes(include=["object", "category"]).columns.tolist()
99
97
  self.lang = lang
100
98
 
101
99
  # ============= INTERVALOS DE CONFIANZA =============
@@ -10,6 +10,7 @@ class Preprocessing:
10
10
  if not isinstance(data, (pd.DataFrame, pl.DataFrame)):
11
11
  raise TypeError("data must be a pandas or polars DataFrame")
12
12
  self.data = data
13
+ self.columns = list(self.data.columns)
13
14
 
14
15
  # ------------------------------------------------------------------
15
16
  # Internal helpers
@@ -27,11 +28,11 @@ class Preprocessing:
27
28
  return int(self.data[column].null_count())
28
29
 
29
30
  def _get_columns(self, columns):
30
- if columns is None:
31
- return list(self.data.columns)
32
- if isinstance(columns, str):
33
- return [columns]
34
- return columns
31
+ if columns is None:
32
+ return list(self.data.columns)
33
+ if isinstance(columns, str):
34
+ return [columns]
35
+ return columns
35
36
 
36
37
  # ------------------------------------------------------------------
37
38
  # Inspection
@@ -226,3 +227,103 @@ class Preprocessing:
226
227
 
227
228
  return pd.DataFrame(rows)
228
229
 
230
+ def change_dtypes(
231
+ self,
232
+ columns: Union[List[str], str, None] = None,
233
+ from_type: Optional[str] = None,
234
+ to_type: Optional[str] = None
235
+ ) -> pd.DataFrame:
236
+
237
+ data = self.data
238
+
239
+ TYPE_MAP = {
240
+ "string": "string",
241
+ "object": "object",
242
+ "int": "int64",
243
+ "float": "float64",
244
+ "int64": "int64",
245
+ "float64": "float64",
246
+ "number": "float64"
247
+ }
248
+
249
+ if columns is None:
250
+ columns = list(data.columns)
251
+ elif isinstance(columns, str):
252
+ columns = [columns]
253
+
254
+ if to_type and to_type not in TYPE_MAP:
255
+ raise ValueError(f"Unsupported to_type: {to_type}")
256
+
257
+ if self._is_pandas():
258
+
259
+ for col in columns:
260
+
261
+ if col not in data.columns:
262
+ print(f"Column '{col}' does not exist in the DataFrame")
263
+ return
264
+
265
+ if from_type is not None:
266
+ current_type = str(data[col].dtype)
267
+
268
+ if from_type not in current_type:
269
+ continue
270
+
271
+ if to_type is not None:
272
+ try:
273
+
274
+ if to_type in ["int", "float", "number"]:
275
+ data[col] = pd.to_numeric(data[col], errors="raise")
276
+
277
+ if to_type == "int":
278
+ data[col] = data[col].astype("int64")
279
+
280
+ elif to_type == "string":
281
+ data[col] = data[col].astype("string")
282
+
283
+ elif to_type == "object":
284
+ data[col] = data[col].astype("object")
285
+
286
+ else:
287
+ data[col] = data[col].astype(TYPE_MAP[to_type])
288
+
289
+ except Exception:
290
+ print(f"Cannot convert column '{col}' to {to_type}")
291
+
292
+ return data
293
+
294
+ def clean_data(
295
+ self,
296
+ # 🔍 Missing values
297
+ handle_missing: bool = False,
298
+ missing_strategy: str = "mean", # mean, median, mode, drop, constant
299
+ fill_value=None,
300
+
301
+ # 🧹 Duplicados
302
+ remove_duplicates: bool = False,
303
+
304
+ # 📊 Tipos de datos
305
+ convert_dtypes: bool = False,
306
+
307
+ # 🚨 Outliers
308
+ detect_outliers: bool = False,
309
+ remove_outliers: bool = False,
310
+ outlier_method: str = "iqr", # iqr, zscore
311
+ z_thresh: float = 3.0,
312
+
313
+ # 📏 Escalado / Normalización
314
+ scale: bool = False,
315
+ scaling_method: str = "standard", # standard, minmax, robust
316
+
317
+ # 🔢 Transformaciones
318
+ log_transform: bool = False,
319
+ sqrt_transform: bool = False,
320
+
321
+ # 🧱 Columnas
322
+ drop_columns: list = None,
323
+ keep_columns: list = None,
324
+
325
+
326
+ # 🧪 Analisis
327
+ analizer: bool = True,
328
+ text_analizer: bool = False) -> pd.DataFrame | str:
329
+ pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: statslibx
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
5
5
  Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
6
6
  License: MIT
@@ -16,6 +16,14 @@ Classifier: Programming Language :: Python :: 3.12
16
16
  Requires-Python: >=3.8
17
17
  Description-Content-Type: text/markdown
18
18
  Requires-Dist: pandas>=1.5
19
+ Requires-Dist: matplotlib>=3.5
20
+ Requires-Dist: numpy>=1.23
21
+ Requires-Dist: scipy>=1.9
22
+ Requires-Dist: polars>=0.16
23
+ Requires-Dist: scikit-learn>=1.0
24
+ Requires-Dist: statsmodels>=0.13
25
+ Requires-Dist: seaborn>=0.11
26
+ Requires-Dist: plotly>=5.0
19
27
  Provides-Extra: viz
20
28
  Requires-Dist: seaborn>=0.11; extra == "viz"
21
29
  Requires-Dist: plotly>=5.0; extra == "viz"
@@ -29,6 +37,8 @@ StatsLibX es un paquete de Python diseñado para proporcionar una solución senc
29
37
 
30
38
  Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
31
39
 
40
+ Pagina Web: [StatsLibX](https://ghostanalyst30.github.io/StatsLibX/Documentation_Page/index.html)
41
+
32
42
  GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
33
43
 
34
44
  ## ✨ Características principales
@@ -63,6 +73,7 @@ pip install statslibx
63
73
 
64
74
  ## 👩‍💻 ¡Usalo en la terminal! (De forma preliminar)
65
75
  ```bash
76
+ statslibx # Informacion general de la libreria
66
77
  statslibx describe .\archive.csv # Devuelve una descripcion de la data
67
78
  statslibx quality .\archive.csv # Devuelve la calidad de los datos
68
79
  statslibx preview .\archive.csv # Devuelve una visualizacion de los datos
@@ -0,0 +1,17 @@
1
+ pandas>=1.5
2
+ matplotlib>=3.5
3
+ numpy>=1.23
4
+ scipy>=1.9
5
+ polars>=0.16
6
+ scikit-learn>=1.0
7
+ statsmodels>=0.13
8
+ seaborn>=0.11
9
+ plotly>=5.0
10
+
11
+ [advanced]
12
+ scikit-learn>=1.0
13
+ statsmodels>=0.13
14
+
15
+ [viz]
16
+ seaborn>=0.11
17
+ plotly>=5.0
@@ -1,3 +1,4 @@
1
+ Documentation_Page
1
2
  dist
2
3
  figures
3
4
  statslibx
@@ -0,0 +1,30 @@
1
+ from statslibx import load_dataset, DescriptiveStats, InferentialStats
2
+ import pandas as pd
3
+ # df = pd.read_csv(r"tests\bank (1).csv", sep=";")
4
+
5
+ # df = load_dataset(r"tests\bank (1).csv", sep=";")
6
+ # stats = DescriptiveStats(df)
7
+ # print(stats.data)
8
+
9
+ # infer = InferentialStats(df)
10
+ # print(infer.data)
11
+
12
+ # df = load_dataset(r"statslibx\datasets\Cocoa_Bubbles_Investment_Nigeria_Ghana_1980_2023.xlsx")
13
+
14
+ # ds = DescriptiveStats(df)
15
+
16
+ # print(ds.data)
17
+
18
+ import statslibx as slx
19
+
20
+ df = slx.datasets.load_penguins()
21
+ infer = slx.InferentialStats(df)
22
+
23
+ # Confidence Interval + Point Estimate
24
+ print(infer.confidence_interval(
25
+ column="bill_length_mm",
26
+ statistic="mean"
27
+ ))
28
+
29
+
30
+
@@ -1,9 +0,0 @@
1
- pandas>=1.5
2
-
3
- [advanced]
4
- scikit-learn>=1.0
5
- statsmodels>=0.13
6
-
7
- [viz]
8
- seaborn>=0.11
9
- plotly>=5.0
@@ -1,20 +0,0 @@
1
- from statslibx import load_dataset, DescriptiveStats, InferentialStats
2
- import pandas as pd
3
- # df = pd.read_csv(r"tests\bank (1).csv", sep=";")
4
-
5
- # df = load_dataset(r"tests\bank (1).csv", sep=";")
6
- # stats = DescriptiveStats(df)
7
- # print(stats.data)
8
-
9
- # infer = InferentialStats(df)
10
- # print(infer.data)
11
-
12
- df = load_dataset(r"statslibx\datasets\WHR25_Data_Figure_2.1.xlsx")
13
-
14
- ds = DescriptiveStats(df)
15
-
16
- print(ds.data)
17
-
18
-
19
-
20
-
File without changes
File without changes
File without changes