statslibx 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
statslibx/__init__.py CHANGED
@@ -11,6 +11,7 @@ __author__ = "Emmanuel Ascendra"
11
11
  from .descriptive import DescriptiveStats, DescriptiveSummary
12
12
  from .inferential import InferentialStats, TestResult
13
13
  from .utils import UtilsStats
14
+ from .preprocessing import Preprocessing
14
15
  from .datasets import load_dataset
15
16
 
16
17
  # Definir qué se expone cuando se hace: from statslibx import *
@@ -22,6 +23,7 @@ __all__ = [
22
23
  'DescriptiveSummary',
23
24
  'TestResult',
24
25
  'UtilsStats',
26
+ 'Preprocessing',
25
27
  'load_dataset'
26
28
  ]
27
29
 
@@ -35,4 +37,5 @@ def welcome():
35
37
  print(f" - DescriptiveStats: Estadística descriptiva")
36
38
  print(f" - InferentialStats: Estadística inferencial")
37
39
  print(f" - UtilsStats: Utilidades Extras")
40
+ print(f" - Preprocessing: Preprocesamiento de datos")
38
41
  print(f"\nPara más información: help(statslibx)")
statslibx/cli.py ADDED
@@ -0,0 +1,47 @@
1
+ import argparse
2
+ from statslibx.io import load_file
3
+ from statslibx.preprocessing import Preprocessing
4
+
5
+
6
def main(argv: "list[str] | None" = None) -> None:
    """Run the statslibx command-line interface.

    Subcommands (each takes the data file as its positional argument):
        describe FILE        print the result of ``Preprocessing.describe_numeric()``
        quality FILE         print the result of ``Preprocessing.data_quality()``
        preview FILE [-n N]  print the result of ``Preprocessing.preview_data(N)``;
                             ``N`` defaults to 5

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so a bare ``main()`` call behaves exactly as before. Passing a
            list allows the CLI to be driven from code and tests.

    Returns:
        None. When no subcommand is given, the help text is printed and
        the function returns without loading any file.
    """
    parser = argparse.ArgumentParser(
        prog="statslibx",
        description="Statslibx - Data analysis from terminal",
    )

    subparsers = parser.add_subparsers(dest="command")

    # describe
    describe = subparsers.add_parser("describe")
    describe.add_argument("file")

    # quality
    quality = subparsers.add_parser("quality")
    quality.add_argument("file")

    # preview
    preview = subparsers.add_parser("preview")
    preview.add_argument("file")
    preview.add_argument("-n", "--rows", type=int, default=5)

    args = parser.parse_args(argv)

    # Subcommands are optional for argparse here, so an empty command line
    # is handled explicitly by showing the usage text.
    if not args.command:
        parser.print_help()
        return

    df = load_file(args.file)
    pp = Preprocessing(df)

    if args.command == "describe":
        print(pp.describe_numeric())

    elif args.command == "quality":
        print(pp.data_quality())

    elif args.command == "preview":
        print(pp.preview_data(args.rows))


if __name__ == "__main__":
    main()
@@ -1,16 +1,71 @@
1
+ from typing import Optional, Union, Literal, List
2
+ import polars as pl
1
3
  import pandas as pd
2
4
  import pkgutil
3
5
  import io
4
6
 
5
def load_dataset(
    name: str,
    backend: Literal['pandas', 'polars'] = 'pandas'
) -> Union[pd.DataFrame, pl.DataFrame]:
    """Load one of the datasets bundled with the package.

    Available datasets:
    - iris.csv
    - penguins.csv
    - sp500_companies.csv
    - titanic.csv
    - course_completion.csv

    Args:
        name: Resource name inside ``statslibx.datasets`` (e.g. ``"iris.csv"``).
        backend: ``'pandas'`` (default) or ``'polars'``; selects which
            library parses the CSV and therefore the returned frame type.

    Returns:
        The dataset parsed with ``pd.read_csv`` or ``pl.read_csv``.

    Raises:
        FileNotFoundError: If ``pkgutil.get_data`` returns ``None`` for ``name``.
        ValueError: If ``backend`` is neither ``'pandas'`` nor ``'polars'``.
    """
    raw = pkgutil.get_data("statslibx.datasets", name)
    if raw is None:
        raise FileNotFoundError(f"Dataset '{name}' no encontrado.")

    # Guard clauses: return as soon as a supported backend matches.
    if backend == "pandas":
        return pd.read_csv(io.BytesIO(raw))
    if backend == "polars":
        return pl.read_csv(io.BytesIO(raw))

    raise ValueError(
        "Backend no soportado. Use 'pandas' o 'polars'."
    )
31
+
32
def load_iris(
    backend: Literal['pandas', 'polars'] = 'pandas'
) -> Union[pd.DataFrame, pl.DataFrame]:
    """Load the bundled Iris dataset (``iris.csv``).

    Args:
        backend: ``'pandas'`` (default) or ``'polars'``; selects which
            library parses the CSV and therefore the returned frame type.

    Returns:
        The dataset parsed with ``pd.read_csv`` or ``pl.read_csv``.

    Raises:
        FileNotFoundError: If ``pkgutil.get_data`` returns ``None`` for
            ``iris.csv``.
        ValueError: If ``backend`` is neither ``'pandas'`` nor ``'polars'``.
    """
    data_bytes = pkgutil.get_data("statslibx.datasets", "iris.csv")
    if data_bytes is None:
        raise FileNotFoundError("Dataset \"iris.csv\" no encontrado.")

    if backend == "pandas":
        return pd.read_csv(io.BytesIO(data_bytes))
    if backend == "polars":
        # The signature advertises 'polars'; parse it the same way
        # load_dataset does instead of rejecting it.
        return pl.read_csv(io.BytesIO(data_bytes))

    raise ValueError(
        "Backend no soportado. Use 'pandas' o 'polars'."
    )
51
+
52
def load_penguins(
    backend: Literal['pandas', 'polars'] = 'pandas'
) -> Union[pd.DataFrame, pl.DataFrame]:
    """Load the bundled Penguins dataset (``penguins.csv``).

    Args:
        backend: ``'pandas'`` (default) or ``'polars'``; selects which
            library parses the CSV and therefore the returned frame type.

    Returns:
        The dataset parsed with ``pd.read_csv`` or ``pl.read_csv``.

    Raises:
        FileNotFoundError: If ``pkgutil.get_data`` returns ``None`` for
            ``penguins.csv``.
        ValueError: If ``backend`` is neither ``'pandas'`` nor ``'polars'``.
    """
    data_bytes = pkgutil.get_data("statslibx.datasets", "penguins.csv")
    if data_bytes is None:
        raise FileNotFoundError("Dataset \"penguins.csv\" no encontrado.")

    if backend == "pandas":
        return pd.read_csv(io.BytesIO(data_bytes))
    if backend == "polars":
        # The signature advertises 'polars'; parse it the same way
        # load_dataset does instead of rejecting it.
        return pl.read_csv(io.BytesIO(data_bytes))

    raise ValueError(
        "Backend no soportado. Use 'pandas' o 'polars'."
    )
71
+