statslibx 0.1.6__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {statslibx-0.1.6 → statslibx-0.1.7}/PKG-INFO +10 -29
  2. statslibx-0.1.7/pyproject.toml +42 -0
  3. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/__init__.py +3 -0
  4. statslibx-0.1.7/statslibx/cli.py +47 -0
  5. statslibx-0.1.7/statslibx/datasets/__init__.py +71 -0
  6. statslibx-0.1.7/statslibx/descriptive.py +1199 -0
  7. statslibx-0.1.7/statslibx/io.py +21 -0
  8. statslibx-0.1.7/statslibx/preprocessing/__init__.py +221 -0
  9. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx.egg-info/PKG-INFO +10 -29
  10. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx.egg-info/SOURCES.txt +6 -2
  11. statslibx-0.1.7/statslibx.egg-info/entry_points.txt +2 -0
  12. statslibx-0.1.7/statslibx.egg-info/requires.txt +10 -0
  13. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx.egg-info/top_level.txt +1 -0
  14. statslibx-0.1.6/setup.py +0 -53
  15. statslibx-0.1.6/statslibx/datasets/__init__.py +0 -16
  16. statslibx-0.1.6/statslibx/descriptive.py +0 -856
  17. statslibx-0.1.6/statslibx.egg-info/requires.txt +0 -18
  18. {statslibx-0.1.6 → statslibx-0.1.7}/README.md +0 -0
  19. {statslibx-0.1.6 → statslibx-0.1.7}/setup.cfg +0 -0
  20. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/datasets/course_completion.csv +0 -0
  21. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/datasets/iris.csv +0 -0
  22. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/datasets/penguins.csv +0 -0
  23. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/datasets/sp500_companies.csv +0 -0
  24. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/datasets/titanic.csv +0 -0
  25. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/inferential.py +0 -0
  26. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/utils.py +0 -0
  27. {statslibx-0.1.6 → statslibx-0.1.7}/statslibx.egg-info/dependency_links.txt +0 -0
@@ -1,47 +1,28 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: statslibx
3
- Version: 0.1.6
4
- Summary: Librería de estadística descriptiva e inferencial para Python
5
- Home-page: https://github.com/Immanuel3008/StatsLibX
6
- Author: Emmanuel Ascendra Perez
7
- Author-email: ascendraemmanuel@gmail.com
3
+ Version: 0.1.7
4
+ Summary: StatsLibx - Librería de estadística descriptiva e inferencial
5
+ Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
6
+ License: MIT
8
7
  Classifier: Development Status :: 3 - Alpha
9
8
  Classifier: Intended Audience :: Science/Research
10
9
  Classifier: Topic :: Scientific/Engineering :: Mathematics
11
10
  Classifier: License :: OSI Approved :: MIT License
12
11
  Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.8
14
12
  Classifier: Programming Language :: Python :: 3.9
15
13
  Classifier: Programming Language :: Python :: 3.10
16
14
  Classifier: Programming Language :: Python :: 3.11
17
15
  Classifier: Programming Language :: Python :: 3.12
18
16
  Requires-Python: >=3.8
19
17
  Description-Content-Type: text/markdown
20
- Requires-Dist: numpy>=1.20.0
21
- Requires-Dist: pandas>=1.3.0
22
- Requires-Dist: scipy>=1.7.0
23
- Requires-Dist: matplotlib>=3.4.0
18
+ Requires-Dist: pandas>=1.5
19
+ Requires-Dist: polars>=0.20
24
20
  Provides-Extra: viz
25
- Requires-Dist: seaborn>=0.11.0; extra == "viz"
26
- Requires-Dist: plotly>=5.0.0; extra == "viz"
21
+ Requires-Dist: seaborn>=0.11; extra == "viz"
22
+ Requires-Dist: plotly>=5.0; extra == "viz"
27
23
  Provides-Extra: advanced
28
- Requires-Dist: scikit-learn>=1.0.0; extra == "advanced"
29
- Requires-Dist: statsmodels>=0.13.0; extra == "advanced"
30
- Provides-Extra: all
31
- Requires-Dist: seaborn>=0.11.0; extra == "all"
32
- Requires-Dist: plotly>=5.0.0; extra == "all"
33
- Requires-Dist: scikit-learn>=1.0.0; extra == "all"
34
- Requires-Dist: statsmodels>=0.13.0; extra == "all"
35
- Dynamic: author
36
- Dynamic: author-email
37
- Dynamic: classifier
38
- Dynamic: description
39
- Dynamic: description-content-type
40
- Dynamic: home-page
41
- Dynamic: provides-extra
42
- Dynamic: requires-dist
43
- Dynamic: requires-python
44
- Dynamic: summary
24
+ Requires-Dist: scikit-learn>=1.0; extra == "advanced"
25
+ Requires-Dist: statsmodels>=0.13; extra == "advanced"
45
26
 
46
27
  # 📦 Descripción para PyPI (Plantilla Profesional)
47
28
 
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "statslibx"
7
+ version = "0.1.7"
8
+ description = "StatsLibx - Librería de estadística descriptiva e inferencial"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = { text = "MIT" }
12
+
13
+ authors = [
14
+ { name = "Emmanuel Ascendra Perez", email = "ascendraemmanuel@gmail.com" }
15
+ ]
16
+
17
+ classifiers = [
18
+ "Development Status :: 3 - Alpha",
19
+ "Intended Audience :: Science/Research",
20
+ "Topic :: Scientific/Engineering :: Mathematics",
21
+ "License :: OSI Approved :: MIT License",
22
+ "Programming Language :: Python :: 3",
23
+ "Programming Language :: Python :: 3.9",
24
+ "Programming Language :: Python :: 3.10",
25
+ "Programming Language :: Python :: 3.11",
26
+ "Programming Language :: Python :: 3.12"
27
+ ]
28
+
29
+ dependencies = [
30
+ "pandas>=1.5",
31
+ "polars>=0.20"
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ viz = ["seaborn>=0.11", "plotly>=5.0"]
36
+ advanced = ["scikit-learn>=1.0", "statsmodels>=0.13"]
37
+
38
+ [project.scripts]
39
+ statslibx = "statslibx.cli:main"
40
+
41
+ [tool.setuptools.packages.find]
42
+ where = ["."]
@@ -11,6 +11,7 @@ __author__ = "Emmanuel Ascendra"
11
11
  from .descriptive import DescriptiveStats, DescriptiveSummary
12
12
  from .inferential import InferentialStats, TestResult
13
13
  from .utils import UtilsStats
14
+ from .preprocessing import Preprocessing
14
15
  from .datasets import load_dataset
15
16
 
16
17
  # Definir qué se expone cuando se hace: from statslib import *
@@ -22,6 +23,7 @@ __all__ = [
22
23
  'DescriptiveSummary',
23
24
  'TestResult',
24
25
  'UtilsStats',
26
+ 'Preprocessing',
25
27
  'load_dataset'
26
28
  ]
27
29
 
@@ -35,4 +37,5 @@ def welcome():
35
37
  print(f" - DescriptiveStats: Estadística descriptiva")
36
38
  print(f" - InferentialStats: Estadística inferencial")
37
39
  print(f" - UtilsStats: Utilidades Extras")
40
+ print(f" - Preprocessing: Preprocesamiento de datos")
38
41
  print(f"\nPara más información: help(statslibx)")
@@ -0,0 +1,47 @@
1
+ import argparse
2
+ from statslibx.io import load_file
3
+ from statslibx.preprocessing import Preprocessing
4
+
5
+
6
def main():
    """Run the ``statslibx`` command-line interface.

    Three sub-commands are registered, each taking a positional ``file``
    argument: ``describe``, ``quality`` and ``preview``. ``preview`` also
    accepts ``-n/--rows`` (int, default 5).

    When no sub-command is given the help text is printed and the function
    returns. Otherwise the file is loaded with ``load_file``, wrapped in
    ``Preprocessing``, and the result of the method matching the
    sub-command is printed.
    """
    cli = argparse.ArgumentParser(
        prog="statslibx",
        description="Statslibx - Data analysis from terminal",
    )
    commands = cli.add_subparsers(dest="command")

    # "describe" and "quality" only need the input file.
    for simple_command in ("describe", "quality"):
        commands.add_parser(simple_command).add_argument("file")

    # "preview" additionally takes the number of rows to show.
    preview_cmd = commands.add_parser("preview")
    preview_cmd.add_argument("file")
    preview_cmd.add_argument("-n", "--rows", type=int, default=5)

    options = cli.parse_args()

    # Sub-commands are optional for argparse, so handle the bare invocation.
    if not options.command:
        cli.print_help()
        return

    processor = Preprocessing(load_file(options.file))

    # Lazy dispatch: only the selected action is evaluated.
    actions = {
        "describe": lambda: processor.describe_numeric(),
        "quality": lambda: processor.data_quality(),
        "preview": lambda: processor.preview_data(options.rows),
    }
    action = actions.get(options.command)
    if action is not None:
        print(action())


if __name__ == "__main__":
    main()
@@ -0,0 +1,71 @@
1
+ from typing import Optional, Union, Literal, List
2
+ import polars as pl
3
+ import pandas as pd
4
+ import pkgutil
5
+ import io
6
+
7
def load_dataset(
    name: str,
    backend: Literal['pandas', 'polars'] = 'pandas'
) -> Union[pd.DataFrame, pl.DataFrame]:
    """Load a CSV dataset bundled inside the ``statslibx.datasets`` package.

    Available datasets:
        - iris.csv
        - penguins.csv
        - sp500_companies.csv
        - titanic.csv
        - course_completion.csv

    Args:
        name: Resource name passed to ``pkgutil.get_data`` (e.g. ``"iris.csv"``).
        backend: ``'pandas'`` or ``'polars'``; selects which library parses
            the CSV bytes.

    Returns:
        The parsed dataset as a DataFrame of the chosen backend.

    Raises:
        FileNotFoundError: If ``pkgutil.get_data`` returns ``None``.
        ValueError: If ``backend`` is neither ``'pandas'`` nor ``'polars'``.
    """
    raw = pkgutil.get_data("statslibx.datasets", name)
    if raw is None:
        raise FileNotFoundError(f"Dataset '{name}' no encontrado.")

    # The backend is checked after the resource lookup, as before.
    if backend not in ("pandas", "polars"):
        raise ValueError(
            "Backend no soportado. Use 'pandas' o 'polars'."
        )

    reader = pd.read_csv if backend == "pandas" else pl.read_csv
    return reader(io.BytesIO(raw))
31
+
32
def load_iris(
    backend: Literal['pandas', 'polars'] = 'pandas'
) -> Union[pd.DataFrame, pl.DataFrame]:
    """Load the bundled Iris dataset (``iris.csv``).

    Args:
        backend: ``'pandas'`` or ``'polars'``; selects which library parses
            the CSV bytes. Both are supported, matching ``load_dataset``.

    Returns:
        The Iris dataset as a DataFrame of the chosen backend.

    Raises:
        FileNotFoundError: If ``pkgutil.get_data`` returns ``None``.
        ValueError: If ``backend`` is neither ``'pandas'`` nor ``'polars'``.
    """
    raw = pkgutil.get_data("statslibx.datasets", "iris.csv")
    if raw is None:
        raise FileNotFoundError("Dataset \"iris.csv\" no encontrado.")

    if backend == "pandas":
        return pd.read_csv(io.BytesIO(raw))
    if backend == "polars":
        # The signature advertises polars, so honour it instead of raising.
        return pl.read_csv(io.BytesIO(raw))
    raise ValueError(
        "Backend no soportado. Use 'pandas' o 'polars'."
    )
51
+
52
def load_penguins(
    backend: Literal['pandas', 'polars'] = 'pandas'
) -> Union[pd.DataFrame, pl.DataFrame]:
    """Load the bundled Penguins dataset (``penguins.csv``).

    Args:
        backend: ``'pandas'`` or ``'polars'``; selects which library parses
            the CSV bytes. Both are supported, matching ``load_dataset``.

    Returns:
        The Penguins dataset as a DataFrame of the chosen backend.

    Raises:
        FileNotFoundError: If ``pkgutil.get_data`` returns ``None``.
        ValueError: If ``backend`` is neither ``'pandas'`` nor ``'polars'``.
    """
    raw = pkgutil.get_data("statslibx.datasets", "penguins.csv")
    if raw is None:
        raise FileNotFoundError("Dataset \"penguins.csv\" no encontrado.")

    if backend == "pandas":
        return pd.read_csv(io.BytesIO(raw))
    if backend == "polars":
        # The signature advertises polars, so honour it instead of raising.
        return pl.read_csv(io.BytesIO(raw))
    raise ValueError(
        "Backend no soportado. Use 'pandas' o 'polars'."
    )
71
+