statslibx 0.1.6__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {statslibx-0.1.6 → statslibx-0.1.7}/PKG-INFO +10 -29
- statslibx-0.1.7/pyproject.toml +42 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/__init__.py +3 -0
- statslibx-0.1.7/statslibx/cli.py +47 -0
- statslibx-0.1.7/statslibx/datasets/__init__.py +71 -0
- statslibx-0.1.7/statslibx/descriptive.py +1199 -0
- statslibx-0.1.7/statslibx/io.py +21 -0
- statslibx-0.1.7/statslibx/preprocessing/__init__.py +221 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx.egg-info/PKG-INFO +10 -29
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx.egg-info/SOURCES.txt +6 -2
- statslibx-0.1.7/statslibx.egg-info/entry_points.txt +2 -0
- statslibx-0.1.7/statslibx.egg-info/requires.txt +10 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx.egg-info/top_level.txt +1 -0
- statslibx-0.1.6/setup.py +0 -53
- statslibx-0.1.6/statslibx/datasets/__init__.py +0 -16
- statslibx-0.1.6/statslibx/descriptive.py +0 -856
- statslibx-0.1.6/statslibx.egg-info/requires.txt +0 -18
- {statslibx-0.1.6 → statslibx-0.1.7}/README.md +0 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/setup.cfg +0 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/datasets/course_completion.csv +0 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/datasets/iris.csv +0 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/datasets/penguins.csv +0 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/datasets/sp500_companies.csv +0 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/datasets/titanic.csv +0 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/inferential.py +0 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx/utils.py +0 -0
- {statslibx-0.1.6 → statslibx-0.1.7}/statslibx.egg-info/dependency_links.txt +0 -0
|
@@ -1,47 +1,28 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: statslibx
|
|
3
|
-
Version: 0.1.6
|
|
4
|
-
Summary: Librería de estadística descriptiva e inferencial
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Author-email: ascendraemmanuel@gmail.com
|
|
3
|
+
Version: 0.1.7
|
|
4
|
+
Summary: StatsLibx - Librería de estadística descriptiva e inferencial
|
|
5
|
+
Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
|
|
6
|
+
License: MIT
|
|
8
7
|
Classifier: Development Status :: 3 - Alpha
|
|
9
8
|
Classifier: Intended Audience :: Science/Research
|
|
10
9
|
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
11
10
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
11
|
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
14
12
|
Classifier: Programming Language :: Python :: 3.9
|
|
15
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
16
|
Requires-Python: >=3.8
|
|
19
17
|
Description-Content-Type: text/markdown
|
|
20
|
-
Requires-Dist:
|
|
21
|
-
Requires-Dist:
|
|
22
|
-
Requires-Dist: scipy>=1.7.0
|
|
23
|
-
Requires-Dist: matplotlib>=3.4.0
|
|
18
|
+
Requires-Dist: pandas>=1.5
|
|
19
|
+
Requires-Dist: polars>=0.20
|
|
24
20
|
Provides-Extra: viz
|
|
25
|
-
Requires-Dist: seaborn>=0.11
|
|
26
|
-
Requires-Dist: plotly>=5.0
|
|
21
|
+
Requires-Dist: seaborn>=0.11; extra == "viz"
|
|
22
|
+
Requires-Dist: plotly>=5.0; extra == "viz"
|
|
27
23
|
Provides-Extra: advanced
|
|
28
|
-
Requires-Dist: scikit-learn>=1.0
|
|
29
|
-
Requires-Dist: statsmodels>=0.13
|
|
30
|
-
Provides-Extra: all
|
|
31
|
-
Requires-Dist: seaborn>=0.11.0; extra == "all"
|
|
32
|
-
Requires-Dist: plotly>=5.0.0; extra == "all"
|
|
33
|
-
Requires-Dist: scikit-learn>=1.0.0; extra == "all"
|
|
34
|
-
Requires-Dist: statsmodels>=0.13.0; extra == "all"
|
|
35
|
-
Dynamic: author
|
|
36
|
-
Dynamic: author-email
|
|
37
|
-
Dynamic: classifier
|
|
38
|
-
Dynamic: description
|
|
39
|
-
Dynamic: description-content-type
|
|
40
|
-
Dynamic: home-page
|
|
41
|
-
Dynamic: provides-extra
|
|
42
|
-
Dynamic: requires-dist
|
|
43
|
-
Dynamic: requires-python
|
|
44
|
-
Dynamic: summary
|
|
24
|
+
Requires-Dist: scikit-learn>=1.0; extra == "advanced"
|
|
25
|
+
Requires-Dist: statsmodels>=0.13; extra == "advanced"
|
|
45
26
|
|
|
46
27
|
# 📦 Descripción para PyPI (Plantilla Profesional)
|
|
47
28
|
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "statslibx"
|
|
7
|
+
version = "0.1.7"
|
|
8
|
+
description = "StatsLibx - Librería de estadística descriptiva e inferencial"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
|
|
13
|
+
authors = [
|
|
14
|
+
{ name = "Emmanuel Ascendra Perez", email = "ascendraemmanuel@gmail.com" }
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 3 - Alpha",
|
|
19
|
+
"Intended Audience :: Science/Research",
|
|
20
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
21
|
+
"License :: OSI Approved :: MIT License",
|
|
22
|
+
"Programming Language :: Python :: 3",
|
|
23
|
+
"Programming Language :: Python :: 3.9",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12"
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
dependencies = [
|
|
30
|
+
"pandas>=1.5",
|
|
31
|
+
"polars>=0.20"
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.optional-dependencies]
|
|
35
|
+
viz = ["seaborn>=0.11", "plotly>=5.0"]
|
|
36
|
+
advanced = ["scikit-learn>=1.0", "statsmodels>=0.13"]
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
statslibx = "statslibx.cli:main"
|
|
40
|
+
|
|
41
|
+
[tool.setuptools.packages.find]
|
|
42
|
+
where = ["."]
|
|
@@ -11,6 +11,7 @@ __author__ = "Emmanuel Ascendra"
|
|
|
11
11
|
from .descriptive import DescriptiveStats, DescriptiveSummary
|
|
12
12
|
from .inferential import InferentialStats, TestResult
|
|
13
13
|
from .utils import UtilsStats
|
|
14
|
+
from .preprocessing import Preprocessing
|
|
14
15
|
from .datasets import load_dataset
|
|
15
16
|
|
|
16
17
|
# Definir qué se expone cuando se hace: from statslib import *
|
|
@@ -22,6 +23,7 @@ __all__ = [
|
|
|
22
23
|
'DescriptiveSummary',
|
|
23
24
|
'TestResult',
|
|
24
25
|
'UtilsStats',
|
|
26
|
+
'Preprocessing',
|
|
25
27
|
'load_dataset'
|
|
26
28
|
]
|
|
27
29
|
|
|
@@ -35,4 +37,5 @@ def welcome():
|
|
|
35
37
|
print(f" - DescriptiveStats: Estadística descriptiva")
|
|
36
38
|
print(f" - InferentialStats: Estadística inferencial")
|
|
37
39
|
print(f" - UtilsStats: Utilidades Extras")
|
|
40
|
+
print(f" - Preprocessing: Preprocesamiento de datos")
|
|
38
41
|
print(f"\nPara más información: help(statslibx)")
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from statslibx.io import load_file
|
|
3
|
+
from statslibx.preprocessing import Preprocessing
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def main():
|
|
7
|
+
parser = argparse.ArgumentParser(
|
|
8
|
+
prog="statslibx",
|
|
9
|
+
description="Statslibx - Data analysis from terminal"
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
subparsers = parser.add_subparsers(dest="command")
|
|
13
|
+
|
|
14
|
+
# describe
|
|
15
|
+
describe = subparsers.add_parser("describe")
|
|
16
|
+
describe.add_argument("file")
|
|
17
|
+
|
|
18
|
+
# quality
|
|
19
|
+
quality = subparsers.add_parser("quality")
|
|
20
|
+
quality.add_argument("file")
|
|
21
|
+
|
|
22
|
+
# preview
|
|
23
|
+
preview = subparsers.add_parser("preview")
|
|
24
|
+
preview.add_argument("file")
|
|
25
|
+
preview.add_argument("-n", "--rows", type=int, default=5)
|
|
26
|
+
|
|
27
|
+
args = parser.parse_args()
|
|
28
|
+
|
|
29
|
+
if not args.command:
|
|
30
|
+
parser.print_help()
|
|
31
|
+
return
|
|
32
|
+
|
|
33
|
+
df = load_file(args.file)
|
|
34
|
+
pp = Preprocessing(df)
|
|
35
|
+
|
|
36
|
+
if args.command == "describe":
|
|
37
|
+
print(pp.describe_numeric())
|
|
38
|
+
|
|
39
|
+
elif args.command == "quality":
|
|
40
|
+
print(pp.data_quality())
|
|
41
|
+
|
|
42
|
+
elif args.command == "preview":
|
|
43
|
+
print(pp.preview_data(args.rows))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
if __name__ == "__main__":
|
|
47
|
+
main()
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from typing import Optional, Union, Literal, List
|
|
2
|
+
import polars as pl
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import pkgutil
|
|
5
|
+
import io
|
|
6
|
+
|
|
7
|
+
def load_dataset(
|
|
8
|
+
name: str,
|
|
9
|
+
backend: Literal['pandas', 'polars'] = 'pandas'
|
|
10
|
+
) -> Union[pd.DataFrame, pl.DataFrame]:
|
|
11
|
+
"""Carga un dataset interno del paquete.
|
|
12
|
+
Datasets Disponibles:
|
|
13
|
+
- iris.csv
|
|
14
|
+
- penguins.csv
|
|
15
|
+
- sp500_companies.csv
|
|
16
|
+
- titanic.csv
|
|
17
|
+
- course_completion.csv
|
|
18
|
+
"""
|
|
19
|
+
data_bytes = pkgutil.get_data("statslibx.datasets", name)
|
|
20
|
+
if data_bytes is None:
|
|
21
|
+
raise FileNotFoundError(f"Dataset '{name}' no encontrado.")
|
|
22
|
+
|
|
23
|
+
if backend == "pandas":
|
|
24
|
+
return pd.read_csv(io.BytesIO(data_bytes))
|
|
25
|
+
elif backend == "polars":
|
|
26
|
+
return pl.read_csv(io.BytesIO(data_bytes))
|
|
27
|
+
else:
|
|
28
|
+
raise ValueError(
|
|
29
|
+
"Backend no soportado. Use 'pandas' o 'polars'."
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
def load_iris(
|
|
33
|
+
backend: Literal['pandas', 'polars'] = 'pandas'
|
|
34
|
+
) -> Union[pd.DataFrame, pl.DataFrame]:
|
|
35
|
+
"""Carga el dataset interno de la libreria: Iris
|
|
36
|
+
"""
|
|
37
|
+
data_bytes = pkgutil.get_data("statslibx.datasets", "iris.csv")
|
|
38
|
+
if data_bytes is None:
|
|
39
|
+
raise FileNotFoundError(f"Dataset \"iris.csv\" no encontrado.")
|
|
40
|
+
|
|
41
|
+
if backend == "pandas":
|
|
42
|
+
return pd.read_csv(io.BytesIO(data_bytes))
|
|
43
|
+
elif backend == "polars":
|
|
44
|
+
raise ValueError(
|
|
45
|
+
"Backend no soportado aun. Use 'pandas'."
|
|
46
|
+
)
|
|
47
|
+
else:
|
|
48
|
+
raise ValueError(
|
|
49
|
+
"Backend no soportado. Use 'pandas' o 'polars'."
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
def load_penguins(
|
|
53
|
+
backend: Literal['pandas', 'polars'] = 'pandas'
|
|
54
|
+
) -> Union[pd.DataFrame, pl.DataFrame]:
|
|
55
|
+
"""Carga un dataset interno de la libreria: Penguins
|
|
56
|
+
"""
|
|
57
|
+
data_bytes = pkgutil.get_data("statslibx.datasets", "penguins.csv")
|
|
58
|
+
if data_bytes is None:
|
|
59
|
+
raise FileNotFoundError(f"Dataset \"penguins.csv\" no encontrado.")
|
|
60
|
+
|
|
61
|
+
if backend == "pandas":
|
|
62
|
+
return pd.read_csv(io.BytesIO(data_bytes))
|
|
63
|
+
elif backend == "polars":
|
|
64
|
+
raise ValueError(
|
|
65
|
+
"Backend no soportado aun. Use 'pandas'."
|
|
66
|
+
)
|
|
67
|
+
else:
|
|
68
|
+
raise ValueError(
|
|
69
|
+
"Backend no soportado. Use 'pandas' o 'polars'."
|
|
70
|
+
)
|
|
71
|
+
|