statslibx 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {statslibx-0.2.3/statslibx.egg-info → statslibx-0.2.4}/PKG-INFO +3 -1
- {statslibx-0.2.3 → statslibx-0.2.4}/README.md +2 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/pyproject.toml +1 -1
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/__init__.py +2 -2
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/descriptive.py +2 -2
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/inferential.py +2 -2
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/preprocessing/__init__.py +37 -0
- {statslibx-0.2.3 → statslibx-0.2.4/statslibx.egg-info}/PKG-INFO +3 -1
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx.egg-info/top_level.txt +1 -0
- statslibx-0.2.4/tests/test1.py +30 -0
- statslibx-0.2.3/tests/test1.py +0 -20
- {statslibx-0.2.3 → statslibx-0.2.4}/MANIFEST.in +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/setup.cfg +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/cli.py +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/computacional.py +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/datasets/Cocoa_Bubbles_Investment_Nigeria_Ghana_1980_2023.xlsx +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/datasets/__init__.py +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/datasets/course_completion.csv +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/datasets/iris.csv +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/datasets/penguins.csv +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/datasets/sp500_companies.csv +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/datasets/titanic.csv +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx/utils.py +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx.egg-info/SOURCES.txt +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx.egg-info/dependency_links.txt +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx.egg-info/entry_points.txt +0 -0
- {statslibx-0.2.3 → statslibx-0.2.4}/statslibx.egg-info/requires.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: statslibx
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
|
|
5
5
|
Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -37,6 +37,8 @@ StatsLibX es un paquete de Python diseñado para proporcionar una solución senc
|
|
|
37
37
|
|
|
38
38
|
Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
|
|
39
39
|
|
|
40
|
+
Pagina Web: [StatsLibX](https://ghostanalyst30.github.io/StatsLibX/Documentation_Page/index.html)
|
|
41
|
+
|
|
40
42
|
GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
|
|
41
43
|
|
|
42
44
|
## ✨ Características principales
|
|
@@ -4,6 +4,8 @@ StatsLibX es un paquete de Python diseñado para proporcionar una solución senc
|
|
|
4
4
|
|
|
5
5
|
Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
|
|
6
6
|
|
|
7
|
+
Pagina Web: [StatsLibX](https://ghostanalyst30.github.io/StatsLibX/Documentation_Page/index.html)
|
|
8
|
+
|
|
7
9
|
GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
|
|
8
10
|
|
|
9
11
|
## ✨ Características principales
|
|
@@ -101,8 +101,8 @@ class DescriptiveStats:
|
|
|
101
101
|
"Data must be a pandas.DataFrame or numpy.ndarray."
|
|
102
102
|
)
|
|
103
103
|
|
|
104
|
-
self._numeric_cols = self.data.select_dtypes(include=[
|
|
105
|
-
self._categorical_cols = self.data.select_dtypes(include=[
|
|
104
|
+
self._numeric_cols = self.data.select_dtypes(include=["number"]).columns.tolist()
|
|
105
|
+
self._categorical_cols = self.data.select_dtypes(include=["object", "category"]).columns.tolist()
|
|
106
106
|
self.lang = lang
|
|
107
107
|
|
|
108
108
|
|
|
@@ -92,8 +92,8 @@ class InferentialStats:
|
|
|
92
92
|
else:
|
|
93
93
|
data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])])
|
|
94
94
|
|
|
95
|
-
self._numeric_cols = data.select_dtypes(include=[
|
|
96
|
-
self._categorical_cols = self.data.select_dtypes(include=[
|
|
95
|
+
self._numeric_cols = data.select_dtypes(include=["number"]).columns.tolist()
|
|
96
|
+
self._categorical_cols = self.data.select_dtypes(include=["object", "category"]).columns.tolist()
|
|
97
97
|
self.lang = lang
|
|
98
98
|
|
|
99
99
|
# ============= INTERVALOS DE CONFIANZA =============
|
|
@@ -290,3 +290,40 @@ class Preprocessing:
|
|
|
290
290
|
print(f"Cannot convert column '{col}' to {to_type}")
|
|
291
291
|
|
|
292
292
|
return data
|
|
293
|
+
|
|
294
|
+
def clean_data(
|
|
295
|
+
self,
|
|
296
|
+
# 🔍 Missing values
|
|
297
|
+
handle_missing: bool = False,
|
|
298
|
+
missing_strategy: str = "mean", # mean, median, mode, drop, constant
|
|
299
|
+
fill_value=None,
|
|
300
|
+
|
|
301
|
+
# 🧹 Duplicados
|
|
302
|
+
remove_duplicates: bool = False,
|
|
303
|
+
|
|
304
|
+
# 📊 Tipos de datos
|
|
305
|
+
convert_dtypes: bool = False,
|
|
306
|
+
|
|
307
|
+
# 🚨 Outliers
|
|
308
|
+
detect_outliers: bool = False,
|
|
309
|
+
remove_outliers: bool = False,
|
|
310
|
+
outlier_method: str = "iqr", # iqr, zscore
|
|
311
|
+
z_thresh: float = 3.0,
|
|
312
|
+
|
|
313
|
+
# 📏 Escalado / Normalización
|
|
314
|
+
scale: bool = False,
|
|
315
|
+
scaling_method: str = "standard", # standard, minmax, robust
|
|
316
|
+
|
|
317
|
+
# 🔢 Transformaciones
|
|
318
|
+
log_transform: bool = False,
|
|
319
|
+
sqrt_transform: bool = False,
|
|
320
|
+
|
|
321
|
+
# 🧱 Columnas
|
|
322
|
+
drop_columns: list = None,
|
|
323
|
+
keep_columns: list = None,
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
# 🧪 Analisis
|
|
327
|
+
analizer: bool = True,
|
|
328
|
+
text_analizer: bool = False) -> pd.DataFrame | str:
|
|
329
|
+
pass
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: statslibx
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
|
|
5
5
|
Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -37,6 +37,8 @@ StatsLibX es un paquete de Python diseñado para proporcionar una solución senc
|
|
|
37
37
|
|
|
38
38
|
Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
|
|
39
39
|
|
|
40
|
+
Pagina Web: [StatsLibX](https://ghostanalyst30.github.io/StatsLibX/Documentation_Page/index.html)
|
|
41
|
+
|
|
40
42
|
GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
|
|
41
43
|
|
|
42
44
|
## ✨ Características principales
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from statslibx import load_dataset, DescriptiveStats, InferentialStats
|
|
2
|
+
import pandas as pd
|
|
3
|
+
# df = pd.read_csv(r"tests\bank (1).csv", sep=";")
|
|
4
|
+
|
|
5
|
+
# df = load_dataset(r"tests\bank (1).csv", sep=";")
|
|
6
|
+
# stats = DescriptiveStats(df)
|
|
7
|
+
# print(stats.data)
|
|
8
|
+
|
|
9
|
+
# infer = InferentialStats(df)
|
|
10
|
+
# print(infer.data)
|
|
11
|
+
|
|
12
|
+
# df = load_dataset(r"statslibx\datasets\Cocoa_Bubbles_Investment_Nigeria_Ghana_1980_2023.xlsx")
|
|
13
|
+
|
|
14
|
+
# ds = DescriptiveStats(df)
|
|
15
|
+
|
|
16
|
+
# print(ds.data)
|
|
17
|
+
|
|
18
|
+
import statslibx as slx
|
|
19
|
+
|
|
20
|
+
df = slx.datasets.load_penguins()
|
|
21
|
+
infer = slx.InferentialStats(df)
|
|
22
|
+
|
|
23
|
+
# Confidence Interval + Point Estimate
|
|
24
|
+
print(infer.confidence_interval(
|
|
25
|
+
column="bill_length_mm",
|
|
26
|
+
statistic="mean"
|
|
27
|
+
))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
|
statslibx-0.2.3/tests/test1.py
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
from statslibx import load_dataset, DescriptiveStats, InferentialStats
|
|
2
|
-
import pandas as pd
|
|
3
|
-
# df = pd.read_csv(r"tests\bank (1).csv", sep=";")
|
|
4
|
-
|
|
5
|
-
# df = load_dataset(r"tests\bank (1).csv", sep=";")
|
|
6
|
-
# stats = DescriptiveStats(df)
|
|
7
|
-
# print(stats.data)
|
|
8
|
-
|
|
9
|
-
# infer = InferentialStats(df)
|
|
10
|
-
# print(infer.data)
|
|
11
|
-
|
|
12
|
-
df = load_dataset(r"statslibx\datasets\Cocoa_Bubbles_Investment_Nigeria_Ghana_1980_2023.xlsx")
|
|
13
|
-
|
|
14
|
-
ds = DescriptiveStats(df)
|
|
15
|
-
|
|
16
|
-
print(ds.data)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|