PyPI - statslibx - Versions diffs - 0.2.2__tar.gz → 0.2.4__tar.gz - Mend

statslibx 0.2.2.tar.gz → 0.2.4.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

{statslibx-0.2.2/statslibx.egg-info → statslibx-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: statslibx
-Version: 0.2.2
+Version: 0.2.4
 Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
 Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
 License: MIT
@@ -16,6 +16,14 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: pandas>=1.5
+Requires-Dist: matplotlib>=3.5
+Requires-Dist: numpy>=1.23
+Requires-Dist: scipy>=1.9
+Requires-Dist: polars>=0.16
+Requires-Dist: scikit-learn>=1.0
+Requires-Dist: statsmodels>=0.13
+Requires-Dist: seaborn>=0.11
+Requires-Dist: plotly>=5.0
 Provides-Extra: viz
 Requires-Dist: seaborn>=0.11; extra == "viz"
 Requires-Dist: plotly>=5.0; extra == "viz"
@@ -29,6 +37,8 @@ StatsLibX es un paquete de Python diseñado para proporcionar una solución senc
 Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
+Pagina Web: [StatsLibX](https://ghostanalyst30.github.io/StatsLibX/Documentation_Page/index.html)
 GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
 ## ✨ Características principales
@@ -63,6 +73,7 @@ pip install statslibx
 ## 👩‍💻 ¡Usalo en la terminal! (De forma preliminar)
 ```bash
+statslibx                        # Informacion general de la libreria
 statslibx describe .\archive.csv # Devuelve una descripcion de la data
 statslibx quality .\archive.csv # Devuelve la calidad de los datos
 statslibx preview .\archive.csv # Devuelve una visualizacion de los datos

{statslibx-0.2.2 → statslibx-0.2.4}/README.md RENAMED Viewed

@@ -4,6 +4,8 @@ StatsLibX es un paquete de Python diseñado para proporcionar una solución senc
 Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
+Pagina Web: [StatsLibX](https://ghostanalyst30.github.io/StatsLibX/Documentation_Page/index.html)
 GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
 ## ✨ Características principales
@@ -38,6 +40,7 @@ pip install statslibx
 ## 👩‍💻 ¡Usalo en la terminal! (De forma preliminar)
 ```bash
+statslibx                        # Informacion general de la libreria
 statslibx describe .\archive.csv # Devuelve una descripcion de la data
 statslibx quality .\archive.csv # Devuelve la calidad de los datos
 statslibx preview .\archive.csv # Devuelve una visualizacion de los datos

{statslibx-0.2.2 → statslibx-0.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "statslibx"
-version = "0.2.2"
+version = "0.2.4"
 description = "StatsLibx - Librería de estadística descriptiva, inferencial y computacional"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -27,7 +27,15 @@ classifiers = [
 ]
 dependencies = [
-    "pandas>=1.5"
+    "pandas>=1.5",
+    "matplotlib>=3.5",
+    "numpy>=1.23",
+    "scipy>=1.9",
+    "polars>=0.16",
+    "scikit-learn>=1.0",
+    "statsmodels>=0.13",
+    "seaborn>=0.11",
+    "plotly>=5.0"
 ]
 [project.optional-dependencies]

{statslibx-0.2.2 → statslibx-0.2.4}/statslibx/__init__.py RENAMED Viewed

@@ -1,10 +1,10 @@
 """
 StatsLibx - Librería de Estadística para Python
 Autor: Emmanuel Ascendra
-Versión: 0.2.2
+Versión: 0.2.4
 """
-__version__ = "0.2.2"
+__version__ = "0.2.4"
 __author__ = "Emmanuel Ascendra"
 # Importar las clases principales

{statslibx-0.2.2 → statslibx-0.2.4}/statslibx/cli.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import argparse
-from statslibx.io import load_file
+import statslibx as slx
+from statslibx.datasets import load_dataset
 from statslibx.preprocessing import Preprocessing
@@ -27,10 +28,10 @@ def main():
     args = parser.parse_args()
     if not args.command:
-        parser.print_help()
+        print(slx.welcome())
         return
-    df = load_file(args.file)
+    df = load_dataset(args.file)
     pp = Preprocessing(df)
     if args.command == "describe":

{statslibx-0.2.2 → statslibx-0.2.4}/statslibx/datasets/__init__.py RENAMED Viewed

@@ -3,7 +3,6 @@ import io
 import pkgutil
 from pathlib import Path
 import pandas as pd
-import polars as pl
 import numpy as np
 from numpy.typing import NDArray
@@ -12,10 +11,10 @@ _SUPPORTED_BACKENDS = ("pandas", "polars")
 def _validate_columns(
-    df: Union[pd.DataFrame, pl.DataFrame],
-    X_columns: List[str],
-    y_column: str
-) -> None:
+    df: pd.DataFrame,  # 输入的数据框，可以是pandas或polars DataFrame
+    X_columns: List[str],  # 特征列名列表
+    y_column: str  # 目标列名
+) -> None:  # 无返回值，函数仅用于验证
     columns = set(df.columns)
     missing = set(X_columns + [y_column]) - columns
     if missing:
@@ -23,7 +22,7 @@ def _validate_columns(
 def _X_y(
-    df: Union[pd.DataFrame, pl.DataFrame],
+    df: pd.DataFrame,
     X_columns: List[str],
     y_column: str
 ) -> Tuple[NDArray, NDArray]:
@@ -37,25 +36,19 @@ def _X_y(
         y = df[y_column].to_numpy().ravel()
         return X, y
-    elif isinstance(df, pl.DataFrame):
-        X = df.select(X_columns).to_numpy()
-        y = df.select(y_column).to_numpy().ravel()
-        return X, y
     else:
         raise TypeError(
-            "Backend no soportado. Use pandas.DataFrame o polars.DataFrame."
+            "Backend no soportado. Use pandas.DataFrame"
         )
 import io
 import pkgutil
 import pandas as pd
-import polars as pl
 from typing import Literal, Optional, Tuple, List, Union
 from numpy.typing import NDArray
-_SUPPORTED_BACKENDS = {"pandas", "polars"}
+_SUPPORTED_BACKENDS = {"pandas"}
 _SUPPORTED_EXTENSIONS = {".csv", ".parquet", ".xlsx", ".xls", ".json"}
 def _read_file(
@@ -73,23 +66,15 @@ def _read_file(
             return pd.read_excel(buffer_or_path)
         if ext == ".json":
             return pd.read_json(buffer_or_path)
-    else:  # polars
-        if ext == ".csv":
-            return pl.read_csv(buffer_or_path)
-        if ext == ".parquet":
-            return pl.read_parquet(buffer_or_path)
-        if ext == ".json":
-            return pl.read_json(buffer_or_path)
     raise ValueError(f"Extensión '{ext}' no soportada para backend '{backend}'.")
 def load_dataset(
-    name: str,
-    backend: Literal["pandas", "polars"] = "pandas",
-    return_X_y: Optional[Tuple[List[str], str]] = None,
-    sep: str = ","
-) -> Union[pd.DataFrame, pl.DataFrame, Tuple[NDArray, NDArray]]:
+        name: str,
+        backend: str = "pandas",
+        return_X_y: Optional[Tuple[List[str], str]] = None,
+        sep: str = ","
+    ) -> Union[pd.DataFrame, Tuple[NDArray, NDArray]]:
     """
     Carga un dataset interno del paquete.
@@ -99,6 +84,7 @@ def load_dataset(
     - sp500_companies.csv
     - titanic.csv
     - course_completion.csv
+    - Cocoa_Bubbles_Investment_Nigeria_Ghana_1980_2023.xlsx
     Parámetros
     ----------
@@ -120,7 +106,10 @@ def load_dataset(
             f"Use uno de {_SUPPORTED_BACKENDS}."
         )
-    ext = Path(name).suffix.lower()
+    path = Path(name)
+    resource_name = path.name
+    ext = path.suffix.lower()
     if ext not in _SUPPORTED_EXTENSIONS:
         raise ValueError(
@@ -130,26 +119,26 @@ def load_dataset(
     df = None
-    # ---------- 1️⃣ Intentar cargar desde el paquete ----------
+    # 1️⃣ Intentar cargar desde el paquete
     try:
-        data_bytes = pkgutil.get_data("statslibx.datasets", name)
+        data_bytes = pkgutil.get_data("statslibx.datasets", resource_name)
         if data_bytes is not None:
             buffer = io.BytesIO(data_bytes)
             df = _read_file(buffer, ext, backend, sep)
     except FileNotFoundError:
         pass
-    # ---------- 2️⃣ Intentar cargar desde ruta local ----------
+    # 2️⃣ Intentar cargar desde ruta local
     if df is None:
-        try:
-            df = _read_file(name, ext, backend, sep)
-        except FileNotFoundError:
+        if not path.exists():
             raise FileNotFoundError(
                 f"Dataset '{name}' no encontrado "
-                f"ni en statslibx.datasets ni en la ruta actual."
+                f"ni en statslibx.datasets ni en la ruta local."
             )
+        df = _read_file(path, ext, backend, sep)
-    # ---------- 3️⃣ Devolver X, y si se solicita ----------
+    # 3️⃣ Devolver X, y si se solicita
     if return_X_y is not None:
         X_columns, y_column = return_X_y
         return _X_y(df, X_columns, y_column)
@@ -157,12 +146,13 @@ def load_dataset(
     return df
 # =========================
 # Datasets específicos
 # =========================
 def load_iris(
-    backend: Literal["pandas", "polars"] = "pandas",
+    backend: str = "pandas",
     return_X_y: Optional[Tuple[List[str], str]] = None
 ):
     return load_dataset(
@@ -173,7 +163,7 @@ def load_iris(
 def load_penguins(
-    backend: Literal["pandas", "polars"] = "pandas",
+    backend: str = "pandas",
     return_X_y: Optional[Tuple[List[str], str]] = None
 ):
     return load_dataset(

{statslibx-0.2.2 → statslibx-0.2.4}/statslibx/descriptive.py RENAMED Viewed

@@ -1,13 +1,9 @@
 import numpy as np
 import pandas as pd
-import polars as pl
 from typing import Optional, Union, Literal, List
 from datetime import datetime
-import os
 import matplotlib.pyplot as plt
 import seaborn as sns
-import io
-import base64
 import plotly.express as px
 class DescriptiveStats:
@@ -104,16 +100,9 @@ class DescriptiveStats:
             raise TypeError(
                 "Data must be a pandas.DataFrame or numpy.ndarray."
             )
-        if isinstance(data, np.ndarray):
-            if data.ndim == 1:
-                data = pd.DataFrame({'var': data})
-            else:
-                data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])],
-                                    sep=self.sep) \
-                    if isinstance(data, pd.DataFrame) else pl.DataFrame(data, )
-        self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
+        self._numeric_cols = self.data.select_dtypes(include=["number"]).columns.tolist()
+        self._categorical_cols = self.data.select_dtypes(include=["object", "category"]).columns.tolist()
         self.lang = lang

{statslibx-0.2.2 → statslibx-0.2.4}/statslibx/inferential.py RENAMED Viewed

@@ -1,11 +1,9 @@
 from dataclasses import dataclass
 import numpy as np
 import pandas as pd
-import polars as pl
-from typing import Optional, Union, Literal, List, Dict, Any, Tuple
+from typing import Union, Literal, Dict, Any, Tuple
 from datetime import datetime
 from scipy import stats
-import os
 class InferentialStats:
     """
@@ -94,8 +92,8 @@ class InferentialStats:
             else:
                 data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])])
-        self.data = data
-        self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
+        self._numeric_cols = data.select_dtypes(include=["number"]).columns.tolist()
+        self._categorical_cols = self.data.select_dtypes(include=["object", "category"]).columns.tolist()
         self.lang = lang
     # ============= INTERVALOS DE CONFIANZA =============

{statslibx-0.2.2 → statslibx-0.2.4}/statslibx/preprocessing/__init__.py RENAMED Viewed

@@ -10,6 +10,7 @@ class Preprocessing:
         if not isinstance(data, (pd.DataFrame, pl.DataFrame)):
             raise TypeError("data must be a pandas or polars DataFrame")
         self.data = data
+        self.columns = list(self.data.columns)
     # ------------------------------------------------------------------
     # Internal helpers
@@ -27,11 +28,11 @@ class Preprocessing:
         return int(self.data[column].null_count())
     def _get_columns(self, columns):
-        if columns is None:
-            return list(self.data.columns)
-        if isinstance(columns, str):
-            return [columns]
-        return columns
+            if columns is None:
+                return list(self.data.columns)
+            if isinstance(columns, str):
+                return [columns]
+            return columns
     # ------------------------------------------------------------------
     # Inspection
@@ -226,3 +227,103 @@ class Preprocessing:
         return pd.DataFrame(rows)
+    def change_dtypes(
+        self,
+        columns: Union[List[str], str, None] = None,
+        from_type: Optional[str] = None,
+        to_type: Optional[str] = None
+    ) -> pd.DataFrame:
+        data = self.data
+        TYPE_MAP = {
+            "string": "string",
+            "object": "object",
+            "int": "int64",
+            "float": "float64",
+            "int64": "int64",
+            "float64": "float64",
+            "number": "float64"
+        }
+        if columns is None:
+            columns = list(data.columns)
+        elif isinstance(columns, str):
+            columns = [columns]
+        if to_type and to_type not in TYPE_MAP:
+            raise ValueError(f"Unsupported to_type: {to_type}")
+        if self._is_pandas():
+            for col in columns:
+                if col not in data.columns:
+                    print(f"Column '{col}' does not exist in the DataFrame")
+                    return
+                if from_type is not None:
+                    current_type = str(data[col].dtype)
+                    if from_type not in current_type:
+                        continue
+                if to_type is not None:
+                    try:
+                        if to_type in ["int", "float", "number"]:
+                            data[col] = pd.to_numeric(data[col], errors="raise")
+                            if to_type == "int":
+                                data[col] = data[col].astype("int64")
+                        elif to_type == "string":
+                            data[col] = data[col].astype("string")
+                        elif to_type == "object":
+                            data[col] = data[col].astype("object")
+                        else:
+                            data[col] = data[col].astype(TYPE_MAP[to_type])
+                    except Exception:
+                        print(f"Cannot convert column '{col}' to {to_type}")
+        return data
+    def clean_data(
+        self,
+        # 🔍 Missing values
+        handle_missing: bool = False,
+        missing_strategy: str = "mean",  # mean, median, mode, drop, constant
+        fill_value=None,
+        # 🧹 Duplicados
+        remove_duplicates: bool = False,
+        # 📊 Tipos de datos
+        convert_dtypes: bool = False,
+        # 🚨 Outliers
+        detect_outliers: bool = False,
+        remove_outliers: bool = False,
+        outlier_method: str = "iqr",  # iqr, zscore
+        z_thresh: float = 3.0,
+        # 📏 Escalado / Normalización
+        scale: bool = False,
+        scaling_method: str = "standard",  # standard, minmax, robust
+        # 🔢 Transformaciones
+        log_transform: bool = False,
+        sqrt_transform: bool = False,
+        # 🧱 Columnas
+        drop_columns: list = None,
+        keep_columns: list = None,
+        # 🧪 Analisis
+        analizer: bool = True,
+        text_analizer: bool = False) -> pd.DataFrame | str:
+        pass

{statslibx-0.2.2 → statslibx-0.2.4/statslibx.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: statslibx
-Version: 0.2.2
+Version: 0.2.4
 Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
 Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
 License: MIT
@@ -16,6 +16,14 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: pandas>=1.5
+Requires-Dist: matplotlib>=3.5
+Requires-Dist: numpy>=1.23
+Requires-Dist: scipy>=1.9
+Requires-Dist: polars>=0.16
+Requires-Dist: scikit-learn>=1.0
+Requires-Dist: statsmodels>=0.13
+Requires-Dist: seaborn>=0.11
+Requires-Dist: plotly>=5.0
 Provides-Extra: viz
 Requires-Dist: seaborn>=0.11; extra == "viz"
 Requires-Dist: plotly>=5.0; extra == "viz"
@@ -29,6 +37,8 @@ StatsLibX es un paquete de Python diseñado para proporcionar una solución senc
 Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
+Pagina Web: [StatsLibX](https://ghostanalyst30.github.io/StatsLibX/Documentation_Page/index.html)
 GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
 ## ✨ Características principales
@@ -63,6 +73,7 @@ pip install statslibx
 ## 👩‍💻 ¡Usalo en la terminal! (De forma preliminar)
 ```bash
+statslibx                        # Informacion general de la libreria
 statslibx describe .\archive.csv # Devuelve una descripcion de la data
 statslibx quality .\archive.csv # Devuelve la calidad de los datos
 statslibx preview .\archive.csv # Devuelve una visualizacion de los datos

statslibx-0.2.4/statslibx.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,17 @@
+pandas>=1.5
+matplotlib>=3.5
+numpy>=1.23
+scipy>=1.9
+polars>=0.16
+scikit-learn>=1.0
+statsmodels>=0.13
+seaborn>=0.11
+plotly>=5.0
+[advanced]
+scikit-learn>=1.0
+statsmodels>=0.13
+[viz]
+seaborn>=0.11
+plotly>=5.0

{statslibx-0.2.2 → statslibx-0.2.4}/statslibx.egg-info/top_level.txt RENAMED Viewed

@@ -1,3 +1,4 @@
+Documentation_Page
 dist
 figures
 statslibx

statslibx-0.2.4/tests/test1.py ADDED Viewed

@@ -0,0 +1,30 @@
+from statslibx import load_dataset, DescriptiveStats, InferentialStats
+import pandas as pd
+# df = pd.read_csv(r"tests\bank (1).csv", sep=";")
+# df = load_dataset(r"tests\bank (1).csv", sep=";")
+# stats = DescriptiveStats(df)
+# print(stats.data)
+# infer = InferentialStats(df)
+# print(infer.data)
+# df = load_dataset(r"statslibx\datasets\Cocoa_Bubbles_Investment_Nigeria_Ghana_1980_2023.xlsx")
+# ds = DescriptiveStats(df)
+# print(ds.data)
+import statslibx as slx
+df = slx.datasets.load_penguins()
+infer = slx.InferentialStats(df)
+# Confidence Interval + Point Estimate
+print(infer.confidence_interval(
+    column="bill_length_mm",
+    statistic="mean"
+))

statslibx-0.2.2/statslibx.egg-info/requires.txt DELETED Viewed

@@ -1,9 +0,0 @@
-pandas>=1.5
-[advanced]
-scikit-learn>=1.0
-statsmodels>=0.13
-[viz]
-seaborn>=0.11
-plotly>=5.0

statslibx-0.2.2/tests/test1.py DELETED Viewed

@@ -1,20 +0,0 @@
-from statslibx import load_dataset, DescriptiveStats, InferentialStats
-import pandas as pd
-# df = pd.read_csv(r"tests\bank (1).csv", sep=";")
-# df = load_dataset(r"tests\bank (1).csv", sep=";")
-# stats = DescriptiveStats(df)
-# print(stats.data)
-# infer = InferentialStats(df)
-# print(infer.data)
-df = load_dataset(r"statslibx\datasets\WHR25_Data_Figure_2.1.xlsx")
-ds = DescriptiveStats(df)
-print(ds.data)