PyPI - statslibx - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

statslibx 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

statslibx/__init__.py +2 -2
statslibx/datasets/__init__.py +4 -3
statslibx/descriptive.py +10 -63
statslibx/inferential.py +7 -49
{statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/METADATA +1 -2
{statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/RECORD +10 -9
{statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/top_level.txt +1 -0
tests/test1.py +14 -0
{statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/WHEEL +0 -0
{statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/entry_points.txt +0 -0

statslibx/__init__.py CHANGED Viewed

@@ -1,10 +1,10 @@
 """
 StatsLibx - Librería de Estadística para Python
 Autor: Emmanuel Ascendra
-Versión: 0.2.0
+Versión: 0.2.1
 """
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 __author__ = "Emmanuel Ascendra"
 # Importar las clases principales

statslibx/datasets/__init__.py CHANGED Viewed

@@ -61,8 +61,9 @@ def load_dataset(
     name: str,
     backend: Literal["pandas", "polars"] = "pandas",
     return_X_y: Optional[Tuple[List[str], str]] = None,
+    sep: str = ",",
     save: Optional[bool] = False,
-    filename: Optional[str] = None
+    filename: Optional[str] = None,
 ) -> Union[pd.DataFrame, pl.DataFrame, Tuple[NDArray, NDArray]]:
     """
     Carga un dataset interno del paquete.
@@ -101,7 +102,7 @@ def load_dataset(
         data_bytes = pkgutil.get_data("statslibx.datasets", name)
         if data_bytes is not None:
             df = (
-                pd.read_csv(io.BytesIO(data_bytes))
+                pd.read_csv(io.BytesIO(data_bytes), sep=sep)
                 if backend == "pandas"
                 else pl.read_csv(io.BytesIO(data_bytes))
             )
@@ -112,7 +113,7 @@ def load_dataset(
     if df is None:
         try:
             df = (
-                pd.read_csv(name)
+                pd.read_csv(name, sep=sep)
                 if backend == "pandas"
                 else pl.read_csv(name)
             )

statslibx/descriptive.py CHANGED Viewed

@@ -3,7 +3,6 @@ import pandas as pd
 import polars as pl
 from typing import Optional, Union, Literal, List
 from datetime import datetime
-import flet as ft
 import os
 import matplotlib.pyplot as plt
 import seaborn as sns
@@ -81,10 +80,6 @@ class DescriptiveStats:
     """
     def __init__(self, data: Union[pd.DataFrame, np.ndarray],
-                sep: str = None,
-                decimal: str = None,
-                thousand: str = None,
-                backend: Literal['pandas', 'polars'] = 'pandas',
                 lang: Literal['es-ES', 'en-US'] = 'es-ES'):
         """
         # Initialize DataFrame
@@ -92,9 +87,6 @@ class DescriptiveStats:
         ## **Parameters:**
         - **data** : Data to analyze
-        - **sep** : Column separator
-        - **decimal** : Decimal separator
-        - **thousand** : Thousand separator
         - **backend** : 'pandas' or 'polars' for processing
         (Proximamente estara habilitado polars para big data)
@@ -104,72 +96,26 @@ class DescriptiveStats:
         stats = DescriptiveStats(data)
         ``
         """
-        if isinstance(data, str) and os.path.exists(data):
-                data = DescriptiveStats.from_file(data).data
-        if isinstance(data, pl.DataFrame):
+        if isinstance(data, pd.DataFrame):
+            self.data = data
+        elif isinstance(data, np.ndarray):
+            self.data = pd.DataFrame(data)
+        else:
             raise TypeError(
-                "Polars aún no soportado. Use pandas.DataFrame."
+                "Data must be a pandas.DataFrame or numpy.ndarray."
             )
         if isinstance(data, np.ndarray):
             if data.ndim == 1:
                 data = pd.DataFrame({'var': data})
             else:
-                data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])]) \
+                data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])],
+                                    sep=self.sep) \
                     if isinstance(data, pd.DataFrame) else pl.DataFrame(data, )
-        self.data = data
-        self.backend = backend
         self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
-        self.sep = sep
-        self.decimal = decimal
-        self.thousand = thousand
         self.lang = lang
-    @classmethod
-    def from_file(self, path: str):
-        """
-        Carga automática de archivos y devuelve instancia de Intelligence.
-        Soporta CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
-        Automatic file upload and returns Intelligence instance.
-        Supports CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
-        Parametros / Parameters:
-        ------------------------
-        path : str
-            Ruta del archivo
-            File path
-        """
-        if not os.path.exists(path):
-            raise FileNotFoundError(f"Archivo no encontrado / File not found: {path}")
-        ext = os.path.splitext(path)[1].lower()
-        if ext == ".csv":
-            df = pd.read_csv(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
-        elif ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(path, decimal=self.decimal, thousand=self.thousand)
-        elif ext in [".txt", ".tsv"]:
-            df = pd.read_table(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
-        elif ext == ".json":
-            df = pd.read_json(path)
-        elif ext == ".parquet":
-            df = pd.read_parquet(path)
-        elif ext == ".feather":
-            df = pd.read_feather(path)
-        else:
-            raise ValueError(f"Formato no soportado / Unsupported format: {ext}")
-        return DescriptiveStats(df)
     # ============= MÉTODOS UNIVARIADOS =============
@@ -1262,3 +1208,4 @@ class LinearRegressionResult:
             plt.ylabel("Residuos")
             plt.title("Residuos vs Predicciones")
             plt.show()

statslibx/inferential.py CHANGED Viewed

@@ -69,8 +69,6 @@ class InferentialStats:
     """
     def __init__(self, data: Union[pd.DataFrame, np.ndarray],
-                backend: Literal['pandas', 'polars'] = 'pandas',
-                sep: str = None, decimal: str = None, thousand: str = None,
                 lang: Literal['es-ES', 'en-US'] = 'es-ES'):
         """
         Initialize DataFrame
@@ -79,17 +77,16 @@ class InferentialStats:
         -----------
         data : DataFrame o ndarray
             Data to analyze
-        backend : str
-            'pandas' or 'polars' for processing
         """
-        if isinstance(data, str) and os.path.exists(data):
-                data = InferentialStats.from_file(data).data
-        if isinstance(data, pl.DataFrame):
+        if isinstance(data, pd.DataFrame):
+            self.data = data
+        elif isinstance(data, np.ndarray):
+            self.data = pd.DataFrame(data)
+        else:
             raise TypeError(
-                "Polars aún no soportado. Use pandas.DataFrame."
-            )
+                "Data must be a pandas.DataFrame or numpy.ndarray."
+            )
         if isinstance(data, np.ndarray):
             if data.ndim == 1:
@@ -98,48 +95,9 @@ class InferentialStats:
                 data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])])
         self.data = data
-        self.backend = backend
         self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
-        self.sep = sep
-        self.decimal = decimal
-        self.thousand = thousand
         self.lang = lang
-    @classmethod
-    def from_file(path: str):
-        """
-        Carga automática de archivos y devuelve instancia de Intelligence.
-        Soporta CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
-        """
-        if not os.path.exists(path):
-            raise FileNotFoundError(f"Archivo no encontrado / File not found: {path}")
-        ext = os.path.splitext(path)[1].lower()
-        if ext == ".csv":
-            df = pd.read_csv(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
-        elif ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(path, decimal=self.decimal, thousand=self.thousand)
-        elif ext in [".txt", ".tsv"]:
-            df = pd.read_table(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)
-        elif ext == ".json":
-            df = pd.read_json(path)
-        elif ext == ".parquet":
-            df = pd.read_parquet(path)
-        elif ext == ".feather":
-            df = pd.read_feather(path)
-        else:
-            raise ValueError(f"Formato no soportado: {ext}")
-        return InferentialStats(df)
     # ============= INTERVALOS DE CONFIANZA =============
     def confidence_interval(self, column: str, confidence: float = 0.95,

{statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: statslibx
-Version: 0.2.0
+Version: 0.2.1
 Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
 Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
 License: MIT
@@ -16,7 +16,6 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: pandas>=1.5
-Requires-Dist: polars>=0.20
 Provides-Extra: viz
 Requires-Dist: seaborn>=0.11; extra == "viz"
 Requires-Dist: plotly>=5.0; extra == "viz"

{statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/RECORD RENAMED Viewed

@@ -1,19 +1,20 @@
-statslibx/__init__.py,sha256=YUKUQhO1vUYvcUQmlz1ZtvU6MWNZERdAG55-trf25ZY,1500
+statslibx/__init__.py,sha256=82KG6z_wJZf_ZF8jpViRvtzn4qV9uEZd8a3sRUucKLE,1500
 statslibx/cli.py,sha256=DqXaoP85n9xgLDlFnEkeqj-HJG0_IKX0uSqxRcHbzII,1122
 statslibx/computacional.py,sha256=z46bRUiH9a3ajxVTYE2sGO-pg20L87MdOKM3Y_Tcq44,4062
-statslibx/descriptive.py,sha256=GrUR4QfstUeLTXdxKSZsmKaOJkDso-QH51hlwTUaubA,63513
-statslibx/inferential.py,sha256=xiJCppezhWK4TrAARdOufuxjZcoGKsfHtRujKfuXbgg,83068
+statslibx/descriptive.py,sha256=QLIzPB-pEC2BXCIUsjpDyU7peHAs6fRduPukj1gA160,61671
+statslibx/inferential.py,sha256=_mUzX-Uo2Y55zVTZbQnIRloqKcHjh40djLW1J12HQPU,81617
 statslibx/io.py,sha256=v7pxpmlEMeKyfXftl3WbkUtC9FOh1pymz7MmKPPNw98,493
 statslibx/utils.py,sha256=gWXduW8LMN1q4ZwNggmodRsT9Rcsot-S82NsQiqrjUo,69992
-statslibx/datasets/__init__.py,sha256=wiSp4qGwpILCiaN5vVuwWgKnbdELpbi5pxnNB9Wg2nI,7282
+statslibx/datasets/__init__.py,sha256=KI1N2ByjWpmr9F9_1CDDHEnZ-kDJEKmZON7_4E6Jf_4,7322
 statslibx/datasets/course_completion.csv,sha256=jaqyxAh4YCsYuH5OFsjvGV7KUyM_7vQt6LgnqnNAFsI,22422135
 statslibx/datasets/iris.csv,sha256=xSdC5QMVqZ-Vajg_rt91dVUmdfZAnvD5pHB23QhHmTA,3858
 statslibx/datasets/penguins.csv,sha256=4HY2vYr3QmAJnqL4Z44uq7813vV5lAzHb2cGHuFsBsE,13478
 statslibx/datasets/sp500_companies.csv,sha256=WKS72YOGnAbyLR6kD95fOpIYZt5oXGjPryyFVqLRF_k,803820
 statslibx/datasets/titanic.csv,sha256=5seOS8ybyBMBCCWhgKZrsbu06m_OWyKtD9l0YXOImXU,29474
 statslibx/preprocessing/__init__.py,sha256=ZwdwjBodxeOry-umJ__6yUSeubpRlZg41yve366ArkY,7395
-statslibx-0.2.0.dist-info/METADATA,sha256=w7f-3RgizY3PHUSxoBl6YuHImHz2qFyillhZk82WUfE,2993
-statslibx-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-statslibx-0.2.0.dist-info/entry_points.txt,sha256=bkCY7JDWNCZFE3I4sjgJ2oGrUgoBBbCbYmWkBAymT70,49
-statslibx-0.2.0.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
-statslibx-0.2.0.dist-info/RECORD,,
+tests/test1.py,sha256=zGaLe9cKLCLrgNbjo-WeDGIjdH4bODtm1_juOn96Mtk,306
+statslibx-0.2.1.dist-info/METADATA,sha256=mNVj_Qo9pROrznPaOkCvWBH7ypw_0j0p9WdCWHgFt5o,2964
+statslibx-0.2.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+statslibx-0.2.1.dist-info/entry_points.txt,sha256=bkCY7JDWNCZFE3I4sjgJ2oGrUgoBBbCbYmWkBAymT70,49
+statslibx-0.2.1.dist-info/top_level.txt,sha256=Mz7hCT3d_WEbs8d6hWac4m3fkI4RlxUkXnHYt967KG8,16
+statslibx-0.2.1.dist-info/RECORD,,

{statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/top_level.txt RENAMED Viewed

@@ -1 +1,2 @@
 statslibx
+tests

tests/test1.py ADDED Viewed

@@ -0,0 +1,14 @@
+from statslibx import load_dataset, DescriptiveStats, InferentialStats
+import pandas as pd
+# df = pd.read_csv(r"tests\bank (1).csv", sep=";")
+df = load_dataset(r"tests\bank (1).csv", sep=";")
+stats = DescriptiveStats(df)
+print(stats.data)
+infer = InferentialStats(df)
+print(infer.data)

{statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{statslibx-0.2.0.dist-info → statslibx-0.2.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

statslibx 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

statslibx 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl