PyPI - statslibx - Versions diffs - 0.1.8__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

statslibx 0.1.8__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

statslibx/__init__.py +5 -5
statslibx/computacional.py +126 -2
statslibx/datasets/__init__.py +29 -12
statslibx/datasets/course_completion.csv +100001 -0
statslibx/datasets/iris.csv +151 -0
statslibx/datasets/penguins.csv +345 -0
statslibx/datasets/sp500_companies.csv +504 -0
statslibx/datasets/titanic.csv +419 -0
statslibx/descriptive.py +83 -70
statslibx/inferential.py +65 -46
statslibx/utils.py +71 -13
{statslibx-0.1.8.dist-info → statslibx-0.2.1.dist-info}/METADATA +5 -6
statslibx-0.2.1.dist-info/RECORD +20 -0
{statslibx-0.1.8.dist-info → statslibx-0.2.1.dist-info}/WHEEL +1 -1
{statslibx-0.1.8.dist-info → statslibx-0.2.1.dist-info}/top_level.txt +1 -0
tests/test1.py +14 -0
statslibx/probability.py +0 -2
statslibx-0.1.8.dist-info/RECORD +0 -15
{statslibx-0.1.8.dist-info → statslibx-0.2.1.dist-info}/entry_points.txt +0 -0

statslibx/inferential.py CHANGED Viewed

@@ -8,12 +8,67 @@ from scipy import stats
 import os
 class InferentialStats:
-    """
-    Class for inferential statistics (hypothesis tests, confidence intervals, etc.)
+    """
+    InferentialStats
+    A class for performing inferential statistical analysis, including hypothesis tests, confidence intervals,
+    normality tests, and more. This class supports operations on pandas DataFrame or numpy arrays.
+    Attributes:
+    -----------
+    data : pd.DataFrame
+        The dataset to analyze.
+        The backend used for processing ('pandas' or 'polars').
+    sep : str
+        Separator for reading files.
+    decimal : str
+        Decimal separator for reading files.
+    thousand : str
+        Thousand separator for reading files.
+    lang : str
+        Language for help and error messages ('es-ES' or 'en-US').
+    Methods:
+    --------
+    from_file(path: str):
+        Load data from a file and return an instance of InferentialStats.
+    confidence_interval(column: str, confidence: float = 0.95, statistic: Literal['mean', 'median', 'proportion'] = 'mean') -> tuple:
+        Calculate confidence intervals for mean, median, or proportion.
+    t_test_1sample(column: str, popmean: float = None, popmedian: float = None, alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
+        Perform a one-sample t-test or Wilcoxon signed-rank test for median.
+    t_test_2sample(column1: str, column2: str, equal_var: bool = True, alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
+        Perform a two-sample independent t-test.
+    t_test_paired(column1: str, column2: str, alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
+        Perform a paired t-test for dependent samples.
+    mann_whitney_test(column1: str, column2: str, alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
+        Perform the Mann-Whitney U test, a non-parametric alternative to the two-sample t-test.
+    chi_square_test(column1: str, column2: str, alpha: float = 0.05) -> 'TestResult':
+        Perform a Chi-square test of independence between two categorical variables.
+    anova_oneway(column: str, groups: str, alpha: float = 0.05) -> 'TestResult':
+        Perform a one-way ANOVA test to compare means across multiple groups.
+    kruskal_wallis_test(column: str, groups: str, alpha: float = 0.05) -> 'TestResult':
+        Perform the Kruskal-Wallis test, a non-parametric alternative to one-way ANOVA.
+    normality_test(column: str, method: Literal['shapiro', 'ks', 'anderson', 'jarque_bera', 'all'] = 'shapiro', test_statistic: Literal['mean', 'median', 'mode'] = 'mean', alpha: float = 0.05) -> Union['TestResult', dict]:
+        Perform normality tests using various methods.
+    hypothesis_test(method: Literal["mean", "difference_mean", "proportion", "variance"] = "mean", column1: str = None, column2: str = None, pop_mean: float = None, pop_proportion: Union[float, Tuple[float, float]] = 0.5, alpha: float = 0.05, homoscedasticity: Literal["levene", "bartlett", "var_test"] = "levene") -> Dict[str, Any]:
+        Perform hypothesis testing for mean, difference of means, proportion, or variance.
+    variance_test(column1: str, column2: str, method: Literal['levene', 'bartlett', 'var_test'] = 'levene', center: Literal['mean', 'median', 'trimmed'] = 'median', alpha: float = 0.05) -> 'TestResult':
+        Perform a test for equality of variances between two columns.
+    help():
+        Display a detailed help guide for the InferentialStats class and its methods.
     """
     def __init__(self, data: Union[pd.DataFrame, np.ndarray],
-                backend: Literal['pandas', 'polars'] = 'pandas',
                 lang: Literal['es-ES', 'en-US'] = 'es-ES'):
         """
         Initialize DataFrame
@@ -22,17 +77,16 @@ class InferentialStats:
         -----------
         data : DataFrame o ndarray
             Data to analyze
-        backend : str
-            'pandas' or 'polars' for processing
         """
-        if isinstance(data, str) and os.path.exists(data):
-                data = InferentialStats.from_file(data).data
-        if isinstance(data, pl.DataFrame):
+        if isinstance(data, pd.DataFrame):
+            self.data = data
+        elif isinstance(data, np.ndarray):
+            self.data = pd.DataFrame(data)
+        else:
             raise TypeError(
-                "Polars aún no soportado. Use pandas.DataFrame."
-            )
+                "Data must be a pandas.DataFrame or numpy.ndarray."
+            )
         if isinstance(data, np.ndarray):
             if data.ndim == 1:
@@ -41,44 +95,9 @@ class InferentialStats:
                 data = pd.DataFrame(data, columns=[f'var_{i}' for i in range(data.shape[1])])
         self.data = data
-        self.backend = backend
         self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
         self.lang = lang
-    @classmethod
-    def from_file(path: str):
-        """
-        Carga automática de archivos y devuelve instancia de Intelligence.
-        Soporta CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
-        """
-        if not os.path.exists(path):
-            raise FileNotFoundError(f"Archivo no encontrado: {path}")
-        ext = os.path.splitext(path)[1].lower()
-        if ext == ".csv":
-            df = pd.read_csv(path)
-        elif ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(path)
-        elif ext in [".txt", ".tsv"]:
-            df = pd.read_table(path)
-        elif ext == ".json":
-            df = pd.read_json(path)
-        elif ext == ".parquet":
-            df = pd.read_parquet(path)
-        elif ext == ".feather":
-            df = pd.read_feather(path)
-        else:
-            raise ValueError(f"Formato no soportado: {ext}")
-        return InferentialStats(df)
     # ============= INTERVALOS DE CONFIANZA =============
     def confidence_interval(self, column: str, confidence: float = 0.95,

statslibx/utils.py CHANGED Viewed

@@ -11,23 +11,81 @@ from pathlib import Path
 class UtilsStats:
     """
-    Clase utilitaria para operaciones estadísticas comunes y visualización
+    UtilsStats
+    A utility class for common statistical operations and visualization.
+    This class provides methods for data validation, basic statistical analysis,
+    and visualization of results. It also supports loading data directly from files.
+    >>> # Load data from a file
+    >>> data = utils.load_data("data.csv")
+    >>> utils.check_normality(data, column='age')
+    >>> # Analyze data from an array
+    Methods:
+    --------
+    _setup_plotting_style():
+        Configures default plotting styles for matplotlib.
-    Esta clase proporciona métodos para validación de datos, análisis estadísticos
-    básicos y visualización de resultados. Ahora con soporte para leer archivos directamente.
+    set_plot_backend(backend: Literal['matplotlib', 'seaborn', 'plotly']):
+        Sets the default visualization backend.
-    Examples:
-    ---------
-    >>> utils = UtilsStats()
-    >>> # Desde archivo
-    >>> data = utils.load_data("datos.csv")
-    >>> utils.check_normality(data, column='edad')
-    >>> # Desde array
-    >>> data = np.random.normal(0, 1, 100)
-    >>> utils.check_normality(data)
-    >>> utils.plot_distribution(data)
+    set_default_figsize(figsize: Tuple[int, int]):
+        Sets the default figure size for plots.
+    set_save_fig_options(save_fig: Optional[bool] = False, fig_format: str = 'png',
+                         fig_dpi: int = 300, figures_dir: str = 'figures'):
+        Configures options for saving figures.
+    load_data(path: Union[str, Path], **kwargs) -> pd.DataFrame:
+        Loads data from a file in various formats (CSV, Excel, JSON, etc.).
+    validate_dataframe(data: Union[pd.DataFrame, np.ndarray, list, str, Path]) -> pd.DataFrame:
+        Validates and converts data to a DataFrame. Also accepts file paths.
+    format_number(num: float, decimals: int = 6, scientific: bool = False) -> str:
+        Formats a number with specified decimal places.
+    check_normality(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path],
+                    column: Optional[str] = None, alpha: float = 0.05) -> dict:
+        Checks if the data follows a normal distribution using the Shapiro-Wilk test.
+    calculate_confidence_intervals(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path],
+                                    column: Optional[str] = None, confidence_level: float = 0.95,
+        Calculates confidence intervals for the mean using parametric or bootstrap methods.
+    detect_outliers(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path],
+                    column: Optional[str] = None, method: Literal['iqr', 'zscore', 'isolation_forest'] = 'iqr',
+        Detects outliers using different methods: 'iqr', 'zscore', or 'isolation_forest'.
+    calculate_effect_size(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path] = None,
+        Calculates the effect size between two groups using Cohen's d or Hedges' g.
+    plot_distribution(data: Union[pd.DataFrame, pd.Series, np.ndarray, str, Path],
+                      column: Optional[str] = None, plot_type: Literal['hist', 'kde', 'box', 'violin', 'all'] = 'hist',
+                      bins: int = 30, figsize: Optional[Tuple[int, int]] = None,
+                      save_fig: Optional[bool] = False, filename: Optional[str] = None, **kwargs):
+        Plots the distribution of a variable using various plot types and backends.
+    plot_correlation_matrix(data: Union[pd.DataFrame, str, Path],
+                            filename: Optional[str] = None, **kwargs):
+        Visualizes the correlation matrix using a heatmap.
+    plot_scatter_matrix(data: Union[pd.DataFrame, str, Path],
+                        filename: Optional[str] = None, **kwargs):
+        Creates a scatter matrix (pairplot) for visualizing relationships between variables.
+    plot_distribution_with_ci(data: Union[pd.DataFrame, pd.Series, np.ndarray, str, Path],
+                              column: Optional[str] = None, confidence_level: float = 0.95,
+                              ci_method: str = 'parametric', bins: int = 30,
+                              filename: Optional[str] = None, **kwargs) -> plt.Figure:
+        Plots the distribution of a variable with confidence intervals.
+    get_descriptive_stats(data, column=None) -> dict:
+        Returns a dictionary of descriptive statistics for the given data.
+    help():
+        Displays a complete help guide for the UtilsStats class.
     """
     def __init__(self):
         """Inicializar la clase utilitaria"""
         self._plot_backend = 'seaborn'

{statslibx-0.1.8.dist-info → statslibx-0.2.1.dist-info}/METADATA RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: statslibx
-Version: 0.1.8
-Summary: StatsLibx - Librería de estadística descriptiva e inferencial
+Version: 0.2.1
+Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
 Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
 License: MIT
 Classifier: Development Status :: 3 - Alpha
@@ -16,7 +16,6 @@ Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: pandas>=1.5
-Requires-Dist: polars>=0.20
 Provides-Extra: viz
 Requires-Dist: seaborn>=0.11; extra == "viz"
 Requires-Dist: plotly>=5.0; extra == "viz"
@@ -28,9 +27,9 @@ Requires-Dist: statsmodels>=0.13; extra == "advanced"
 StatsLibX es un paquete de Python diseñado para proporcionar una solución sencilla, eficiente y flexible para manejar volumenes de datos.
-Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva e inferencial** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
+Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva, inferencial y computacional (En desarrollo)** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
-GitHub del Proyecto: [text](https://github.com/GhostAnalyst30/StatsLibX)
+GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
 ## ✨ Características principales
@@ -55,7 +54,7 @@ stats = DescriptiveStats(data) # InferentialStats(data), UtilsStats()
 stats.summary()
 ```
-Para ver mas funciones: [text](https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb)
+Para ver mas funciones: [https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb](https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb)
 ##  📦 Instalación
 ```bash

statslibx-0.2.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,20 @@
+statslibx/__init__.py,sha256=82KG6z_wJZf_ZF8jpViRvtzn4qV9uEZd8a3sRUucKLE,1500
+statslibx/cli.py,sha256=DqXaoP85n9xgLDlFnEkeqj-HJG0_IKX0uSqxRcHbzII,1122
+statslibx/computacional.py,sha256=z46bRUiH9a3ajxVTYE2sGO-pg20L87MdOKM3Y_Tcq44,4062
+statslibx/descriptive.py,sha256=QLIzPB-pEC2BXCIUsjpDyU7peHAs6fRduPukj1gA160,61671
+statslibx/inferential.py,sha256=_mUzX-Uo2Y55zVTZbQnIRloqKcHjh40djLW1J12HQPU,81617
+statslibx/io.py,sha256=v7pxpmlEMeKyfXftl3WbkUtC9FOh1pymz7MmKPPNw98,493
+statslibx/utils.py,sha256=gWXduW8LMN1q4ZwNggmodRsT9Rcsot-S82NsQiqrjUo,69992
+statslibx/datasets/__init__.py,sha256=KI1N2ByjWpmr9F9_1CDDHEnZ-kDJEKmZON7_4E6Jf_4,7322
+statslibx/datasets/course_completion.csv,sha256=jaqyxAh4YCsYuH5OFsjvGV7KUyM_7vQt6LgnqnNAFsI,22422135
+statslibx/datasets/iris.csv,sha256=xSdC5QMVqZ-Vajg_rt91dVUmdfZAnvD5pHB23QhHmTA,3858
+statslibx/datasets/penguins.csv,sha256=4HY2vYr3QmAJnqL4Z44uq7813vV5lAzHb2cGHuFsBsE,13478
+statslibx/datasets/sp500_companies.csv,sha256=WKS72YOGnAbyLR6kD95fOpIYZt5oXGjPryyFVqLRF_k,803820
+statslibx/datasets/titanic.csv,sha256=5seOS8ybyBMBCCWhgKZrsbu06m_OWyKtD9l0YXOImXU,29474
+statslibx/preprocessing/__init__.py,sha256=ZwdwjBodxeOry-umJ__6yUSeubpRlZg41yve366ArkY,7395
+tests/test1.py,sha256=zGaLe9cKLCLrgNbjo-WeDGIjdH4bODtm1_juOn96Mtk,306
+statslibx-0.2.1.dist-info/METADATA,sha256=mNVj_Qo9pROrznPaOkCvWBH7ypw_0j0p9WdCWHgFt5o,2964
+statslibx-0.2.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+statslibx-0.2.1.dist-info/entry_points.txt,sha256=bkCY7JDWNCZFE3I4sjgJ2oGrUgoBBbCbYmWkBAymT70,49
+statslibx-0.2.1.dist-info/top_level.txt,sha256=Mz7hCT3d_WEbs8d6hWac4m3fkI4RlxUkXnHYt967KG8,16
+statslibx-0.2.1.dist-info/RECORD,,

{statslibx-0.1.8.dist-info → statslibx-0.2.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

{statslibx-0.1.8.dist-info → statslibx-0.2.1.dist-info}/top_level.txt RENAMED Viewed

@@ -1 +1,2 @@
 statslibx
+tests

tests/test1.py ADDED Viewed

@@ -0,0 +1,14 @@
+from statslibx import load_dataset, DescriptiveStats, InferentialStats
+import pandas as pd
+# df = pd.read_csv(r"tests\bank (1).csv", sep=";")
+df = load_dataset(r"tests\bank (1).csv", sep=";")
+stats = DescriptiveStats(df)
+print(stats.data)
+infer = InferentialStats(df)
+print(infer.data)

statslibx/probability.py DELETED Viewed

@@ -1,2 +0,0 @@
-class ProbabilityStats:
-    pass

statslibx-0.1.8.dist-info/RECORD DELETED Viewed

@@ -1,15 +0,0 @@
-statslibx/__init__.py,sha256=KeEoEZVPUR_PZACWoCpS_2l6luPbEee7VRlcrLgbKQQ,1490
-statslibx/cli.py,sha256=DqXaoP85n9xgLDlFnEkeqj-HJG0_IKX0uSqxRcHbzII,1122
-statslibx/computacional.py,sha256=Nv8wk67RUuuv15oBRu2XPp0_k7O4ZgmT51vThH2OuFk,35
-statslibx/descriptive.py,sha256=r5D4reP1Cdzsu1tSLmf2OEaFAkGvHSd3FIYfUclEaRU,60178
-statslibx/inferential.py,sha256=H0R6g3dJFk-53m1bKldrXObgk0SSmpcdqQg_tIgRKBI,79169
-statslibx/io.py,sha256=v7pxpmlEMeKyfXftl3WbkUtC9FOh1pymz7MmKPPNw98,493
-statslibx/probability.py,sha256=MUME4eXWzbdU93F-QdKwmmyd9IgZK1flFUYQHitp10o,33
-statslibx/utils.py,sha256=iJzt0jDacaoUfjtp4dU2PFuIBEheMP9Qrq-HnLTW_Qw,66515
-statslibx/datasets/__init__.py,sha256=GuUl_7-d6YanuDFht1dwB1bFrqjShvKh1m-iRYAbYZE,6875
-statslibx/preprocessing/__init__.py,sha256=ZwdwjBodxeOry-umJ__6yUSeubpRlZg41yve366ArkY,7395
-statslibx-0.1.8.dist-info/METADATA,sha256=uyhAd0xghADIfVee7WzDp76nLA2snjqQcNayio_UrIc,2835
-statslibx-0.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-statslibx-0.1.8.dist-info/entry_points.txt,sha256=bkCY7JDWNCZFE3I4sjgJ2oGrUgoBBbCbYmWkBAymT70,49
-statslibx-0.1.8.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
-statslibx-0.1.8.dist-info/RECORD,,

{statslibx-0.1.8.dist-info → statslibx-0.2.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

statslibx 0.1.8__py3-none-any.whl → 0.2.1__py3-none-any.whl

statslibx 0.1.8__py3-none-any.whl → 0.2.1__py3-none-any.whl