PyPI - statslibx - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

statslibx 0.1.1py3-none-any.whl → 0.1.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

statslibx/__init__.py +2 -2
statslibx/inferential.py +188 -2
statslibx/utils.py +84 -92
{statslibx-0.1.1.dist-info → statslibx-0.1.2.dist-info}/METADATA +34 -3
statslibx-0.1.2.dist-info/RECORD +8 -0
statslibx-0.1.1.dist-info/RECORD +0 -8
{statslibx-0.1.1.dist-info → statslibx-0.1.2.dist-info}/WHEEL +0 -0
{statslibx-0.1.1.dist-info → statslibx-0.1.2.dist-info}/top_level.txt +0 -0

statslibx/__init__.py CHANGED Viewed

@@ -1,10 +1,10 @@
 """
 StatsLibx - Librería de Estadística para Python
 Autor: Emmanuel Ascendra
-Versión: 0.1.0
+Versión: 0.1.2
 """
-__version__ = "0.1.1"
+__version__ = "0.1.2"
 __author__ = "Emmanuel Ascendra"
 # Importar las clases principales

statslibx/inferential.py CHANGED Viewed

@@ -1,7 +1,9 @@
+from dataclasses import dataclass
 import numpy as np
 import pandas as pd
-from typing import Optional, Union, Literal, List
+from typing import Optional, Union, Literal, List, Dict, Any
 from datetime import datetime
+from scipy import stats
 class InferentialStats:
     """
@@ -447,6 +449,183 @@ class InferentialStats:
             else:
                 interpretations.append(f"Se RECHAZA normalidad al {sig_level}% de significancia")
         return interpretations
+    def hypothesis_test(
+            self,
+            method: Literal["mean", "difference_mean", "proportion", "variance"] = "mean",
+            column1: Optional[str] = None,
+            column2: Optional[str] = None,
+            alpha: float = 0.05,
+            homoscedasticity: Literal["levene", "bartlett", "var_test"] = "levene",
+            null_value: Optional[float] = None
+        ) -> Dict[str, Any]:
+        """
+        Run a two-sided hypothesis test on one or two columns of ``self.data``.
+        Parameters:
+        -----------
+        method : str
+            'mean'            -> one-sample t-test of column1 against ``null_value``
+            'difference_mean' -> two-sample t-test of column1 vs column2
+            'proportion'      -> one-sample z-test of the mean of column1
+                                 (expected to hold 0/1 values) against ``null_value``
+            'variance'        -> F-test of var(column1) / var(column2)
+        column1 : str
+            First column (always required). Missing values are dropped.
+        column2 : str, optional
+            Second column; required for 'difference_mean' and 'variance'.
+        alpha : float
+            Significance level used for the ``reject_H0`` decision.
+        homoscedasticity : str
+            Equal-variance pre-test run for 'difference_mean' and 'variance'.
+            Its ``equal_var`` outcome selects the pooled or Welch t-test.
+        null_value : float, optional
+            Hypothesised value under H0. Defaults to 0.0 for 'mean' and 0.5
+            for 'proportion'; ignored by the two-sample methods.
+        Returns:
+        --------
+        dict with keys 'test', 'statistic', 'p_value', 'alpha', 'reject_H0'
+        and 'homoscedasticity_test' (None for the one-sample methods).
+        Raises:
+        -------
+        ValueError
+            Unknown ``method``, missing column name, empty column after
+            dropping NaN, or a proportion ``null_value`` outside (0, 1).
+        """
+        # Validate up front so an unknown method fails with a clear message
+        # instead of an UnboundLocalError when the result dict is built.
+        if method not in ("mean", "difference_mean", "proportion", "variance"):
+            raise ValueError(
+                f"Método '{method}' no válido. "
+                "Usa mean, difference_mean, proportion o variance."
+            )
+        if column1 is None:
+            raise ValueError("Debes especificar 'column1'.")
+        two_sample = method in ("difference_mean", "variance")
+        if two_sample and column2 is None:
+            raise ValueError("Para este método debes pasar 'column2'.")
+        data = self.data
+        x = data[column1].dropna()
+        # 'is not None' (not truthiness) so a falsy column label such as 0 is honoured.
+        y = data[column2].dropna() if column2 is not None else None
+        if len(x) == 0:
+            raise ValueError(f"La columna '{column1}' no contiene datos válidos.")
+        if two_sample and len(y) == 0:
+            raise ValueError(f"La columna '{column2}' no contiene datos válidos.")
+        # --- homoscedasticity pre-test (two-sample methods only) ---
+        homo_result = None
+        if two_sample:
+            homo_result = self._homoscedasticity_test(x, y, homoscedasticity)
+        # --- main hypothesis tests ---
+        if method == "mean":
+            # Test against a hypothesised mean. Using the sample's own mean here
+            # would force the statistic to 0 and H0 could never be rejected.
+            mu0 = 0.0 if null_value is None else null_value
+            statistic, p_value = stats.ttest_1samp(x, popmean=mu0)
+            test_name = "One-sample t-test"
+        elif method == "difference_mean":
+            # Pooled t-test when the pre-test kept equal variances, Welch otherwise.
+            statistic, p_value = stats.ttest_ind(
+                x, y, equal_var=homo_result["equal_var"]
+            )
+            test_name = "Two-sample t-test"
+        elif method == "proportion":
+            p0 = 0.5 if null_value is None else null_value
+            if not 0 < p0 < 1:
+                raise ValueError("'null_value' debe estar entre 0 y 1 para 'proportion'.")
+            p_hat = np.mean(x)
+            n = len(x)
+            statistic = (p_hat - p0) / np.sqrt(p0 * (1 - p0) / n)
+            # Survival function keeps precision in the far tail (vs. 1 - cdf).
+            p_value = 2 * stats.norm.sf(abs(statistic))
+            test_name = "Proportion Z-test"
+        else:
+            # method == "variance": classic two-sided F-test on the variance ratio.
+            statistic = np.var(x, ddof=1) / np.var(y, ddof=1)
+            dfn = len(x) - 1
+            dfd = len(y) - 1
+            p_value = 2 * min(
+                stats.f.cdf(statistic, dfn, dfd),
+                stats.f.sf(statistic, dfn, dfd)
+            )
+            test_name = "Variance F-test"
+        return {
+            "test": test_name,
+            "statistic": statistic,
+            "p_value": p_value,
+            "alpha": alpha,
+            "reject_H0": bool(p_value < alpha),
+            "homoscedasticity_test": homo_result
+        }
+    def _homoscedasticity_test(
+        self,
+        x,
+        y,
+        method: Literal["levene", "bartlett", "var_test"] = "levene",
+        alpha: float = 0.05
+    ) -> Dict[str, Any]:
+        """
+        Test whether two samples have equal variances.
+        Parameters:
+        -----------
+        x, y : array-like
+            The two samples to compare.
+        method : str
+            'levene'   -> scipy.stats.levene with SciPy's default settings
+            'bartlett' -> scipy.stats.bartlett
+            'var_test' -> two-sided F-test on var(x) / var(y), as in R's var.test
+        alpha : float
+            Significance level for the ``equal_var`` decision (default 0.05).
+        Returns:
+        --------
+        dict with keys 'method', 'statistic', 'p_value' and 'equal_var'
+        (True when equality of variances is not rejected at ``alpha``).
+        Raises:
+        -------
+        ValueError
+            If ``method`` is not one of the supported names.
+        """
+        if method == "levene":
+            stat, p = stats.levene(x, y)
+        elif method == "bartlett":
+            stat, p = stats.bartlett(x, y)
+        elif method == "var_test":
+            # Ratio of sample variances (ddof=1) referred to an F distribution.
+            stat = np.var(x, ddof=1) / np.var(y, ddof=1)
+            dfn = len(x) - 1
+            dfd = len(y) - 1
+            # Two-sided p-value: twice the smaller tail. sf() is used for the
+            # upper tail because it is more accurate than 1 - cdf() far out.
+            p = 2 * min(stats.f.cdf(stat, dfn, dfd), stats.f.sf(stat, dfn, dfd))
+        else:
+            raise ValueError("Método de homocedasticidad no válido.")
+        return {
+            "method": method,
+            "statistic": stat,
+            "p_value": p,
+            "equal_var": bool(p > alpha)
+        }
+    def variance_test(self, column1: str, column2: str,
+                    method: Literal['levene', 'bartlett', 'var_test'] = 'levene',
+                    center: Literal['mean', 'median', 'trimmed'] = 'median'
+                    ) -> 'TestResult':
+        """
+        Test equality of variances between two columns.
+        Parameters:
+        -----------
+        column1, column2 : str
+            Numeric columns to compare (missing values are dropped)
+        method : str
+            'levene'   -> Levene test, centred according to ``center``
+            'bartlett' -> Bartlett test
+            'var_test' -> two-sided F-test on the variance ratio (R's var.test)
+        center : str
+            Centring passed to Levene's test ('mean', 'median', 'trimmed');
+            unused by the other methods
+        Returns:
+        --------
+        TestResult
+        Raises:
+        -------
+        ValueError
+            If ``method`` is not levene, bartlett or var_test
+        """
+        from scipy import stats
+        sample_a = self.data[column1].dropna().values
+        sample_b = self.data[column2].dropna().values
+        # Guard clause: reject unknown methods before computing anything.
+        if method not in ('levene', 'bartlett', 'var_test'):
+            raise ValueError(f"Método '{method}' no válido. Usa levene, bartlett o var_test.")
+        if method == 'var_test':
+            # Classic F-test: ratio of sample variances against F(df1, df2).
+            var_a = sample_a.var(ddof=1)
+            var_b = sample_b.var(ddof=1)
+            ratio = var_a / var_b
+            df_a = len(sample_a) - 1
+            df_b = len(sample_b) - 1
+            lower_tail = stats.f.cdf(ratio, df_a, df_b)
+            # Two-sided p-value: twice the smaller of the two tails.
+            pvalue = 2 * min(lower_tail, 1 - lower_tail)
+            statistic = ratio
+            test_name = 'F-test de Varianzas (var.test estilo R)'
+            params = {
+                'var1': var_a, 'var2': var_b,
+                'ratio': ratio,
+                'df1': df_a, 'df2': df_b
+            }
+        else:
+            if method == 'levene':
+                statistic, pvalue = stats.levene(sample_a, sample_b, center=center)
+                test_name = f'Test de Levene (center={center})'
+            else:
+                statistic, pvalue = stats.bartlett(sample_a, sample_b)
+                test_name = 'Test de Bartlett'
+            # Levene and Bartlett report the same descriptive parameters.
+            params = {
+                'var1': sample_a.var(ddof=1),
+                'var2': sample_b.var(ddof=1),
+                'n1': len(sample_a), 'n2': len(sample_b)
+            }
+        return TestResult(
+            test_name=test_name,
+            statistic=statistic,
+            pvalue=pvalue,
+            alternative='two-sided',
+            params=params
+        )
     def help(self):
         """
@@ -514,6 +693,12 @@ class InferentialStats:
   • .mann_whitney_test(column1, column2, alternative='two-sided')
     Alternativa no paramétrica al t-test de dos muestras
+  🔹 Pruebas Extras:
+  • .hypothesis_test(method='mean', column1=None, column2=None,
+                   alpha=0.05, homoscedasticity='levene')
+  • .variance_test(column1, column2, method='levene', center='median')
 ┌────────────────────────────────────────────────────────────────────────────┐
 │ 4. 🧪 PRUEBAS PARA MÚLTIPLES GRUPOS                                        │
 └────────────────────────────────────────────────────────────────────────────┘
@@ -737,7 +922,8 @@ class InferentialStats:
 ╚════════════════════════════════════════════════════════════════════════════╝
     """
         print(help_text)
+@dataclass
 class TestResult:
     """Clase para resultados de pruebas de hipótesis"""

statslibx/utils.py CHANGED Viewed

@@ -602,20 +602,18 @@ class UtilsStats:
     # ============= GRÁFICOS CON INTERVALOS DE CONFIANZA =============
-    def plot_distribution_with_ci(self,
-                                 data: Union[pd.DataFrame, pd.Series, np.ndarray],
-                                 column: Optional[str] = None,
-                                 confidence_level: float = 0.95,
-                                 ci_method: str = 'parametric',
-                                 bins: int = 30,
-                                 figsize: Optional[Tuple[int, int]] = None,
-                                 save_fig: Optional[bool] = None,
-                                 filename: Optional[str] = None,
-                                 **kwargs) -> plt.Figure:
-        """
-        Grafica la distribución junto con intervalos de confianza
-        """
-        # Extraer y limpiar datos
+    def plot_distribution_with_ci(self,
+                               data: Union[pd.DataFrame, pd.Series, np.ndarray],
+                               column: Optional[str] = None,
+                               confidence_level: float = 0.95,
+                               ci_method: str = 'parametric',
+                               bins: int = 30,
+                               figsize: Optional[Tuple[int, int]] = None,
+                               save_fig: Optional[bool] = None,
+                               filename: Optional[str] = None,
+                               **kwargs) -> plt.Figure:
+        # ======= PREPARACIÓN =======
         if isinstance(data, pd.DataFrame):
             if column is None:
                 raise ValueError("Debe especificar 'column' cuando data es DataFrame")
@@ -627,98 +625,91 @@ class UtilsStats:
         else:
             plot_data = pd.Series(data).dropna()
             data_name = 'Variable'
         data_array = plot_data.values
-        default_filename = f"distribucion_ci_{data_name.lower().replace(' ', '_')}"
-        filename = filename or default_filename
-        # Calcular estadísticas e intervalos de confianza
+        filename = filename or f"distribucion_ci_{data_name.lower().replace(' ', '_')}"
+        # Estadísticas
         ci_result = self.calculate_confidence_intervals(data_array, confidence_level, ci_method)
         normality_result = self.check_normality(data_array)
-        # Crear figura con dos subgráficas
-        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize or (14, 6))
-        # ===== PRIMERA GRÁFICA: Distribución básica =====
-        n, bins, patches = ax1.hist(data_array, bins=bins, alpha=0.7,
-                                   color='skyblue', edgecolor='black',
-                                   density=True, label='Histograma')
         # KDE
         kde = stats.gaussian_kde(data_array)
-        x_range = np.linspace(data_array.min(), data_array.max(), 200)
+        x_range = np.linspace(data_array.min(), data_array.max(), 300)
+        # ======= FIGURA =======
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize or (14, 6))
+        # ============================================================
+        # PANEL 1: HISTOGRAMA + KDE
+        # ============================================================
+        ax1.hist(data_array, bins=bins, density=True,
+                color='skyblue', edgecolor='black', alpha=0.7)
         ax1.plot(x_range, kde(x_range), 'r-', linewidth=2, label='KDE')
-        # Línea vertical en la media
-        ax1.axvline(ci_result['mean'], color='red', linestyle='--',
-                   linewidth=2, label=f'Media: {ci_result["mean"]:.2f}')
-        ax1.set_xlabel('Valores')
-        ax1.set_ylabel('Densidad')
-        ax1.set_title(f'Distribución de {data_name}\n'
-                     f'Media: {ci_result["mean"]:.2f}, '
-                     f'Desv. Est.: {ci_result["std"]:.2f}')
+        ax1.axvline(ci_result['mean'], color='red', linestyle='--', linewidth=2,
+                    label=f"Media: {ci_result['mean']:.2f}")
+        ax1.set_title(f"Distribución de {data_name}")
+        ax1.set_xlabel("Valores")
+        ax1.set_ylabel("Densidad")
         ax1.legend()
-        ax1.grid(True, alpha=0.3)
-        # ===== SEGUNDA GRÁFICA: Distribución con intervalos de confianza =====
-        n, bins, patches = ax2.hist(data_array, bins=bins, alpha=0.7,
-                                   color='lightgreen', edgecolor='black',
-                                   density=True, label='Histograma')
-        # KDE
+        ax1.grid(alpha=0.3)
+        # ============================================================
+        # PANEL 2: KDE + INTERVALO DE CONFIANZA
+        # ============================================================
+        # KDE pura
         ax2.plot(x_range, kde(x_range), 'r-', linewidth=2, label='KDE')
-        # Media y intervalos de confianza
-        ax2.axvline(ci_result['mean'], color='red', linestyle='-',
-                   linewidth=3, label=f'Media: {ci_result["mean"]:.2f}')
-        # Intervalo de confianza
-        ax2.axvspan(ci_result['ci_lower'], ci_result['ci_upper'],
-                   alpha=0.3, color='orange',
-                   label=f'IC {confidence_level*100}%: [{ci_result["ci_lower"]:.2f}, {ci_result["ci_upper"]:.2f}]')
-        # Líneas para los límites del IC
-        ax2.axvline(ci_result['ci_lower'], color='orange', linestyle='--', linewidth=2)
-        ax2.axvline(ci_result['ci_upper'], color='orange', linestyle='--', linewidth=2)
-        # Distribución normal teórica (si los datos son normales)
-        if normality_result['is_normal']:
-            normal_x = np.linspace(data_array.min(), data_array.max(), 200)
-            normal_y = stats.norm.pdf(normal_x, ci_result['mean'], ci_result['std'])
-            ax2.plot(normal_x, normal_y, 'g--', linewidth=2, alpha=0.7,
-                    label='Distribución Normal Teórica')
-        ax2.set_xlabel('Valores')
-        ax2.set_ylabel('Densidad')
-        ax2.set_title(f'Distribución con Intervalos de Confianza\n'
-                     f'Método: {ci_method}, n={ci_result["n"]}')
+        # Intervalo de Confianza
+        ax2.axvspan(ci_result["ci_lower"], ci_result["ci_upper"],
+                    color='orange', alpha=0.3,
+                    label=f"IC {confidence_level*100:.0f}%")
+        # Media
+        ax2.axvline(ci_result["mean"], color='red', linewidth=2)
+        # Distribución normal teórica (si aplica)
+        if normality_result["is_normal"]:
+            normal_y = stats.norm.pdf(x_range, ci_result['mean'], ci_result['std'])
+            ax2.plot(x_range, normal_y, 'g--', linewidth=2, alpha=0.7,
+                    label="Normal Teórica")
+        ax2.set_title(f"IC con método '{ci_method}'")
+        ax2.set_xlabel("Valores")
+        ax2.set_ylabel("Densidad")
         ax2.legend()
-        ax2.grid(True, alpha=0.3)
-        # Información adicional como texto
-        info_text = (f'Estadísticas:\n'
-                    f'• Media: {ci_result["mean"]:.3f}\n'
-                    f'• Desv. Est.: {ci_result["std"]:.3f}\n'
-                    f'• n: {ci_result["n"]}\n'
-                    f'• IC {confidence_level*100}%: [{ci_result["ci_lower"]:.3f}, {ci_result["ci_upper"]:.3f}]\n'
-                    f'• Margen Error: ±{ci_result["margin_error"]:.3f}\n'
-                    f'• Normalidad: {normality_result["interpretation"]}\n'
-                    f'• p-value: {normality_result["shapiro_pvalue"]:.4f}')
-        fig.text(0.02, 0.02, info_text, fontsize=9,
-                bbox=dict(boxstyle="round,pad=0.5", facecolor="lightgray", alpha=0.7),
-                verticalalignment='bottom')
+        ax2.grid(alpha=0.3)
+        # ======= CUADRO DE INFO =======
+        info = (
+            f"Estadísticas de {data_name}:\n"
+            f"• n = {ci_result['n']}\n"
+            f"• Media = {ci_result['mean']:.3f}\n"
+            f"• Desv. Est. = {ci_result['std']:.3f}\n"
+            f"• IC {confidence_level*100:.0f}% = [{ci_result['ci_lower']:.3f}, {ci_result['ci_upper']:.3f}]\n"
+            f"• Margen Error = ±{ci_result['margin_error']:.3f}\n"
+            f"• Normalidad = {normality_result['interpretation']}\n"
+            f"• p-value Shapiro = {normality_result['shapiro_pvalue']:.4f}"
+        )
+        fig.text(0.01, 0.01, info, fontsize=9,
+                bbox=dict(facecolor='lightgray', alpha=0.6),
+                va='bottom')
         plt.tight_layout()
-        # Guardar figura si está activado
+        # Guardado opcional
         save_fig = save_fig if save_fig is not None else self._save_fig
         if save_fig:
             self._save_figure(fig, filename)
         return fig
     def plot_multiple_distributions_with_ci(self,
                                            data_dict: dict,
                                            confidence_level: float = 0.95,
@@ -877,7 +868,8 @@ class UtilsStats:
     backend: 'matplotlib', 'seaborn', 'plotly'
   • .plot_distribution_with_ci(data, column=None, confidence_level=0.95,
-                               ci_method='parametric', bins=30)
+                               ci_method='parametric', bins=30, figsize=None,
+                               save_fig=None, filename=None)
     Distribución con intervalos de confianza visualizados

{statslibx-0.1.1.dist-info → statslibx-0.1.2.dist-info}/METADATA RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.4
 Name: statslibx
-Version: 0.1.1
+Version: 0.1.2
 Summary: Librería de estadística descriptiva e inferencial para Python
-Home-page: https://github.com/Immanuel3008/StatsLibrary
+Home-page: https://github.com/Immanuel3008/StatsLibX
 Author: Emmanuel Ascendra Perez
 Author-email: ascendraemmanuel@gmail.com
 Classifier: Development Status :: 3 - Alpha
@@ -43,4 +43,35 @@ Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary
-Librería de estadística descriptiva e inferencial para Python
+📦 Descripción para PyPI (Plantilla Profesional)
+StatsLibX es un paquete de Python diseñado para proporcionar una solución sencilla, eficiente y flexible para manejar volúmenes de datos.
+Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la estadística descriptiva e inferencial sin complicaciones, con múltiples funcionalidades y utilidades pensadas para el futuro.
+✨ Características principales
+⚡ Rápido y eficiente: optimizado para ofrecer un rendimiento suave incluso en tareas exigentes.
+🧩 Fácil de usar: una API limpia para que empieces en segundos.
+🔧 Altamente extensible: personalízalo según tus necesidades.
+📚 Documentación clara: ejemplos simples y prácticos.
+🔮 Diseñado con visión a futuro: construido para escalar y adaptarse.
+🚀 Ejemplo rápido
+from statslibx import DescriptiveStats, InferentialStats, UtilsStats
+stats = DescriptiveStats(data) # InferentialStats(data), UtilsStats()
+stats.help()
+📦 Instalación
+pip install statslibx
+🤝 Contribuciones
+¡Todas las mejoras e ideas son bienvenidas!
+E-mail: ascendraemmanuel@gmail.com

statslibx-0.1.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+statslibx/__init__.py,sha256=TLTlwOvXPila3LVTloogrIMsy1G6cJ4wb051YSYXNhE,1117
+statslibx/descriptive.py,sha256=Hu7VuOGXs6oOq-zxQNiBKg7UtkNdNQ1Qy3PP-wEO5_k,36971
+statslibx/inferential.py,sha256=BVBxEdLnNCw2yC-3s5fZ84oeJ8LqJYR_IJquPEiyiOk,48234
+statslibx/utils.py,sha256=tdf1yZuR4fsmNq24ygv69BgCLzB0iE_x0ki1IV7Iwxs,60693
+statslibx-0.1.2.dist-info/METADATA,sha256=vz1-UMNdrew0WyDciZbu96uoXhw9uPngFHZqKSW-X70,2887
+statslibx-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+statslibx-0.1.2.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
+statslibx-0.1.2.dist-info/RECORD,,

statslibx-0.1.1.dist-info/RECORD DELETED Viewed

@@ -1,8 +0,0 @@
-statslibx/__init__.py,sha256=p1AydN8u_zWgm1rJBJb8TVQkRUzQMA3iNDwfFZn8k00,1117
-statslibx/descriptive.py,sha256=Hu7VuOGXs6oOq-zxQNiBKg7UtkNdNQ1Qy3PP-wEO5_k,36971
-statslibx/inferential.py,sha256=slLh32Ny4doLA0EA8pYRUGQSuMI8oBUCMBu-CTX-7FY,41732
-statslibx/utils.py,sha256=vnfs5LmWEKsB9p8Fs2Di3btReepkB1RYAwQfT-eZs6c,61856
-statslibx-0.1.1.dist-info/METADATA,sha256=ujX3UiJWx5ibgNv7OaJevK5YYeIaxudPkKzlFWBChO0,1737
-statslibx-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-statslibx-0.1.1.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
-statslibx-0.1.1.dist-info/RECORD,,

{statslibx-0.1.1.dist-info → statslibx-0.1.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{statslibx-0.1.1.dist-info → statslibx-0.1.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

statslibx 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

statslibx 0.1.1py3-none-any.whl → 0.1.2py3-none-any.whl