statslibx 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
statslibx/__init__.py CHANGED
@@ -1,10 +1,10 @@
1
1
  """
2
2
  StatsLibx - Librería de Estadística para Python
3
3
  Autor: Emmanuel Ascendra
4
- Versión: 0.1.0
4
+ Versión: 0.1.2
5
5
  """
6
6
 
7
- __version__ = "0.1.1"
7
+ __version__ = "0.1.2"
8
8
  __author__ = "Emmanuel Ascendra"
9
9
 
10
10
  # Importar las clases principales
statslibx/inferential.py CHANGED
@@ -1,7 +1,9 @@
1
+ from dataclasses import dataclass
1
2
  import numpy as np
2
3
  import pandas as pd
3
- from typing import Optional, Union, Literal, List
4
+ from typing import Optional, Union, Literal, List, Dict, Any
4
5
  from datetime import datetime
6
+ from scipy import stats
5
7
 
6
8
  class InferentialStats:
7
9
  """
@@ -447,6 +449,183 @@ class InferentialStats:
447
449
  else:
448
450
  interpretations.append(f"Se RECHAZA normalidad al {sig_level}% de significancia")
449
451
  return interpretations
452
+
453
def hypothesis_test(
    self,
    method: Literal["mean", "difference_mean", "proportion", "variance"] = "mean",
    column1: str = None,
    column2: str = None,
    alpha: float = 0.05,
    homoscedasticity: Literal["levene", "bartlett", "var_test"] = "levene",
    popmean: float = 0.0,
    p0: float = 0.5,
) -> Dict[str, Any]:
    """
    Run a two-sided hypothesis test on one or two columns of ``self.data``.

    Parameters:
    -----------
    method : str
        'mean'            -> one-sample t-test of ``column1`` against ``popmean``
        'difference_mean' -> two-sample t-test between ``column1`` and ``column2``
        'proportion'      -> one-sample z-test of the mean of ``column1``
                             against ``p0``
        'variance'        -> F-test on the ratio of sample variances
    column1 : str
        Name of the first column (always required).
    column2 : str
        Name of the second column (required for 'difference_mean' and
        'variance').
    alpha : float
        Significance level used for the ``reject_H0`` decision.
    homoscedasticity : str
        Equal-variance pre-test run for the two-sample methods; forwarded to
        ``self._homoscedasticity_test``.
    popmean : float
        Hypothesised population mean for the 'mean' method.
    p0 : float
        Hypothesised proportion for the 'proportion' method (0 < p0 < 1).

    Returns:
    --------
    dict
        Keys: 'test', 'statistic', 'p_value', 'alpha', 'reject_H0',
        'homoscedasticity_test' (``None`` for one-sample methods).

    Raises:
    -------
    ValueError
        Unknown ``method``, missing column name, empty sample after dropping
        NaN, or ``p0`` outside the open interval (0, 1).
    """
    valid_methods = ("mean", "difference_mean", "proportion", "variance")
    if method not in valid_methods:
        # Without this guard an unknown method reached the return statement
        # with the result variables unbound.
        raise ValueError(
            f"Método '{method}' no válido. Usa: {', '.join(valid_methods)}."
        )

    if column1 is None:
        raise ValueError("Debes especificar 'column1'.")

    two_sample = method in ("difference_mean", "variance")
    if two_sample and column2 is None:
        raise ValueError("Para este método debes pasar 'column2'.")

    data = self.data
    x = data[column1].dropna()
    # The second sample is only loaded when the chosen method uses it.
    y = data[column2].dropna() if two_sample else None

    if len(x) == 0 or (two_sample and len(y) == 0):
        raise ValueError("Las columnas no contienen datos válidos.")

    # Equal-variance pre-test; its verdict selects Student vs. Welch below.
    homo_result = None
    if two_sample:
        homo_result = self._homoscedasticity_test(x, y, homoscedasticity)

    if method == "mean":
        # Test against the hypothesised mean. Testing against np.mean(x)
        # always yields statistic 0 and p-value 1.
        statistic, p_value = stats.ttest_1samp(x, popmean=popmean)
        test_name = "One-sample t-test"

    elif method == "difference_mean":
        statistic, p_value = stats.ttest_ind(
            x, y, equal_var=homo_result["equal_var"]
        )
        test_name = "Two-sample t-test"

    elif method == "proportion":
        if not 0 < p0 < 1:
            raise ValueError("'p0' debe estar en el intervalo (0, 1).")
        p_hat = np.mean(x)
        n = len(x)
        statistic = (p_hat - p0) / np.sqrt(p0 * (1 - p0) / n)
        # sf() keeps precision in the far tail where 1 - cdf() rounds to 0.
        p_value = 2 * stats.norm.sf(abs(statistic))
        test_name = "Proportion Z-test"

    else:  # method == "variance"
        dfn = len(x) - 1
        dfd = len(y) - 1
        statistic = np.var(x, ddof=1) / np.var(y, ddof=1)
        # Two-sided p-value: twice the smaller tail of the F distribution.
        p_value = 2 * min(
            stats.f.cdf(statistic, dfn, dfd),
            stats.f.sf(statistic, dfn, dfd),
        )
        test_name = "Variance F-test"

    return {
        "test": test_name,
        "statistic": statistic,
        "p_value": p_value,
        "alpha": alpha,
        "reject_H0": bool(p_value < alpha),
        "homoscedasticity_test": homo_result,
    }
520
+
521
+ def _homoscedasticity_test(
522
+ self,
523
+ x,
524
+ y,
525
+ method: Literal["levene", "bartlett", "var_test"] = "levene"
526
+ ) -> Dict[str, Any]:
527
+
528
+ if method == "levene":
529
+ stat, p = stats.levene(x, y)
530
+ elif method == "bartlett":
531
+ stat, p = stats.bartlett(x, y)
532
+ elif method == "var_test":
533
+ # R's var.test equivalent: F-test
534
+ var_x = np.var(x, ddof=1)
535
+ var_y = np.var(y, ddof=1)
536
+ F = var_x / var_y
537
+ dfn = len(x) - 1
538
+ dfd = len(y) - 1
539
+ p = 2 * min(stats.f.cdf(F, dfn, dfd), 1 - stats.f.cdf(F, dfn, dfd))
540
+ stat = F
541
+ else:
542
+ raise ValueError("Método de homocedasticidad no válido.")
543
+
544
+ return {
545
+ "method": method,
546
+ "statistic": stat,
547
+ "p_value": p,
548
+ "equal_var": p > 0.05 # estándar
549
+ }
550
+
551
def variance_test(self, column1: str, column2: str,
                  method: Literal['levene', 'bartlett', 'var_test'] = 'levene',
                  center: Literal['mean', 'median', 'trimmed'] = 'median'
                  ) -> 'TestResult':
    """
    Test equality of variances between two columns of ``self.data``.

    Parameters:
    -----------
    column1, column2 : str
        Names of the numeric columns to compare; NaN values are dropped
        independently in each column.
    method : str
        'levene'   -> ``scipy.stats.levene``
        'bartlett' -> ``scipy.stats.bartlett``
        'var_test' -> two-sided F-test on the ratio of sample variances,
                      analogous to R's ``var.test``
    center : str
        Centering used by Levene's test ('mean', 'median', 'trimmed');
        ignored by the other methods.

    Returns:
    --------
    TestResult
        Built with ``test_name``, ``statistic``, ``pvalue``,
        ``alternative='two-sided'`` and a ``params`` dict holding the sample
        variances plus either the sample sizes (levene/bartlett) or the
        ratio and degrees of freedom (var_test).

    Raises:
    -------
    ValueError
        If ``method`` is not supported, or either column has fewer than two
        non-missing observations (the sample variance is undefined).
    """
    from scipy import stats

    # Validate the method before touching the data so a typo fails fast.
    if method not in ('levene', 'bartlett', 'var_test'):
        raise ValueError(f"Método '{method}' no válido. Usa levene, bartlett o var_test.")

    data1 = self.data[column1].dropna().values
    data2 = self.data[column2].dropna().values
    n1, n2 = len(data1), len(data2)

    if n1 < 2 or n2 < 2:
        # With ddof=1 the variance of fewer than two points is NaN, which
        # would otherwise surface as a silent NaN statistic and p-value.
        raise ValueError(
            "Se requieren al menos 2 observaciones válidas en cada columna."
        )

    var1 = data1.var(ddof=1)
    var2 = data2.var(ddof=1)

    if method == 'var_test':
        df1 = n1 - 1
        df2 = n2 - 1
        statistic = var1 / var2
        # Two-sided p-value: twice the smaller tail of the F distribution.
        pvalue = 2 * min(
            stats.f.cdf(statistic, df1, df2),
            stats.f.sf(statistic, df1, df2)
        )
        test_name = 'F-test de Varianzas (var.test estilo R)'
        params = {
            'var1': var1, 'var2': var2,
            'ratio': statistic,
            'df1': df1, 'df2': df2
        }
    else:
        if method == 'levene':
            statistic, pvalue = stats.levene(data1, data2, center=center)
            test_name = f'Test de Levene (center={center})'
        else:  # method == 'bartlett'
            statistic, pvalue = stats.bartlett(data1, data2)
            test_name = 'Test de Bartlett'
        params = {
            'var1': var1,
            'var2': var2,
            'n1': n1, 'n2': n2
        }

    return TestResult(
        test_name=test_name,
        statistic=statistic,
        pvalue=pvalue,
        alternative='two-sided',
        params=params
    )
628
+
450
629
 
451
630
  def help(self):
452
631
  """
@@ -514,6 +693,12 @@ class InferentialStats:
514
693
  • .mann_whitney_test(column1, column2, alternative='two-sided')
515
694
  Alternativa no paramétrica al t-test de dos muestras
516
695
 
696
+ 🔹 Pruebas Extras:
697
+ • .hypothesis_test(method='mean', column1=None, column2=None,
698
+ alpha=0.05, homoscedasticity='levene')
699
+ • .variance_test(column1, column2, method='levene', center='median')
700
+
701
+
517
702
  ┌────────────────────────────────────────────────────────────────────────────┐
518
703
  │ 4. 🧪 PRUEBAS PARA MÚLTIPLES GRUPOS │
519
704
  └────────────────────────────────────────────────────────────────────────────┘
@@ -737,7 +922,8 @@ class InferentialStats:
737
922
  ╚════════════════════════════════════════════════════════════════════════════╝
738
923
  """
739
924
  print(help_text)
740
-
925
+
926
+ @dataclass
741
927
  class TestResult:
742
928
  """Clase para resultados de pruebas de hipótesis"""
743
929
 
statslibx/utils.py CHANGED
@@ -602,20 +602,18 @@ class UtilsStats:
602
602
 
603
603
  # ============= GRÁFICOS CON INTERVALOS DE CONFIANZA =============
604
604
 
605
- def plot_distribution_with_ci(self,
606
- data: Union[pd.DataFrame, pd.Series, np.ndarray],
607
- column: Optional[str] = None,
608
- confidence_level: float = 0.95,
609
- ci_method: str = 'parametric',
610
- bins: int = 30,
611
- figsize: Optional[Tuple[int, int]] = None,
612
- save_fig: Optional[bool] = None,
613
- filename: Optional[str] = None,
614
- **kwargs) -> plt.Figure:
615
- """
616
- Grafica la distribución junto con intervalos de confianza
617
- """
618
- # Extraer y limpiar datos
605
+ def plot_distribution_with_ci(self,
606
+ data: Union[pd.DataFrame, pd.Series, np.ndarray],
607
+ column: Optional[str] = None,
608
+ confidence_level: float = 0.95,
609
+ ci_method: str = 'parametric',
610
+ bins: int = 30,
611
+ figsize: Optional[Tuple[int, int]] = None,
612
+ save_fig: Optional[bool] = None,
613
+ filename: Optional[str] = None,
614
+ **kwargs) -> plt.Figure:
615
+
616
+ # ======= PREPARACIÓN =======
619
617
  if isinstance(data, pd.DataFrame):
620
618
  if column is None:
621
619
  raise ValueError("Debe especificar 'column' cuando data es DataFrame")
@@ -627,98 +625,91 @@ class UtilsStats:
627
625
  else:
628
626
  plot_data = pd.Series(data).dropna()
629
627
  data_name = 'Variable'
630
-
628
+
631
629
  data_array = plot_data.values
632
- default_filename = f"distribucion_ci_{data_name.lower().replace(' ', '_')}"
633
- filename = filename or default_filename
634
-
635
- # Calcular estadísticas e intervalos de confianza
630
+ filename = filename or f"distribucion_ci_{data_name.lower().replace(' ', '_')}"
631
+
632
+ # Estadísticas
636
633
  ci_result = self.calculate_confidence_intervals(data_array, confidence_level, ci_method)
637
634
  normality_result = self.check_normality(data_array)
638
-
639
- # Crear figura con dos subgráficas
640
- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize or (14, 6))
641
-
642
- # ===== PRIMERA GRÁFICA: Distribución básica =====
643
- n, bins, patches = ax1.hist(data_array, bins=bins, alpha=0.7,
644
- color='skyblue', edgecolor='black',
645
- density=True, label='Histograma')
646
-
635
+
647
636
  # KDE
648
637
  kde = stats.gaussian_kde(data_array)
649
- x_range = np.linspace(data_array.min(), data_array.max(), 200)
638
+ x_range = np.linspace(data_array.min(), data_array.max(), 300)
639
+
640
+ # ======= FIGURA =======
641
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize or (14, 6))
642
+
643
+ # ============================================================
644
+ # PANEL 1: HISTOGRAMA + KDE
645
+ # ============================================================
646
+ ax1.hist(data_array, bins=bins, density=True,
647
+ color='skyblue', edgecolor='black', alpha=0.7)
648
+
650
649
  ax1.plot(x_range, kde(x_range), 'r-', linewidth=2, label='KDE')
651
-
652
- # Línea vertical en la media
653
- ax1.axvline(ci_result['mean'], color='red', linestyle='--',
654
- linewidth=2, label=f'Media: {ci_result["mean"]:.2f}')
655
-
656
- ax1.set_xlabel('Valores')
657
- ax1.set_ylabel('Densidad')
658
- ax1.set_title(f'Distribución de {data_name}\n'
659
- f'Media: {ci_result["mean"]:.2f}, '
660
- f'Desv. Est.: {ci_result["std"]:.2f}')
650
+
651
+ ax1.axvline(ci_result['mean'], color='red', linestyle='--', linewidth=2,
652
+ label=f"Media: {ci_result['mean']:.2f}")
653
+
654
+ ax1.set_title(f"Distribución de {data_name}")
655
+ ax1.set_xlabel("Valores")
656
+ ax1.set_ylabel("Densidad")
661
657
  ax1.legend()
662
- ax1.grid(True, alpha=0.3)
663
-
664
- # ===== SEGUNDA GRÁFICA: Distribución con intervalos de confianza =====
665
- n, bins, patches = ax2.hist(data_array, bins=bins, alpha=0.7,
666
- color='lightgreen', edgecolor='black',
667
- density=True, label='Histograma')
668
-
669
- # KDE
658
+ ax1.grid(alpha=0.3)
659
+
660
+ # ============================================================
661
+ # PANEL 2: KDE + INTERVALO DE CONFIANZA
662
+ # ============================================================
663
+
664
+ # KDE pura
670
665
  ax2.plot(x_range, kde(x_range), 'r-', linewidth=2, label='KDE')
671
-
672
- # Media y intervalos de confianza
673
- ax2.axvline(ci_result['mean'], color='red', linestyle='-',
674
- linewidth=3, label=f'Media: {ci_result["mean"]:.2f}')
675
-
676
- # Intervalo de confianza
677
- ax2.axvspan(ci_result['ci_lower'], ci_result['ci_upper'],
678
- alpha=0.3, color='orange',
679
- label=f'IC {confidence_level*100}%: [{ci_result["ci_lower"]:.2f}, {ci_result["ci_upper"]:.2f}]')
680
-
681
- # Líneas para los límites del IC
682
- ax2.axvline(ci_result['ci_lower'], color='orange', linestyle='--', linewidth=2)
683
- ax2.axvline(ci_result['ci_upper'], color='orange', linestyle='--', linewidth=2)
684
-
685
- # Distribución normal teórica (si los datos son normales)
686
- if normality_result['is_normal']:
687
- normal_x = np.linspace(data_array.min(), data_array.max(), 200)
688
- normal_y = stats.norm.pdf(normal_x, ci_result['mean'], ci_result['std'])
689
- ax2.plot(normal_x, normal_y, 'g--', linewidth=2, alpha=0.7,
690
- label='Distribución Normal Teórica')
691
-
692
- ax2.set_xlabel('Valores')
693
- ax2.set_ylabel('Densidad')
694
- ax2.set_title(f'Distribución con Intervalos de Confianza\n'
695
- f'Método: {ci_method}, n={ci_result["n"]}')
666
+
667
+ # Intervalo de Confianza
668
+ ax2.axvspan(ci_result["ci_lower"], ci_result["ci_upper"],
669
+ color='orange', alpha=0.3,
670
+ label=f"IC {confidence_level*100:.0f}%")
671
+
672
+ # Media
673
+ ax2.axvline(ci_result["mean"], color='red', linewidth=2)
674
+
675
+ # Distribución normal teórica (si aplica)
676
+ if normality_result["is_normal"]:
677
+ normal_y = stats.norm.pdf(x_range, ci_result['mean'], ci_result['std'])
678
+ ax2.plot(x_range, normal_y, 'g--', linewidth=2, alpha=0.7,
679
+ label="Normal Teórica")
680
+
681
+ ax2.set_title(f"IC con método '{ci_method}'")
682
+ ax2.set_xlabel("Valores")
683
+ ax2.set_ylabel("Densidad")
696
684
  ax2.legend()
697
- ax2.grid(True, alpha=0.3)
698
-
699
- # Información adicional como texto
700
- info_text = (f'Estadísticas:\n'
701
- f'• Media: {ci_result["mean"]:.3f}\n'
702
- f'Desv. Est.: {ci_result["std"]:.3f}\n'
703
- f'n: {ci_result["n"]}\n'
704
- f'IC {confidence_level*100}%: [{ci_result["ci_lower"]:.3f}, {ci_result["ci_upper"]:.3f}]\n'
705
- f'Margen Error: ±{ci_result["margin_error"]:.3f}\n'
706
- f'Normalidad: {normality_result["interpretation"]}\n'
707
- f'p-value: {normality_result["shapiro_pvalue"]:.4f}')
708
-
709
- fig.text(0.02, 0.02, info_text, fontsize=9,
710
- bbox=dict(boxstyle="round,pad=0.5", facecolor="lightgray", alpha=0.7),
711
- verticalalignment='bottom')
712
-
685
+ ax2.grid(alpha=0.3)
686
+
687
+ # ======= CUADRO DE INFO =======
688
+ info = (
689
+ f"Estadísticas de {data_name}:\n"
690
+ f"n = {ci_result['n']}\n"
691
+ f"Media = {ci_result['mean']:.3f}\n"
692
+ f"Desv. Est. = {ci_result['std']:.3f}\n"
693
+ f"IC {confidence_level*100:.0f}% = [{ci_result['ci_lower']:.3f}, {ci_result['ci_upper']:.3f}]\n"
694
+ f"Margen Error = ±{ci_result['margin_error']:.3f}\n"
695
+ f"Normalidad = {normality_result['interpretation']}\n"
696
+ f"• p-value Shapiro = {normality_result['shapiro_pvalue']:.4f}"
697
+ )
698
+
699
+ fig.text(0.01, 0.01, info, fontsize=9,
700
+ bbox=dict(facecolor='lightgray', alpha=0.6),
701
+ va='bottom')
702
+
713
703
  plt.tight_layout()
714
-
715
- # Guardar figura si está activado
704
+
705
+ # Guardado opcional
716
706
  save_fig = save_fig if save_fig is not None else self._save_fig
717
707
  if save_fig:
718
708
  self._save_figure(fig, filename)
719
-
709
+
720
710
  return fig
721
711
 
712
+
722
713
  def plot_multiple_distributions_with_ci(self,
723
714
  data_dict: dict,
724
715
  confidence_level: float = 0.95,
@@ -877,7 +868,8 @@ class UtilsStats:
877
868
  backend: 'matplotlib', 'seaborn', 'plotly'
878
869
 
879
870
  • .plot_distribution_with_ci(data, column=None, confidence_level=0.95,
880
- ci_method='parametric', bins=30)
871
+ ci_method='parametric', bins=30, figsize=None,
872
+ save_fig=None, filename=None)
881
873
 
882
874
  Distribución con intervalos de confianza visualizados
883
875
 
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: statslibx
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: Librería de estadística descriptiva e inferencial para Python
5
- Home-page: https://github.com/Immanuel3008/StatsLibrary
5
+ Home-page: https://github.com/Immanuel3008/StatsLibX
6
6
  Author: Emmanuel Ascendra Perez
7
7
  Author-email: ascendraemmanuel@gmail.com
8
8
  Classifier: Development Status :: 3 - Alpha
@@ -43,4 +43,35 @@ Dynamic: requires-dist
43
43
  Dynamic: requires-python
44
44
  Dynamic: summary
45
45
 
46
- Librería de estadística descriptiva e inferencial para Python
46
+ 📦 Descripción para PyPI (Plantilla Profesional)
47
+
48
+ StatsLibX es un paquete de Python diseñado para proporcionar una solución sencilla, eficiente y flexible para manejar volúmenes de datos.
49
+
50
+ Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la estadística descriptiva e inferencial sin complicaciones, con múltiples funcionalidades y utilidades pensadas para el futuro.
51
+
52
+ ✨ Características principales
53
+
54
+ ⚡ Rápido y eficiente: optimizado para ofrecer un rendimiento suave incluso en tareas exigentes.
55
+
56
+ 🧩 Fácil de usar: una API limpia para que empieces en segundos.
57
+
58
+ 🔧 Altamente extensible: personalízalo según tus necesidades.
59
+
60
+ 📚 Documentación clara: ejemplos simples y prácticos.
61
+
62
+ 🔮 Diseñado con visión a futuro: construido para escalar y adaptarse.
63
+
64
+ 🚀 Ejemplo rápido
65
+ from statslibx import DescriptiveStats, InferentialStats, UtilsStats
66
+
67
+ stats = DescriptiveStats(data) # InferentialStats(data), UtilsStats()
68
+ stats.help()
69
+
70
+ 📦 Instalación
71
+ pip install statslibx
72
+
73
+ 🤝 Contribuciones
74
+
75
+ ¡Todas las mejoras e ideas son bienvenidas!
76
+
77
+ E-mail: ascendraemmanuel@gmail.com
@@ -0,0 +1,8 @@
1
+ statslibx/__init__.py,sha256=TLTlwOvXPila3LVTloogrIMsy1G6cJ4wb051YSYXNhE,1117
2
+ statslibx/descriptive.py,sha256=Hu7VuOGXs6oOq-zxQNiBKg7UtkNdNQ1Qy3PP-wEO5_k,36971
3
+ statslibx/inferential.py,sha256=BVBxEdLnNCw2yC-3s5fZ84oeJ8LqJYR_IJquPEiyiOk,48234
4
+ statslibx/utils.py,sha256=tdf1yZuR4fsmNq24ygv69BgCLzB0iE_x0ki1IV7Iwxs,60693
5
+ statslibx-0.1.2.dist-info/METADATA,sha256=vz1-UMNdrew0WyDciZbu96uoXhw9uPngFHZqKSW-X70,2887
6
+ statslibx-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ statslibx-0.1.2.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
8
+ statslibx-0.1.2.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- statslibx/__init__.py,sha256=p1AydN8u_zWgm1rJBJb8TVQkRUzQMA3iNDwfFZn8k00,1117
2
- statslibx/descriptive.py,sha256=Hu7VuOGXs6oOq-zxQNiBKg7UtkNdNQ1Qy3PP-wEO5_k,36971
3
- statslibx/inferential.py,sha256=slLh32Ny4doLA0EA8pYRUGQSuMI8oBUCMBu-CTX-7FY,41732
4
- statslibx/utils.py,sha256=vnfs5LmWEKsB9p8Fs2Di3btReepkB1RYAwQfT-eZs6c,61856
5
- statslibx-0.1.1.dist-info/METADATA,sha256=ujX3UiJWx5ibgNv7OaJevK5YYeIaxudPkKzlFWBChO0,1737
6
- statslibx-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
- statslibx-0.1.1.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
8
- statslibx-0.1.1.dist-info/RECORD,,