statslibx 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statslibx/__init__.py +12 -8
- statslibx/computacional.py +2 -0
- statslibx/datasets/__init__.py +227 -54
- statslibx/descriptive.py +8 -9
- statslibx/inferential.py +746 -307
- statslibx/preprocessing/__init__.py +12 -5
- statslibx/probability.py +2 -0
- statslibx/utils.py +112 -150
- {statslibx-0.1.7.dist-info → statslibx-0.1.8.dist-info}/METADATA +17 -3
- statslibx-0.1.8.dist-info/RECORD +15 -0
- statslibx/datasets/course_completion.csv +0 -100001
- statslibx/datasets/iris.csv +0 -151
- statslibx/datasets/penguins.csv +0 -345
- statslibx/datasets/sp500_companies.csv +0 -504
- statslibx/datasets/titanic.csv +0 -419
- statslibx-0.1.7.dist-info/RECORD +0 -18
- {statslibx-0.1.7.dist-info → statslibx-0.1.8.dist-info}/WHEEL +0 -0
- {statslibx-0.1.7.dist-info → statslibx-0.1.8.dist-info}/entry_points.txt +0 -0
- {statslibx-0.1.7.dist-info → statslibx-0.1.8.dist-info}/top_level.txt +0 -0
|
@@ -169,26 +169,33 @@ class Preprocessing:
|
|
|
169
169
|
column: str,
|
|
170
170
|
method: str = "iqr"
|
|
171
171
|
) -> pd.DataFrame:
|
|
172
|
-
|
|
173
172
|
if self._is_pandas():
|
|
174
173
|
series = self.data[column]
|
|
175
174
|
else:
|
|
176
175
|
series = self.data[column].to_pandas()
|
|
177
176
|
|
|
177
|
+
# 2. Calcular la máscara según el método
|
|
178
178
|
if method == "iqr":
|
|
179
179
|
q1 = series.quantile(0.25)
|
|
180
180
|
q3 = series.quantile(0.75)
|
|
181
181
|
iqr = q3 - q1
|
|
182
|
-
|
|
182
|
+
mask_values = (series < q1 - 1.5 * iqr) | (series > q3 + 1.5 * iqr)
|
|
183
183
|
|
|
184
184
|
elif method == "zscore":
|
|
185
185
|
z = (series - series.mean()) / series.std()
|
|
186
|
-
|
|
187
|
-
|
|
186
|
+
mask_values = z.abs() > 3
|
|
188
187
|
else:
|
|
189
188
|
raise ValueError("method must be 'iqr' or 'zscore'")
|
|
190
189
|
|
|
191
|
-
|
|
190
|
+
outliers = self.data[mask_values.values]
|
|
191
|
+
|
|
192
|
+
# 4. Manejo de retorno profesional
|
|
193
|
+
if len(outliers) == 0:
|
|
194
|
+
print(f"No outliers found in column '{column}'")
|
|
195
|
+
return outliers
|
|
196
|
+
|
|
197
|
+
return outliers
|
|
198
|
+
|
|
192
199
|
|
|
193
200
|
# ------------------------------------------------------------------
|
|
194
201
|
# Data Quality Report
|
statslibx/probability.py
ADDED
statslibx/utils.py
CHANGED
|
@@ -398,11 +398,35 @@ class UtilsStats:
|
|
|
398
398
|
|
|
399
399
|
return outliers
|
|
400
400
|
|
|
401
|
-
def calculate_effect_size(self,
|
|
402
|
-
|
|
401
|
+
def calculate_effect_size(self,
|
|
402
|
+
data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path] = None,
|
|
403
|
+
group1: Union[str, pd.Series, np.ndarray] = None,
|
|
404
|
+
group2: Union[str, pd.Series, np.ndarray] = None,
|
|
405
|
+
method: Literal['cohen', 'hedges'] = 'cohen') -> dict:
|
|
403
406
|
"""
|
|
404
407
|
Calcula el tamaño del efecto entre dos grupos
|
|
405
408
|
"""
|
|
409
|
+
|
|
410
|
+
# --- Preparar arrays ---
|
|
411
|
+
# Caso 1: data es DataFrame y group1/group2 son nombres de columna
|
|
412
|
+
if isinstance(data, pd.DataFrame):
|
|
413
|
+
group1 = np.array(data[group1])
|
|
414
|
+
group2 = np.array(data[group2])
|
|
415
|
+
# Caso 2: data no es None, y es una serie o array, usarlo como group1
|
|
416
|
+
elif isinstance(data, (pd.Series, np.ndarray)) and group2 is not None:
|
|
417
|
+
group1 = np.array(data)
|
|
418
|
+
group2 = np.array(group2)
|
|
419
|
+
# Caso 3: group1 y group2 ya son arrays o Series
|
|
420
|
+
else:
|
|
421
|
+
group1 = np.array(group1)
|
|
422
|
+
group2 = np.array(group2)
|
|
423
|
+
|
|
424
|
+
# Eliminar nan automáticamente
|
|
425
|
+
group1 = group1[~np.isnan(group1)]
|
|
426
|
+
group2 = group2[~np.isnan(group2)]
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
# --- Calcular estadísticas ---
|
|
406
430
|
mean1, mean2 = np.mean(group1), np.mean(group2)
|
|
407
431
|
std1, std2 = np.std(group1, ddof=1), np.std(group2, ddof=1)
|
|
408
432
|
n1, n2 = len(group1), len(group2)
|
|
@@ -434,6 +458,7 @@ class UtilsStats:
|
|
|
434
458
|
'pooled_std': pooled_std
|
|
435
459
|
}
|
|
436
460
|
|
|
461
|
+
|
|
437
462
|
# ============= MÉTODOS DE VISUALIZACIÓN COMPLETOS =============
|
|
438
463
|
|
|
439
464
|
def _plot_distribution_seaborn(self, data, plot_type, bins, figsize, title, **kwargs):
|
|
@@ -528,6 +553,47 @@ class UtilsStats:
|
|
|
528
553
|
plt.tight_layout()
|
|
529
554
|
|
|
530
555
|
return fig
|
|
556
|
+
|
|
557
|
+
def _plot_distribution_plotly(self, data, plot_type, bins, title, **kwargs):
|
|
558
|
+
"""Implementación con plotly"""
|
|
559
|
+
try:
|
|
560
|
+
import plotly.graph_objects as go
|
|
561
|
+
import plotly.express as px
|
|
562
|
+
from plotly.subplots import make_subplots
|
|
563
|
+
except ImportError:
|
|
564
|
+
raise ImportError("Plotly no está instalado. Instale con: pip install plotly")
|
|
565
|
+
|
|
566
|
+
if plot_type == 'all':
|
|
567
|
+
fig = make_subplots(
|
|
568
|
+
rows=2, cols=2,
|
|
569
|
+
subplot_titles=('Histograma', 'Box Plot', 'Violin Plot', 'Distribución Acumulada')
|
|
570
|
+
)
|
|
571
|
+
|
|
572
|
+
# Histograma
|
|
573
|
+
fig.add_trace(go.Histogram(x=data, nbinsx=bins, name='Histograma'), row=1, col=1)
|
|
574
|
+
|
|
575
|
+
# Box plot
|
|
576
|
+
fig.add_trace(go.Box(y=data, name='Box Plot'), row=1, col=2)
|
|
577
|
+
|
|
578
|
+
# Violin plot
|
|
579
|
+
fig.add_trace(go.Violin(y=data, name='Violin Plot'), row=2, col=1)
|
|
580
|
+
|
|
581
|
+
# Distribución acumulada
|
|
582
|
+
hist, bin_edges = np.histogram(data, bins=bins, density=True)
|
|
583
|
+
cdf = np.cumsum(hist * np.diff(bin_edges))
|
|
584
|
+
fig.add_trace(go.Scatter(x=bin_edges[1:], y=cdf, name='CDF'), row=2, col=2)
|
|
585
|
+
|
|
586
|
+
else:
|
|
587
|
+
if plot_type == 'hist':
|
|
588
|
+
fig = px.histogram(data, nbins=bins, title=title)
|
|
589
|
+
elif plot_type == 'box':
|
|
590
|
+
fig = px.box(y=data, title=title)
|
|
591
|
+
elif plot_type == 'violin':
|
|
592
|
+
fig = px.violin(y=data, title=title, box=True)
|
|
593
|
+
else:
|
|
594
|
+
fig = px.histogram(data, nbins=bins, title=title)
|
|
595
|
+
|
|
596
|
+
return fig
|
|
531
597
|
|
|
532
598
|
def plot_distribution(self,
|
|
533
599
|
data: Union[pd.DataFrame, pd.Series, np.ndarray, str, Path],
|
|
@@ -536,7 +602,7 @@ class UtilsStats:
|
|
|
536
602
|
backend: Optional[Literal['matplotlib', 'seaborn', 'plotly']] = "seaborn",
|
|
537
603
|
bins: int = 30,
|
|
538
604
|
figsize: Optional[Tuple[int, int]] = None,
|
|
539
|
-
save_fig: Optional[bool] =
|
|
605
|
+
save_fig: Optional[bool] = False,
|
|
540
606
|
filename: Optional[str] = None,
|
|
541
607
|
**kwargs):
|
|
542
608
|
"""
|
|
@@ -568,7 +634,7 @@ class UtilsStats:
|
|
|
568
634
|
"""
|
|
569
635
|
backend = backend or self._plot_backend
|
|
570
636
|
figsize = figsize or self._default_figsize
|
|
571
|
-
|
|
637
|
+
self._save_fig = save_fig
|
|
572
638
|
|
|
573
639
|
# Resolver datos
|
|
574
640
|
data, source = self._resolve_data(data, column)
|
|
@@ -605,59 +671,20 @@ class UtilsStats:
|
|
|
605
671
|
if save_fig and backend != 'plotly':
|
|
606
672
|
self._save_figure(fig, filename)
|
|
607
673
|
|
|
608
|
-
|
|
674
|
+
if backend == 'plotly':
|
|
675
|
+
return fig
|
|
609
676
|
|
|
610
677
|
except Exception as e:
|
|
611
678
|
print(f"Error en plot_distribution: {e}")
|
|
612
679
|
raise
|
|
613
680
|
|
|
614
|
-
def _plot_distribution_plotly(self, data, plot_type, bins, title, **kwargs):
|
|
615
|
-
"""Implementación con plotly"""
|
|
616
|
-
try:
|
|
617
|
-
import plotly.graph_objects as go
|
|
618
|
-
import plotly.express as px
|
|
619
|
-
from plotly.subplots import make_subplots
|
|
620
|
-
except ImportError:
|
|
621
|
-
raise ImportError("Plotly no está instalado. Instale con: pip install plotly")
|
|
622
|
-
|
|
623
|
-
if plot_type == 'all':
|
|
624
|
-
fig = make_subplots(
|
|
625
|
-
rows=2, cols=2,
|
|
626
|
-
subplot_titles=('Histograma', 'Box Plot', 'Violin Plot', 'Distribución Acumulada')
|
|
627
|
-
)
|
|
628
|
-
|
|
629
|
-
# Histograma
|
|
630
|
-
fig.add_trace(go.Histogram(x=data, nbinsx=bins, name='Histograma'), row=1, col=1)
|
|
631
|
-
|
|
632
|
-
# Box plot
|
|
633
|
-
fig.add_trace(go.Box(y=data, name='Box Plot'), row=1, col=2)
|
|
634
|
-
|
|
635
|
-
# Violin plot
|
|
636
|
-
fig.add_trace(go.Violin(y=data, name='Violin Plot'), row=2, col=1)
|
|
637
|
-
|
|
638
|
-
# Distribución acumulada
|
|
639
|
-
hist, bin_edges = np.histogram(data, bins=bins, density=True)
|
|
640
|
-
cdf = np.cumsum(hist * np.diff(bin_edges))
|
|
641
|
-
fig.add_trace(go.Scatter(x=bin_edges[1:], y=cdf, name='CDF'), row=2, col=2)
|
|
642
|
-
|
|
643
|
-
else:
|
|
644
|
-
if plot_type == 'hist':
|
|
645
|
-
fig = px.histogram(data, nbins=bins, title=title)
|
|
646
|
-
elif plot_type == 'box':
|
|
647
|
-
fig = px.box(y=data, title=title)
|
|
648
|
-
elif plot_type == 'violin':
|
|
649
|
-
fig = px.violin(y=data, title=title, box=True)
|
|
650
|
-
else:
|
|
651
|
-
fig = px.histogram(data, nbins=bins, title=title)
|
|
652
|
-
|
|
653
|
-
return fig
|
|
654
|
-
|
|
655
681
|
def plot_correlation_matrix(self,
|
|
656
682
|
data: Union[pd.DataFrame, str, Path],
|
|
657
|
-
method:
|
|
658
|
-
backend: Optional[Literal['seaborn', 'plotly']] =
|
|
683
|
+
method: Literal['pearson', 'kendall', 'spearman'] = 'pearson',
|
|
684
|
+
backend: Optional[Literal['seaborn', 'plotly']] = "seaborn",
|
|
685
|
+
triangular: Optional[bool] = False,
|
|
659
686
|
figsize: Optional[Tuple[int, int]] = None,
|
|
660
|
-
save_fig: Optional[bool] =
|
|
687
|
+
save_fig: Optional[bool] = False,
|
|
661
688
|
filename: Optional[str] = None,
|
|
662
689
|
**kwargs):
|
|
663
690
|
"""
|
|
@@ -674,25 +701,32 @@ class UtilsStats:
|
|
|
674
701
|
"""
|
|
675
702
|
backend = backend or self._plot_backend
|
|
676
703
|
figsize = figsize or self._default_figsize
|
|
677
|
-
save_fig = save_fig
|
|
704
|
+
self.save_fig = save_fig
|
|
678
705
|
filename = filename or "matriz_correlacion"
|
|
679
|
-
|
|
706
|
+
|
|
680
707
|
# Resolver datos
|
|
681
708
|
data, source = self._resolve_data(data)
|
|
682
709
|
|
|
683
710
|
if not isinstance(data, pd.DataFrame):
|
|
684
711
|
raise ValueError("Se requiere un DataFrame para calcular matriz de correlación")
|
|
712
|
+
else:
|
|
713
|
+
data = data.select_dtypes(include=['float64', 'int64'])
|
|
685
714
|
|
|
686
715
|
# Calcular matriz de correlación
|
|
687
716
|
corr_matrix = data.corr(method=method)
|
|
688
717
|
|
|
689
718
|
if backend == 'seaborn':
|
|
690
719
|
fig, ax = plt.subplots(figsize=figsize)
|
|
691
|
-
|
|
720
|
+
if triangular:
|
|
721
|
+
mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
|
|
692
722
|
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
723
|
+
sns.heatmap(corr_matrix, mask=mask, annot=True, fmt='.2f',
|
|
724
|
+
cmap='coolwarm', center=0, ax=ax,
|
|
725
|
+
square=True, linewidths=0.5, **kwargs)
|
|
726
|
+
else:
|
|
727
|
+
sns.heatmap(corr_matrix, annot=True, fmt='.2f',
|
|
728
|
+
cmap='coolwarm', center=0, ax=ax,
|
|
729
|
+
square=True, linewidths=0.5, **kwargs)
|
|
696
730
|
ax.set_title(f'Matriz de Correlación ({method})', fontsize=14, pad=20)
|
|
697
731
|
plt.tight_layout()
|
|
698
732
|
|
|
@@ -731,15 +765,15 @@ class UtilsStats:
|
|
|
731
765
|
print(f"✓ Figura Plotly guardada: {filepath}")
|
|
732
766
|
except Exception as e:
|
|
733
767
|
print(f"✗ Error guardando figura Plotly: {e}")
|
|
734
|
-
|
|
735
|
-
|
|
768
|
+
if backend == 'plotly':
|
|
769
|
+
return fig
|
|
736
770
|
|
|
737
771
|
def plot_scatter_matrix(self,
|
|
738
772
|
data: Union[pd.DataFrame, str, Path],
|
|
739
773
|
columns: Optional[List[str]] = None,
|
|
740
774
|
backend: Optional[Literal['seaborn', 'plotly', 'pandas']] = None,
|
|
741
775
|
figsize: Optional[Tuple[int, int]] = None,
|
|
742
|
-
save_fig: Optional[bool] =
|
|
776
|
+
save_fig: Optional[bool] = False,
|
|
743
777
|
filename: Optional[str] = None,
|
|
744
778
|
**kwargs):
|
|
745
779
|
"""
|
|
@@ -752,7 +786,7 @@ class UtilsStats:
|
|
|
752
786
|
"""
|
|
753
787
|
backend = backend or self._plot_backend
|
|
754
788
|
figsize = figsize or self._default_figsize
|
|
755
|
-
save_fig = save_fig
|
|
789
|
+
self.save_fig = save_fig
|
|
756
790
|
filename = filename or "scatter_matrix"
|
|
757
791
|
|
|
758
792
|
# Resolver datos
|
|
@@ -791,7 +825,8 @@ class UtilsStats:
|
|
|
791
825
|
except Exception as e:
|
|
792
826
|
print(f"✗ Error guardando figura Plotly: {e}")
|
|
793
827
|
|
|
794
|
-
|
|
828
|
+
if backend == 'plotly':
|
|
829
|
+
return fig
|
|
795
830
|
|
|
796
831
|
# ============= GRÁFICOS CON INTERVALOS DE CONFIANZA =============
|
|
797
832
|
|
|
@@ -802,7 +837,7 @@ class UtilsStats:
|
|
|
802
837
|
ci_method: str = 'parametric',
|
|
803
838
|
bins: int = 30,
|
|
804
839
|
figsize: Optional[Tuple[int, int]] = None,
|
|
805
|
-
save_fig: Optional[bool] =
|
|
840
|
+
save_fig: Optional[bool] = False,
|
|
806
841
|
filename: Optional[str] = None,
|
|
807
842
|
**kwargs) -> plt.Figure:
|
|
808
843
|
"""
|
|
@@ -838,7 +873,7 @@ class UtilsStats:
|
|
|
838
873
|
x_range = np.linspace(data_array.min(), data_array.max(), 300)
|
|
839
874
|
|
|
840
875
|
# ======= FIGURA =======
|
|
841
|
-
fig, (ax1, ax2) = plt.subplots(
|
|
876
|
+
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize or (14, 6))
|
|
842
877
|
|
|
843
878
|
# ============================================================
|
|
844
879
|
# PANEL 1: HISTOGRAMA + KDE
|
|
@@ -903,109 +938,37 @@ class UtilsStats:
|
|
|
903
938
|
plt.tight_layout()
|
|
904
939
|
|
|
905
940
|
# Guardado opcional
|
|
906
|
-
save_fig = save_fig
|
|
941
|
+
self.save_fig = save_fig
|
|
907
942
|
if save_fig:
|
|
908
943
|
self._save_figure(fig, filename)
|
|
909
944
|
|
|
910
|
-
return fig
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
def plot_multiple_distributions_with_ci(self,
|
|
914
|
-
data_dict: dict,
|
|
915
|
-
confidence_level: float = 0.95,
|
|
916
|
-
figsize: Optional[Tuple[int, int]] = None,
|
|
917
|
-
save_fig: Optional[bool] = None,
|
|
918
|
-
filename: Optional[str] = None,
|
|
919
|
-
**kwargs) -> plt.Figure:
|
|
920
|
-
"""
|
|
921
|
-
Grafica múltiples distribuciones con sus intervalos de confianza
|
|
922
|
-
"""
|
|
923
|
-
n_distributions = len(data_dict)
|
|
924
|
-
fig, axes = plt.subplots(n_distributions, 2,
|
|
925
|
-
figsize=figsize or (14, 5 * n_distributions))
|
|
926
|
-
|
|
927
|
-
if n_distributions == 1:
|
|
928
|
-
axes = axes.reshape(1, -1)
|
|
929
|
-
|
|
930
|
-
colors = plt.cm.Set3(np.linspace(0, 1, n_distributions))
|
|
931
|
-
|
|
932
|
-
for idx, (name, data) in enumerate(data_dict.items()):
|
|
933
|
-
ax1, ax2 = axes[idx]
|
|
934
|
-
|
|
935
|
-
if isinstance(data, pd.Series):
|
|
936
|
-
data_array = data.dropna().values
|
|
937
|
-
else:
|
|
938
|
-
data_array = np.array(data)
|
|
939
|
-
data_array = data_array[~np.isnan(data_array)]
|
|
940
|
-
|
|
941
|
-
# Calcular estadísticas
|
|
942
|
-
ci_result = self.calculate_confidence_intervals(data_array, confidence_level=confidence_level)
|
|
943
|
-
|
|
944
|
-
# Gráfica izquierda: Distribución básica
|
|
945
|
-
ax1.hist(data_array, bins=30, alpha=0.7, color=colors[idx],
|
|
946
|
-
edgecolor='black', density=True)
|
|
947
|
-
|
|
948
|
-
kde = stats.gaussian_kde(data_array)
|
|
949
|
-
x_range = np.linspace(data_array.min(), data_array.max(), 200)
|
|
950
|
-
ax1.plot(x_range, kde(x_range), 'k-', linewidth=2)
|
|
951
|
-
ax1.axvline(ci_result['mean'], color='red', linestyle='--', linewidth=2)
|
|
952
|
-
|
|
953
|
-
ax1.set_title(f'{name}\nMedia: {ci_result["mean"]:.2f}')
|
|
954
|
-
ax1.grid(True, alpha=0.3)
|
|
955
|
-
|
|
956
|
-
# Gráfica derecha: Con intervalos de confianza
|
|
957
|
-
ax2.hist(data_array, bins=30, alpha=0.7, color=colors[idx],
|
|
958
|
-
edgecolor='black', density=True)
|
|
959
|
-
ax2.plot(x_range, kde(x_range), 'k-', linewidth=2)
|
|
960
|
-
|
|
961
|
-
ax2.axvline(ci_result['mean'], color='red', linestyle='-', linewidth=3)
|
|
962
|
-
ax2.axvspan(ci_result['ci_lower'], ci_result['ci_upper'],
|
|
963
|
-
alpha=0.3, color='orange')
|
|
964
|
-
ax2.axvline(ci_result['ci_lower'], color='orange', linestyle='--', linewidth=2)
|
|
965
|
-
ax2.axvline(ci_result['ci_upper'], color='orange', linestyle='--', linewidth=2)
|
|
966
|
-
|
|
967
|
-
ax2.set_title(f'{name} con IC {confidence_level*100}%')
|
|
968
|
-
ax2.grid(True, alpha=0.3)
|
|
969
|
-
|
|
970
|
-
plt.tight_layout()
|
|
971
|
-
|
|
972
|
-
# Guardar figura si está activado
|
|
973
|
-
save_fig = save_fig if save_fig is not None else self._save_fig
|
|
974
|
-
if save_fig:
|
|
975
|
-
filename = filename or "multiples_distribuciones_ci"
|
|
976
|
-
self._save_figure(fig, filename)
|
|
977
|
-
|
|
978
|
-
return fig
|
|
979
945
|
|
|
980
946
|
# ============= MÉTODOS UTILITARIOS ADICIONALES =============
|
|
981
947
|
|
|
982
|
-
def get_descriptive_stats(self,
|
|
983
|
-
|
|
984
|
-
column: Optional[str] = None) -> dict:
|
|
985
|
-
"""
|
|
986
|
-
Obtiene estadísticas descriptivas completas
|
|
987
|
-
|
|
988
|
-
Ahora acepta rutas de archivos
|
|
989
|
-
"""
|
|
990
|
-
# Resolver datos
|
|
991
|
-
data, source = self._resolve_data(data, column)
|
|
992
|
-
|
|
948
|
+
def get_descriptive_stats(self, data, column=None):
|
|
949
|
+
|
|
993
950
|
if isinstance(data, pd.DataFrame):
|
|
994
951
|
if column is None:
|
|
995
|
-
raise ValueError("Debe
|
|
952
|
+
raise ValueError("Debe especificarse una columna")
|
|
996
953
|
data_series = data[column]
|
|
997
|
-
elif isinstance(data, pd.Series):
|
|
998
|
-
data_series = data
|
|
999
954
|
else:
|
|
1000
955
|
data_series = pd.Series(data)
|
|
1001
|
-
|
|
956
|
+
|
|
1002
957
|
data_clean = data_series.dropna()
|
|
1003
|
-
|
|
958
|
+
|
|
959
|
+
if len(data_clean) == 0:
|
|
960
|
+
return {k: np.nan for k in [
|
|
961
|
+
'count','mean','median','mode','std','variance',
|
|
962
|
+
'min','max','q1','q3','iqr','skewness','kurtosis','range'
|
|
963
|
+
]}
|
|
964
|
+
|
|
965
|
+
mode_result = stats.mode(data_clean, keepdims=False)
|
|
966
|
+
|
|
1004
967
|
return {
|
|
1005
968
|
'count': len(data_clean),
|
|
1006
969
|
'mean': np.mean(data_clean),
|
|
1007
970
|
'median': np.median(data_clean),
|
|
1008
|
-
'mode':
|
|
971
|
+
'mode': mode_result.mode,
|
|
1009
972
|
'std': np.std(data_clean, ddof=1),
|
|
1010
973
|
'variance': np.var(data_clean, ddof=1),
|
|
1011
974
|
'min': np.min(data_clean),
|
|
@@ -1017,7 +980,6 @@ class UtilsStats:
|
|
|
1017
980
|
'kurtosis': stats.kurtosis(data_clean),
|
|
1018
981
|
'range': np.max(data_clean) - np.min(data_clean)
|
|
1019
982
|
}
|
|
1020
|
-
|
|
1021
983
|
def help(self):
|
|
1022
984
|
"""
|
|
1023
985
|
Muestra ayuda completa de la clase DescriptiveStats
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: statslibx
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: StatsLibx - Librería de estadística descriptiva e inferencial
|
|
5
5
|
Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -24,12 +24,14 @@ Provides-Extra: advanced
|
|
|
24
24
|
Requires-Dist: scikit-learn>=1.0; extra == "advanced"
|
|
25
25
|
Requires-Dist: statsmodels>=0.13; extra == "advanced"
|
|
26
26
|
|
|
27
|
-
# 📦
|
|
27
|
+
# 📦 StatsLibX
|
|
28
28
|
|
|
29
29
|
StatsLibX es un paquete de Python diseñado para proporcionar una solución sencilla, eficiente y flexible para manejar volúmenes de datos.
|
|
30
30
|
|
|
31
31
|
Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadística descriptiva e inferencial** sin complicaciones, con múltiples funcionalidades y utilidades pensadas para el futuro.
|
|
32
32
|
|
|
33
|
+
GitHub del Proyecto: [StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
|
|
34
|
+
|
|
33
35
|
## ✨ Características principales
|
|
34
36
|
|
|
35
37
|
- ⚡ Rápido y eficiente: optimizado para ofrecer un rendimiento suave incluso en tareas exigentes.
|
|
@@ -45,16 +47,28 @@ Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y
|
|
|
45
47
|
## 🚀 Ejemplo rápido
|
|
46
48
|
```python
|
|
47
49
|
from statslibx import DescriptiveStats, InferentialStats, UtilsStats
|
|
50
|
+
from statslibx.datasets import load_iris
|
|
51
|
+
|
|
52
|
+
data = load_iris()
|
|
48
53
|
|
|
49
54
|
stats = DescriptiveStats(data) # InferentialStats(data), UtilsStats()
|
|
50
|
-
|
|
55
|
+
|
|
56
|
+
stats.summary()
|
|
51
57
|
```
|
|
58
|
+
Para ver más funciones: [how_use_statslibx.ipynb](https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb)
|
|
52
59
|
|
|
53
60
|
## 📦 Instalación
|
|
54
61
|
```bash
|
|
55
62
|
pip install statslibx
|
|
56
63
|
```
|
|
57
64
|
|
|
65
|
+
## 👩‍💻 ¡Úsalo en la terminal! (De forma preliminar)
|
|
66
|
+
```bash
|
|
67
|
+
statslibx describe .\archive.csv # Devuelve una descripción de la data
|
|
68
|
+
statslibx quality .\archive.csv # Devuelve la calidad de los datos
|
|
69
|
+
statslibx preview .\archive.csv # Devuelve una visualización de los datos
|
|
70
|
+
```
|
|
71
|
+
|
|
58
72
|
## 🤝 Contribuciones
|
|
59
73
|
|
|
60
74
|
¡Todas las mejoras e ideas son bienvenidas!
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
statslibx/__init__.py,sha256=KeEoEZVPUR_PZACWoCpS_2l6luPbEee7VRlcrLgbKQQ,1490
|
|
2
|
+
statslibx/cli.py,sha256=DqXaoP85n9xgLDlFnEkeqj-HJG0_IKX0uSqxRcHbzII,1122
|
|
3
|
+
statslibx/computacional.py,sha256=Nv8wk67RUuuv15oBRu2XPp0_k7O4ZgmT51vThH2OuFk,35
|
|
4
|
+
statslibx/descriptive.py,sha256=r5D4reP1Cdzsu1tSLmf2OEaFAkGvHSd3FIYfUclEaRU,60178
|
|
5
|
+
statslibx/inferential.py,sha256=H0R6g3dJFk-53m1bKldrXObgk0SSmpcdqQg_tIgRKBI,79169
|
|
6
|
+
statslibx/io.py,sha256=v7pxpmlEMeKyfXftl3WbkUtC9FOh1pymz7MmKPPNw98,493
|
|
7
|
+
statslibx/probability.py,sha256=MUME4eXWzbdU93F-QdKwmmyd9IgZK1flFUYQHitp10o,33
|
|
8
|
+
statslibx/utils.py,sha256=iJzt0jDacaoUfjtp4dU2PFuIBEheMP9Qrq-HnLTW_Qw,66515
|
|
9
|
+
statslibx/datasets/__init__.py,sha256=GuUl_7-d6YanuDFht1dwB1bFrqjShvKh1m-iRYAbYZE,6875
|
|
10
|
+
statslibx/preprocessing/__init__.py,sha256=ZwdwjBodxeOry-umJ__6yUSeubpRlZg41yve366ArkY,7395
|
|
11
|
+
statslibx-0.1.8.dist-info/METADATA,sha256=uyhAd0xghADIfVee7WzDp76nLA2snjqQcNayio_UrIc,2835
|
|
12
|
+
statslibx-0.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
+
statslibx-0.1.8.dist-info/entry_points.txt,sha256=bkCY7JDWNCZFE3I4sjgJ2oGrUgoBBbCbYmWkBAymT70,49
|
|
14
|
+
statslibx-0.1.8.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
|
|
15
|
+
statslibx-0.1.8.dist-info/RECORD,,
|