statslibx 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -169,26 +169,33 @@ class Preprocessing:
169
169
  column: str,
170
170
  method: str = "iqr"
171
171
  ) -> pd.DataFrame:
172
-
173
172
  if self._is_pandas():
174
173
  series = self.data[column]
175
174
  else:
176
175
  series = self.data[column].to_pandas()
177
176
 
177
+ # 2. Calcular la máscara según el método
178
178
  if method == "iqr":
179
179
  q1 = series.quantile(0.25)
180
180
  q3 = series.quantile(0.75)
181
181
  iqr = q3 - q1
182
- mask = (series < q1 - 1.5 * iqr) | (series > q3 + 1.5 * iqr)
182
+ mask_values = (series < q1 - 1.5 * iqr) | (series > q3 + 1.5 * iqr)
183
183
 
184
184
  elif method == "zscore":
185
185
  z = (series - series.mean()) / series.std()
186
- mask = z.abs() > 3
187
-
186
+ mask_values = z.abs() > 3
188
187
  else:
189
188
  raise ValueError("method must be 'iqr' or 'zscore'")
190
189
 
191
- return self.data[mask]
190
+ outliers = self.data[mask_values.values]
191
+
192
+ # 4. Manejo de retorno profesional
193
+ if len(outliers) == 0:
194
+ print(f"No outliers found in column '{column}'")
195
+ return outliers
196
+
197
+ return outliers
198
+
192
199
 
193
200
  # ------------------------------------------------------------------
194
201
  # Data Quality Report
statslibx/utils.py CHANGED
@@ -11,23 +11,81 @@ from pathlib import Path
11
11
 
12
12
  class UtilsStats:
13
13
  """
14
- Clase utilitaria para operaciones estadísticas comunes y visualización
14
+ UtilsStats
15
+ A utility class for common statistical operations and visualization.
16
+ This class provides methods for data validation, basic statistical analysis,
17
+ and visualization of results. It also supports loading data directly from files.
18
+ >>> # Load data from a file
19
+ >>> data = utils.load_data("data.csv")
20
+ >>> utils.check_normality(data, column='age')
21
+ >>> # Analyze data from an array
22
+ Methods:
23
+ --------
24
+ _setup_plotting_style():
25
+ Configures default plotting styles for matplotlib.
15
26
 
16
- Esta clase proporciona métodos para validación de datos, análisis estadísticos
17
- básicos y visualización de resultados. Ahora con soporte para leer archivos directamente.
27
+ set_plot_backend(backend: Literal['matplotlib', 'seaborn', 'plotly']):
28
+ Sets the default visualization backend.
18
29
 
19
- Examples:
20
- ---------
21
- >>> utils = UtilsStats()
22
- >>> # Desde archivo
23
- >>> data = utils.load_data("datos.csv")
24
- >>> utils.check_normality(data, column='edad')
25
- >>> # Desde array
26
- >>> data = np.random.normal(0, 1, 100)
27
- >>> utils.check_normality(data)
28
- >>> utils.plot_distribution(data)
30
+ set_default_figsize(figsize: Tuple[int, int]):
31
+ Sets the default figure size for plots.
32
+
33
+ set_save_fig_options(save_fig: Optional[bool] = False, fig_format: str = 'png',
34
+ fig_dpi: int = 300, figures_dir: str = 'figures'):
35
+ Configures options for saving figures.
36
+
37
+ load_data(path: Union[str, Path], **kwargs) -> pd.DataFrame:
38
+ Loads data from a file in various formats (CSV, Excel, JSON, etc.).
39
+
40
+ validate_dataframe(data: Union[pd.DataFrame, np.ndarray, list, str, Path]) -> pd.DataFrame:
41
+ Validates and converts data to a DataFrame. Also accepts file paths.
42
+
43
+ format_number(num: float, decimals: int = 6, scientific: bool = False) -> str:
44
+ Formats a number with specified decimal places.
45
+
46
+ check_normality(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path],
47
+ column: Optional[str] = None, alpha: float = 0.05) -> dict:
48
+ Checks if the data follows a normal distribution using the Shapiro-Wilk test.
49
+
50
+ calculate_confidence_intervals(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path],
51
+ column: Optional[str] = None, confidence_level: float = 0.95,
52
+ Calculates confidence intervals for the mean using parametric or bootstrap methods.
53
+
54
+ detect_outliers(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path],
55
+ column: Optional[str] = None, method: Literal['iqr', 'zscore', 'isolation_forest'] = 'iqr',
56
+ Detects outliers using different methods: 'iqr', 'zscore', or 'isolation_forest'.
57
+
58
+ calculate_effect_size(data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path] = None,
59
+ Calculates the effect size between two groups using Cohen's d or Hedges' g.
60
+
61
+ plot_distribution(data: Union[pd.DataFrame, pd.Series, np.ndarray, str, Path],
62
+ column: Optional[str] = None, plot_type: Literal['hist', 'kde', 'box', 'violin', 'all'] = 'hist',
63
+ bins: int = 30, figsize: Optional[Tuple[int, int]] = None,
64
+ save_fig: Optional[bool] = False, filename: Optional[str] = None, **kwargs):
65
+ Plots the distribution of a variable using various plot types and backends.
66
+
67
+ plot_correlation_matrix(data: Union[pd.DataFrame, str, Path],
68
+ filename: Optional[str] = None, **kwargs):
69
+ Visualizes the correlation matrix using a heatmap.
70
+
71
+ plot_scatter_matrix(data: Union[pd.DataFrame, str, Path],
72
+ filename: Optional[str] = None, **kwargs):
73
+ Creates a scatter matrix (pairplot) for visualizing relationships between variables.
74
+
75
+ plot_distribution_with_ci(data: Union[pd.DataFrame, pd.Series, np.ndarray, str, Path],
76
+ column: Optional[str] = None, confidence_level: float = 0.95,
77
+ ci_method: str = 'parametric', bins: int = 30,
78
+ filename: Optional[str] = None, **kwargs) -> plt.Figure:
79
+ Plots the distribution of a variable with confidence intervals.
80
+
81
+ get_descriptive_stats(data, column=None) -> dict:
82
+ Returns a dictionary of descriptive statistics for the given data.
83
+
84
+ help():
85
+ Displays a complete help guide for the UtilsStats class.
29
86
  """
30
87
 
88
+
31
89
  def __init__(self):
32
90
  """Inicializar la clase utilitaria"""
33
91
  self._plot_backend = 'seaborn'
@@ -398,11 +456,35 @@ class UtilsStats:
398
456
 
399
457
  return outliers
400
458
 
401
- def calculate_effect_size(self, group1: np.ndarray, group2: np.ndarray,
402
- method: Literal['cohen', 'hedges'] = 'cohen') -> dict:
459
+ def calculate_effect_size(self,
460
+ data: Union[pd.Series, np.ndarray, pd.DataFrame, str, Path] = None,
461
+ group1: Union[str, pd.Series, np.ndarray] = None,
462
+ group2: Union[str, pd.Series, np.ndarray] = None,
463
+ method: Literal['cohen', 'hedges'] = 'cohen') -> dict:
403
464
  """
404
465
  Calcula el tamaño del efecto entre dos grupos
405
466
  """
467
+
468
+ # --- Preparar arrays ---
469
+ # Caso 1: data es DataFrame y group1/group2 son nombres de columna
470
+ if isinstance(data, pd.DataFrame):
471
+ group1 = np.array(data[group1])
472
+ group2 = np.array(data[group2])
473
+ # Caso 2: data no es None, y es una serie o array, usarlo como group1
474
+ elif isinstance(data, (pd.Series, np.ndarray)) and group2 is not None:
475
+ group1 = np.array(data)
476
+ group2 = np.array(group2)
477
+ # Caso 3: group1 y group2 ya son arrays o Series
478
+ else:
479
+ group1 = np.array(group1)
480
+ group2 = np.array(group2)
481
+
482
+ # Eliminar nan automáticamente
483
+ group1 = group1[~np.isnan(group1)]
484
+ group2 = group2[~np.isnan(group2)]
485
+
486
+
487
+ # --- Calcular estadísticas ---
406
488
  mean1, mean2 = np.mean(group1), np.mean(group2)
407
489
  std1, std2 = np.std(group1, ddof=1), np.std(group2, ddof=1)
408
490
  n1, n2 = len(group1), len(group2)
@@ -434,6 +516,7 @@ class UtilsStats:
434
516
  'pooled_std': pooled_std
435
517
  }
436
518
 
519
+
437
520
  # ============= MÉTODOS DE VISUALIZACIÓN COMPLETOS =============
438
521
 
439
522
  def _plot_distribution_seaborn(self, data, plot_type, bins, figsize, title, **kwargs):
@@ -528,6 +611,47 @@ class UtilsStats:
528
611
  plt.tight_layout()
529
612
 
530
613
  return fig
614
+
615
+ def _plot_distribution_plotly(self, data, plot_type, bins, title, **kwargs):
616
+ """Implementación con plotly"""
617
+ try:
618
+ import plotly.graph_objects as go
619
+ import plotly.express as px
620
+ from plotly.subplots import make_subplots
621
+ except ImportError:
622
+ raise ImportError("Plotly no está instalado. Instale con: pip install plotly")
623
+
624
+ if plot_type == 'all':
625
+ fig = make_subplots(
626
+ rows=2, cols=2,
627
+ subplot_titles=('Histograma', 'Box Plot', 'Violin Plot', 'Distribución Acumulada')
628
+ )
629
+
630
+ # Histograma
631
+ fig.add_trace(go.Histogram(x=data, nbinsx=bins, name='Histograma'), row=1, col=1)
632
+
633
+ # Box plot
634
+ fig.add_trace(go.Box(y=data, name='Box Plot'), row=1, col=2)
635
+
636
+ # Violin plot
637
+ fig.add_trace(go.Violin(y=data, name='Violin Plot'), row=2, col=1)
638
+
639
+ # Distribución acumulada
640
+ hist, bin_edges = np.histogram(data, bins=bins, density=True)
641
+ cdf = np.cumsum(hist * np.diff(bin_edges))
642
+ fig.add_trace(go.Scatter(x=bin_edges[1:], y=cdf, name='CDF'), row=2, col=2)
643
+
644
+ else:
645
+ if plot_type == 'hist':
646
+ fig = px.histogram(data, nbins=bins, title=title)
647
+ elif plot_type == 'box':
648
+ fig = px.box(y=data, title=title)
649
+ elif plot_type == 'violin':
650
+ fig = px.violin(y=data, title=title, box=True)
651
+ else:
652
+ fig = px.histogram(data, nbins=bins, title=title)
653
+
654
+ return fig
531
655
 
532
656
  def plot_distribution(self,
533
657
  data: Union[pd.DataFrame, pd.Series, np.ndarray, str, Path],
@@ -536,7 +660,7 @@ class UtilsStats:
536
660
  backend: Optional[Literal['matplotlib', 'seaborn', 'plotly']] = "seaborn",
537
661
  bins: int = 30,
538
662
  figsize: Optional[Tuple[int, int]] = None,
539
- save_fig: Optional[bool] = None,
663
+ save_fig: Optional[bool] = False,
540
664
  filename: Optional[str] = None,
541
665
  **kwargs):
542
666
  """
@@ -568,7 +692,7 @@ class UtilsStats:
568
692
  """
569
693
  backend = backend or self._plot_backend
570
694
  figsize = figsize or self._default_figsize
571
- save_fig = save_fig if save_fig is not None else self._save_fig
695
+ self._save_fig = save_fig
572
696
 
573
697
  # Resolver datos
574
698
  data, source = self._resolve_data(data, column)
@@ -605,59 +729,20 @@ class UtilsStats:
605
729
  if save_fig and backend != 'plotly':
606
730
  self._save_figure(fig, filename)
607
731
 
608
- return fig
732
+ if backend == 'plotly':
733
+ return fig
609
734
 
610
735
  except Exception as e:
611
736
  print(f"Error en plot_distribution: {e}")
612
737
  raise
613
738
 
614
- def _plot_distribution_plotly(self, data, plot_type, bins, title, **kwargs):
615
- """Implementación con plotly"""
616
- try:
617
- import plotly.graph_objects as go
618
- import plotly.express as px
619
- from plotly.subplots import make_subplots
620
- except ImportError:
621
- raise ImportError("Plotly no está instalado. Instale con: pip install plotly")
622
-
623
- if plot_type == 'all':
624
- fig = make_subplots(
625
- rows=2, cols=2,
626
- subplot_titles=('Histograma', 'Box Plot', 'Violin Plot', 'Distribución Acumulada')
627
- )
628
-
629
- # Histograma
630
- fig.add_trace(go.Histogram(x=data, nbinsx=bins, name='Histograma'), row=1, col=1)
631
-
632
- # Box plot
633
- fig.add_trace(go.Box(y=data, name='Box Plot'), row=1, col=2)
634
-
635
- # Violin plot
636
- fig.add_trace(go.Violin(y=data, name='Violin Plot'), row=2, col=1)
637
-
638
- # Distribución acumulada
639
- hist, bin_edges = np.histogram(data, bins=bins, density=True)
640
- cdf = np.cumsum(hist * np.diff(bin_edges))
641
- fig.add_trace(go.Scatter(x=bin_edges[1:], y=cdf, name='CDF'), row=2, col=2)
642
-
643
- else:
644
- if plot_type == 'hist':
645
- fig = px.histogram(data, nbins=bins, title=title)
646
- elif plot_type == 'box':
647
- fig = px.box(y=data, title=title)
648
- elif plot_type == 'violin':
649
- fig = px.violin(y=data, title=title, box=True)
650
- else:
651
- fig = px.histogram(data, nbins=bins, title=title)
652
-
653
- return fig
654
-
655
739
  def plot_correlation_matrix(self,
656
740
  data: Union[pd.DataFrame, str, Path],
657
- method: str = 'pearson',
658
- backend: Optional[Literal['seaborn', 'plotly']] = None,
741
+ method: Literal['pearson', 'kendall', 'spearman'] = 'pearson',
742
+ backend: Optional[Literal['seaborn', 'plotly']] = "seaborn",
743
+ triangular: Optional[bool] = False,
659
744
  figsize: Optional[Tuple[int, int]] = None,
660
- save_fig: Optional[bool] = None,
745
+ save_fig: Optional[bool] = False,
661
746
  filename: Optional[str] = None,
662
747
  **kwargs):
663
748
  """
@@ -674,25 +759,32 @@ class UtilsStats:
674
759
  """
675
760
  backend = backend or self._plot_backend
676
761
  figsize = figsize or self._default_figsize
677
- save_fig = save_fig if save_fig is not None else self._save_fig
762
+ self.save_fig = save_fig
678
763
  filename = filename or "matriz_correlacion"
679
-
764
+
680
765
  # Resolver datos
681
766
  data, source = self._resolve_data(data)
682
767
 
683
768
  if not isinstance(data, pd.DataFrame):
684
769
  raise ValueError("Se requiere un DataFrame para calcular matriz de correlación")
770
+ else:
771
+ data = data.select_dtypes(include=['float64', 'int64'])
685
772
 
686
773
  # Calcular matriz de correlación
687
774
  corr_matrix = data.corr(method=method)
688
775
 
689
776
  if backend == 'seaborn':
690
777
  fig, ax = plt.subplots(figsize=figsize)
691
- mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
778
+ if triangular:
779
+ mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
692
780
 
693
- sns.heatmap(corr_matrix, mask=mask, annot=True, fmt='.2f',
694
- cmap='coolwarm', center=0, ax=ax,
695
- square=True, linewidths=0.5, **kwargs)
781
+ sns.heatmap(corr_matrix, mask=mask, annot=True, fmt='.2f',
782
+ cmap='coolwarm', center=0, ax=ax,
783
+ square=True, linewidths=0.5, **kwargs)
784
+ else:
785
+ sns.heatmap(corr_matrix, annot=True, fmt='.2f',
786
+ cmap='coolwarm', center=0, ax=ax,
787
+ square=True, linewidths=0.5, **kwargs)
696
788
  ax.set_title(f'Matriz de Correlación ({method})', fontsize=14, pad=20)
697
789
  plt.tight_layout()
698
790
 
@@ -731,15 +823,15 @@ class UtilsStats:
731
823
  print(f"✓ Figura Plotly guardada: {filepath}")
732
824
  except Exception as e:
733
825
  print(f"✗ Error guardando figura Plotly: {e}")
734
-
735
- return fig
826
+ if backend == 'plotly':
827
+ return fig
736
828
 
737
829
  def plot_scatter_matrix(self,
738
830
  data: Union[pd.DataFrame, str, Path],
739
831
  columns: Optional[List[str]] = None,
740
832
  backend: Optional[Literal['seaborn', 'plotly', 'pandas']] = None,
741
833
  figsize: Optional[Tuple[int, int]] = None,
742
- save_fig: Optional[bool] = None,
834
+ save_fig: Optional[bool] = False,
743
835
  filename: Optional[str] = None,
744
836
  **kwargs):
745
837
  """
@@ -752,7 +844,7 @@ class UtilsStats:
752
844
  """
753
845
  backend = backend or self._plot_backend
754
846
  figsize = figsize or self._default_figsize
755
- save_fig = save_fig if save_fig is not None else self._save_fig
847
+ self.save_fig = save_fig
756
848
  filename = filename or "scatter_matrix"
757
849
 
758
850
  # Resolver datos
@@ -791,7 +883,8 @@ class UtilsStats:
791
883
  except Exception as e:
792
884
  print(f"✗ Error guardando figura Plotly: {e}")
793
885
 
794
- return fig
886
+ if backend == 'plotly':
887
+ return fig
795
888
 
796
889
  # ============= GRÁFICOS CON INTERVALOS DE CONFIANZA =============
797
890
 
@@ -802,7 +895,7 @@ class UtilsStats:
802
895
  ci_method: str = 'parametric',
803
896
  bins: int = 30,
804
897
  figsize: Optional[Tuple[int, int]] = None,
805
- save_fig: Optional[bool] = None,
898
+ save_fig: Optional[bool] = False,
806
899
  filename: Optional[str] = None,
807
900
  **kwargs) -> plt.Figure:
808
901
  """
@@ -838,7 +931,7 @@ class UtilsStats:
838
931
  x_range = np.linspace(data_array.min(), data_array.max(), 300)
839
932
 
840
933
  # ======= FIGURA =======
841
- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize or (14, 6))
934
+ fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize or (14, 6))
842
935
 
843
936
  # ============================================================
844
937
  # PANEL 1: HISTOGRAMA + KDE
@@ -903,109 +996,37 @@ class UtilsStats:
903
996
  plt.tight_layout()
904
997
 
905
998
  # Guardado opcional
906
- save_fig = save_fig if save_fig is not None else self._save_fig
999
+ self.save_fig = save_fig
907
1000
  if save_fig:
908
1001
  self._save_figure(fig, filename)
909
1002
 
910
- return fig
911
-
912
-
913
- def plot_multiple_distributions_with_ci(self,
914
- data_dict: dict,
915
- confidence_level: float = 0.95,
916
- figsize: Optional[Tuple[int, int]] = None,
917
- save_fig: Optional[bool] = None,
918
- filename: Optional[str] = None,
919
- **kwargs) -> plt.Figure:
920
- """
921
- Grafica múltiples distribuciones con sus intervalos de confianza
922
- """
923
- n_distributions = len(data_dict)
924
- fig, axes = plt.subplots(n_distributions, 2,
925
- figsize=figsize or (14, 5 * n_distributions))
926
-
927
- if n_distributions == 1:
928
- axes = axes.reshape(1, -1)
929
-
930
- colors = plt.cm.Set3(np.linspace(0, 1, n_distributions))
931
-
932
- for idx, (name, data) in enumerate(data_dict.items()):
933
- ax1, ax2 = axes[idx]
934
-
935
- if isinstance(data, pd.Series):
936
- data_array = data.dropna().values
937
- else:
938
- data_array = np.array(data)
939
- data_array = data_array[~np.isnan(data_array)]
940
-
941
- # Calcular estadísticas
942
- ci_result = self.calculate_confidence_intervals(data_array, confidence_level=confidence_level)
943
-
944
- # Gráfica izquierda: Distribución básica
945
- ax1.hist(data_array, bins=30, alpha=0.7, color=colors[idx],
946
- edgecolor='black', density=True)
947
-
948
- kde = stats.gaussian_kde(data_array)
949
- x_range = np.linspace(data_array.min(), data_array.max(), 200)
950
- ax1.plot(x_range, kde(x_range), 'k-', linewidth=2)
951
- ax1.axvline(ci_result['mean'], color='red', linestyle='--', linewidth=2)
952
-
953
- ax1.set_title(f'{name}\nMedia: {ci_result["mean"]:.2f}')
954
- ax1.grid(True, alpha=0.3)
955
-
956
- # Gráfica derecha: Con intervalos de confianza
957
- ax2.hist(data_array, bins=30, alpha=0.7, color=colors[idx],
958
- edgecolor='black', density=True)
959
- ax2.plot(x_range, kde(x_range), 'k-', linewidth=2)
960
-
961
- ax2.axvline(ci_result['mean'], color='red', linestyle='-', linewidth=3)
962
- ax2.axvspan(ci_result['ci_lower'], ci_result['ci_upper'],
963
- alpha=0.3, color='orange')
964
- ax2.axvline(ci_result['ci_lower'], color='orange', linestyle='--', linewidth=2)
965
- ax2.axvline(ci_result['ci_upper'], color='orange', linestyle='--', linewidth=2)
966
-
967
- ax2.set_title(f'{name} con IC {confidence_level*100}%')
968
- ax2.grid(True, alpha=0.3)
969
-
970
- plt.tight_layout()
971
-
972
- # Guardar figura si está activado
973
- save_fig = save_fig if save_fig is not None else self._save_fig
974
- if save_fig:
975
- filename = filename or "multiples_distribuciones_ci"
976
- self._save_figure(fig, filename)
977
-
978
- return fig
979
1003
 
980
1004
  # ============= MÉTODOS UTILITARIOS ADICIONALES =============
981
1005
 
982
- def get_descriptive_stats(self,
983
- data: Union[pd.DataFrame, pd.Series, np.ndarray, str, Path],
984
- column: Optional[str] = None) -> dict:
985
- """
986
- Obtiene estadísticas descriptivas completas
987
-
988
- Ahora acepta rutas de archivos
989
- """
990
- # Resolver datos
991
- data, source = self._resolve_data(data, column)
992
-
1006
+ def get_descriptive_stats(self, data, column=None):
1007
+
993
1008
  if isinstance(data, pd.DataFrame):
994
1009
  if column is None:
995
- raise ValueError("Debe especificar 'column' cuando data es DataFrame")
1010
+ raise ValueError("Debe especificarse una columna")
996
1011
  data_series = data[column]
997
- elif isinstance(data, pd.Series):
998
- data_series = data
999
1012
  else:
1000
1013
  data_series = pd.Series(data)
1001
-
1014
+
1002
1015
  data_clean = data_series.dropna()
1003
-
1016
+
1017
+ if len(data_clean) == 0:
1018
+ return {k: np.nan for k in [
1019
+ 'count','mean','median','mode','std','variance',
1020
+ 'min','max','q1','q3','iqr','skewness','kurtosis','range'
1021
+ ]}
1022
+
1023
+ mode_result = stats.mode(data_clean, keepdims=False)
1024
+
1004
1025
  return {
1005
1026
  'count': len(data_clean),
1006
1027
  'mean': np.mean(data_clean),
1007
1028
  'median': np.median(data_clean),
1008
- 'mode': stats.mode(data_clean)[0][0] if len(data_clean) > 0 else np.nan,
1029
+ 'mode': mode_result.mode,
1009
1030
  'std': np.std(data_clean, ddof=1),
1010
1031
  'variance': np.var(data_clean, ddof=1),
1011
1032
  'min': np.min(data_clean),
@@ -1017,7 +1038,6 @@ class UtilsStats:
1017
1038
  'kurtosis': stats.kurtosis(data_clean),
1018
1039
  'range': np.max(data_clean) - np.min(data_clean)
1019
1040
  }
1020
-
1021
1041
  def help(self):
1022
1042
  """
1023
1043
  Muestra ayuda completa de la clase DescriptiveStats
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: statslibx
3
- Version: 0.1.7
4
- Summary: StatsLibx - Librería de estadística descriptiva e inferencial
3
+ Version: 0.2.0
4
+ Summary: StatsLibx - Librería de estadística descriptiva, inferencial y computacional
5
5
  Author-email: Emmanuel Ascendra Perez <ascendraemmanuel@gmail.com>
6
6
  License: MIT
7
7
  Classifier: Development Status :: 3 - Alpha
@@ -24,11 +24,13 @@ Provides-Extra: advanced
24
24
  Requires-Dist: scikit-learn>=1.0; extra == "advanced"
25
25
  Requires-Dist: statsmodels>=0.13; extra == "advanced"
26
26
 
27
- # 📦 Descripción para PyPI (Plantilla Profesional)
27
+ # 📦 StatsLibX
28
28
 
29
29
  StatsLibX es un paquete de Python diseñado para proporcionar una solución sencilla, eficiente y flexible para manejar volúmenes de datos.
30
30
 
31
- Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadistica descriptiva e inferencial** sin complicaciones, con multiples funcionalidades y utilidades pensadas para el futuro.
31
+ Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y ligera que permita a desarrolladores y entusiastas integrar la **estadística descriptiva, inferencial y computacional (en desarrollo)** sin complicaciones, con múltiples funcionalidades y utilidades pensadas para el futuro.
32
+
33
+ GitHub del Proyecto: [https://github.com/GhostAnalyst30/StatsLibX](https://github.com/GhostAnalyst30/StatsLibX)
32
34
 
33
35
  ## ✨ Características principales
34
36
 
@@ -45,16 +47,28 @@ Este proyecto surge con la idea de ofrecer una alternativa moderna, intuitiva y
45
47
  ## 🚀 Ejemplo rápido
46
48
  ```python
47
49
  from statslibx import DescriptiveStats, InferentialStats, UtilsStats
50
+ from statslibx.datasets import load_iris
51
+
52
+ data = load_iris()
48
53
 
49
54
  stats = DescriptiveStats(data) # InferentialStats(data), UtilsStats()
50
- stats.help()
55
+
56
+ stats.summary()
51
57
  ```
58
+ Para ver más funciones: [https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb](https://github.com/GhostAnalyst30/StatsLibX/blob/main/how_use_statslibx.ipynb)
52
59
 
53
60
  ## 📦 Instalación
54
61
  ```bash
55
62
  pip install statslibx
56
63
  ```
57
64
 
65
+ ## 👩‍💻 ¡Úsalo en la terminal! (de forma preliminar)
66
+ ```bash
67
+ statslibx describe .\archive.csv # Devuelve una descripción de los datos
68
+ statslibx quality .\archive.csv # Devuelve la calidad de los datos
69
+ statslibx preview .\archive.csv # Devuelve una visualización de los datos
70
+ ```
71
+
58
72
  ## 🤝 Contribuciones
59
73
 
60
74
  ¡Todas las mejoras e ideas son bienvenidas!
@@ -0,0 +1,19 @@
1
+ statslibx/__init__.py,sha256=YUKUQhO1vUYvcUQmlz1ZtvU6MWNZERdAG55-trf25ZY,1500
2
+ statslibx/cli.py,sha256=DqXaoP85n9xgLDlFnEkeqj-HJG0_IKX0uSqxRcHbzII,1122
3
+ statslibx/computacional.py,sha256=z46bRUiH9a3ajxVTYE2sGO-pg20L87MdOKM3Y_Tcq44,4062
4
+ statslibx/descriptive.py,sha256=GrUR4QfstUeLTXdxKSZsmKaOJkDso-QH51hlwTUaubA,63513
5
+ statslibx/inferential.py,sha256=xiJCppezhWK4TrAARdOufuxjZcoGKsfHtRujKfuXbgg,83068
6
+ statslibx/io.py,sha256=v7pxpmlEMeKyfXftl3WbkUtC9FOh1pymz7MmKPPNw98,493
7
+ statslibx/utils.py,sha256=gWXduW8LMN1q4ZwNggmodRsT9Rcsot-S82NsQiqrjUo,69992
8
+ statslibx/datasets/__init__.py,sha256=wiSp4qGwpILCiaN5vVuwWgKnbdELpbi5pxnNB9Wg2nI,7282
9
+ statslibx/datasets/course_completion.csv,sha256=jaqyxAh4YCsYuH5OFsjvGV7KUyM_7vQt6LgnqnNAFsI,22422135
10
+ statslibx/datasets/iris.csv,sha256=xSdC5QMVqZ-Vajg_rt91dVUmdfZAnvD5pHB23QhHmTA,3858
11
+ statslibx/datasets/penguins.csv,sha256=4HY2vYr3QmAJnqL4Z44uq7813vV5lAzHb2cGHuFsBsE,13478
12
+ statslibx/datasets/sp500_companies.csv,sha256=WKS72YOGnAbyLR6kD95fOpIYZt5oXGjPryyFVqLRF_k,803820
13
+ statslibx/datasets/titanic.csv,sha256=5seOS8ybyBMBCCWhgKZrsbu06m_OWyKtD9l0YXOImXU,29474
14
+ statslibx/preprocessing/__init__.py,sha256=ZwdwjBodxeOry-umJ__6yUSeubpRlZg41yve366ArkY,7395
15
+ statslibx-0.2.0.dist-info/METADATA,sha256=w7f-3RgizY3PHUSxoBl6YuHImHz2qFyillhZk82WUfE,2993
16
+ statslibx-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
17
+ statslibx-0.2.0.dist-info/entry_points.txt,sha256=bkCY7JDWNCZFE3I4sjgJ2oGrUgoBBbCbYmWkBAymT70,49
18
+ statslibx-0.2.0.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
19
+ statslibx-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,18 +0,0 @@
1
- statslibx/__init__.py,sha256=vXAOPdog5n_b64FRybiWI4VNA_eou7eQuZBcQiQz79E,1297
2
- statslibx/cli.py,sha256=DqXaoP85n9xgLDlFnEkeqj-HJG0_IKX0uSqxRcHbzII,1122
3
- statslibx/descriptive.py,sha256=UTb104Gho0uNeSALlukgrYwXrGMDwmIEy39-yvHuy8M,60184
4
- statslibx/inferential.py,sha256=0lpVAp2SiKDgWkH3z3JoVFAjMaXW2VboxtA2vwPwq04,49947
5
- statslibx/io.py,sha256=v7pxpmlEMeKyfXftl3WbkUtC9FOh1pymz7MmKPPNw98,493
6
- statslibx/utils.py,sha256=qDqF_XgvEJbdQURA2v0gF0sw0nNQR4-MFXDvVTl_00s,68480
7
- statslibx/datasets/__init__.py,sha256=HlOjJFalKVAycJEi7_J_OB7ss8jgSWpPQnsHTynt0uo,2273
8
- statslibx/datasets/course_completion.csv,sha256=jaqyxAh4YCsYuH5OFsjvGV7KUyM_7vQt6LgnqnNAFsI,22422135
9
- statslibx/datasets/iris.csv,sha256=xSdC5QMVqZ-Vajg_rt91dVUmdfZAnvD5pHB23QhHmTA,3858
10
- statslibx/datasets/penguins.csv,sha256=4HY2vYr3QmAJnqL4Z44uq7813vV5lAzHb2cGHuFsBsE,13478
11
- statslibx/datasets/sp500_companies.csv,sha256=WKS72YOGnAbyLR6kD95fOpIYZt5oXGjPryyFVqLRF_k,803820
12
- statslibx/datasets/titanic.csv,sha256=5seOS8ybyBMBCCWhgKZrsbu06m_OWyKtD9l0YXOImXU,29474
13
- statslibx/preprocessing/__init__.py,sha256=B6qI_KuqWf0FFnLLFafIaPOIM9ABo73InKCscSypdqI,7107
14
- statslibx-0.1.7.dist-info/METADATA,sha256=GN3chKZ7qSdoAKeD54rCxiwRoWk0wiFpLxHmxtc6Skc,2321
15
- statslibx-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
- statslibx-0.1.7.dist-info/entry_points.txt,sha256=bkCY7JDWNCZFE3I4sjgJ2oGrUgoBBbCbYmWkBAymT70,49
17
- statslibx-0.1.7.dist-info/top_level.txt,sha256=eeYZXyFm0hIjuI0ba3wF6XW938Mv9tv7Nk9qgjYfCtU,10
18
- statslibx-0.1.7.dist-info/RECORD,,