statslibx 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statslibx/__init__.py +12 -8
- statslibx/computacional.py +2 -0
- statslibx/datasets/__init__.py +227 -54
- statslibx/descriptive.py +8 -9
- statslibx/inferential.py +746 -307
- statslibx/preprocessing/__init__.py +12 -5
- statslibx/probability.py +2 -0
- statslibx/utils.py +112 -150
- {statslibx-0.1.7.dist-info → statslibx-0.1.8.dist-info}/METADATA +17 -3
- statslibx-0.1.8.dist-info/RECORD +15 -0
- statslibx/datasets/course_completion.csv +0 -100001
- statslibx/datasets/iris.csv +0 -151
- statslibx/datasets/penguins.csv +0 -345
- statslibx/datasets/sp500_companies.csv +0 -504
- statslibx/datasets/titanic.csv +0 -419
- statslibx-0.1.7.dist-info/RECORD +0 -18
- {statslibx-0.1.7.dist-info → statslibx-0.1.8.dist-info}/WHEEL +0 -0
- {statslibx-0.1.7.dist-info → statslibx-0.1.8.dist-info}/entry_points.txt +0 -0
- {statslibx-0.1.7.dist-info → statslibx-0.1.8.dist-info}/top_level.txt +0 -0
statslibx/inferential.py
CHANGED
@@ -1,32 +1,39 @@
 from dataclasses import dataclass
 import numpy as np
 import pandas as pd
-
+import polars as pl
+from typing import Optional, Union, Literal, List, Dict, Any, Tuple
 from datetime import datetime
 from scipy import stats
 import os

 class InferentialStats:
     """
-
+    Class for inferential statistics (hypothesis tests, confidence intervals, etc.)
     """

     def __init__(self, data: Union[pd.DataFrame, np.ndarray],
-                 backend: Literal['pandas', 'polars'] = 'pandas'
+                 backend: Literal['pandas', 'polars'] = 'pandas',
+                 lang: Literal['es-ES', 'en-US'] = 'es-ES'):
         """
-
+        Initialize DataFrame

         Parameters:
         -----------
         data : DataFrame o ndarray
-
+            Data to analyze
         backend : str
-            'pandas'
+            'pandas' or 'polars' for processing
         """

         if isinstance(data, str) and os.path.exists(data):
             data = InferentialStats.from_file(data).data

+        if isinstance(data, pl.DataFrame):
+            raise TypeError(
+                "Polars aún no soportado. Use pandas.DataFrame."
+            )
+
         if isinstance(data, np.ndarray):
             if data.ndim == 1:
                 data = pd.DataFrame({'var': data})
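The hunk above reworks the constructor. A minimal usage sketch, assuming statslibx 0.1.8 (with pandas and polars installed) and invented data; the column name is illustrative only:

```python
import numpy as np
import pandas as pd
import polars as pl

from statslibx.inferential import InferentialStats

df = pd.DataFrame({"salario": np.random.normal(50000, 8000, size=200)})

# New in 0.1.8: lang selects the language used by help() and formatted output.
inf = InferentialStats(df, backend="pandas", lang="es-ES")

# Polars frames are now rejected up front with an explicit TypeError.
try:
    InferentialStats(pl.DataFrame({"x": [1, 2, 3]}))
except TypeError as exc:
    print(exc)
```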
@@ -36,8 +43,9 @@ class InferentialStats:
         self.data = data
         self.backend = backend
         self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
+        self.lang = lang

-    @
+    @classmethod
     def from_file(path: str):
         """
         Carga automática de archivos y devuelve instancia de Intelligence.
@@ -76,14 +84,14 @@ class InferentialStats:
     def confidence_interval(self, column: str, confidence: float = 0.95,
                             statistic: Literal['mean', 'median', 'proportion'] = 'mean') -> tuple:
         """
-
+        Confidence interval for different statistics

         Parameters:
         -----------
         column : str
-
+            Column to analyze
         confidence : float
-
+            Confidence level (default 0.95 = 95%)
         statistic : str
             'mean', 'median' o 'proportion'

@@ -128,9 +136,10 @@ class InferentialStats:

     def t_test_1sample(self, column: str, popmean: float = None,
                        popmedian: float = None,
-                       alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided'
+                       alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided',
+                       alpha: float = 0.05) -> 'TestResult':
         """
-
+        One sample t test (for mean or median)

         Parameters:
         -----------
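The new alpha keyword shown above is forwarded to the returned TestResult, which uses it for its automatic interpretation. A hedged sketch of a call, with invented data, assuming the 0.1.8 API in this diff:

```python
import numpy as np
import pandas as pd

from statslibx.inferential import InferentialStats

df = pd.DataFrame({"peso": np.random.normal(70, 5, size=50)})
inf = InferentialStats(df)

# alpha travels into the TestResult and drives its reject / fail-to-reject label.
res = inf.t_test_1sample(column="peso", popmean=72,
                         alternative="two-sided", alpha=0.01)
print(res.pvalue, res.alpha, res.interpretation)
```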
@@ -149,7 +158,7 @@ class InferentialStats:

         if popmean is not None:
             statistic, pvalue = stats.ttest_1samp(data, popmean, alternative=alternative)
-
+
             return TestResult(
                 test_name='T-Test de Una Muestra (Media)',
                 statistic=statistic,
@@ -160,13 +169,14 @@ class InferentialStats:
                     'sample_mean': data.mean(),
                     'n': len(data),
                     'df': len(data) - 1
-                }
+                },
+                alpha=alpha
             )

         elif popmedian is not None:
             # Wilcoxon signed-rank test para mediana
             statistic, pvalue = stats.wilcoxon(data - popmedian, alternative=alternative)
-
+
             return TestResult(
                 test_name='Wilcoxon Signed-Rank Test (Mediana)',
                 statistic=statistic,
@@ -184,7 +194,7 @@ class InferentialStats:

     def t_test_2sample(self, column1: str, column2: str,
                        equal_var: bool = True,
-                       alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided') -> 'TestResult':
+                       alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
         """
         Prueba t de dos muestras independientes

@@ -214,11 +224,12 @@ class InferentialStats:
                 'std1': data1.std(), 'std2': data2.std(),
                 'n1': len(data1), 'n2': len(data2),
                 'equal_var': equal_var
-            }
+            },
+            alpha=alpha
         )

     def t_test_paired(self, column1: str, column2: str,
-                      alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided') -> 'TestResult':
+                      alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
         """
         Prueba t pareada

@@ -241,11 +252,12 @@ class InferentialStats:
             statistic=statistic,
             pvalue=pvalue,
             alternative=alternative,
-            params={'mean_diff': (data1 - data2).mean(), 'n': len(data1)}
+            params={'mean_diff': (data1 - data2).mean(), 'n': len(data1)},
+            alpha=alpha
         )

     def mann_whitney_test(self, column1: str, column2: str,
-                          alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided') -> 'TestResult':
+                          alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
         """
         Prueba de Mann-Whitney U (alternativa no paramétrica al t-test)

@@ -273,10 +285,12 @@ class InferentialStats:
                 'median2': data2.median(),
                 'n1': len(data1),
                 'n2': len(data2)
-            }
+            },
+            alpha=alpha
         )

-    def chi_square_test(self, column1: str, column2: str
+    def chi_square_test(self, column1: str, column2: str,
+                        alpha: float = 0.05) -> 'TestResult':
         """
         Prueba Chi-cuadrado de independencia

@@ -295,10 +309,12 @@ class InferentialStats:
             statistic=chi2,
             pvalue=pvalue,
             alternative='two-sided',
-            params={'dof': dof, 'contingency_table': contingency_table}
+            params={'dof': dof, 'contingency_table': contingency_table},
+            alpha=alpha
         )

-    def anova_oneway(self, column: str, groups: str
+    def anova_oneway(self, column: str, groups: str,
+                     alpha: float = 0.05) -> 'TestResult':
         """
         ANOVA de un factor

@@ -310,11 +326,16 @@ class InferentialStats:
             Variable de agrupación (categórica)
         """
         from scipy import stats
+        clean_data = self.data[[column, groups]].dropna()

-        groups_data = [group[column].values
+        groups_data = [group[column].values
+                       for _, group in clean_data.groupby(groups)
+                       if len(group) > 1 and group[column].var() > 0
+                       ]
+
         statistic, pvalue = stats.f_oneway(*groups_data)

-        return TestResult(
+        return TestResult(
             test_name='ANOVA de Un Factor',
             statistic=statistic,
             pvalue=pvalue,
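The filtering added above protects f_oneway from groups that would make the ANOVA undefined. A standalone sketch of the same filtering with plain pandas and scipy, on invented data:

```python
import pandas as pd
from scipy import stats

df = pd.DataFrame({
    "rendimiento": [10.1, 9.8, 11.2, 10.5, 9.9, 10.0, 7.0, 7.0],
    "departamento": ["A", "A", "A", "B", "B", None, "C", "C"],
})

# Same filtering as the hunk: drop NaNs, then skip groups with fewer than
# two rows or zero variance.
clean = df[["rendimiento", "departamento"]].dropna()
groups = [g["rendimiento"].values
          for _, g in clean.groupby("departamento")
          if len(g) > 1 and g["rendimiento"].var() > 0]

statistic, pvalue = stats.f_oneway(*groups)  # group "C" (zero variance) is excluded
print(statistic, pvalue)
```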
@@ -322,10 +343,12 @@ class InferentialStats:
             params={
                 'groups': len(groups_data),
                 'n_total': sum(len(g) for g in groups_data)
-            }
+            },
+            alpha=alpha
         )

-    def kruskal_wallis_test(self, column: str, groups: str
+    def kruskal_wallis_test(self, column: str, groups: str,
+                            alpha: float = 0.05) -> 'TestResult':
         """
         Prueba de Kruskal-Wallis (ANOVA no paramétrico)

@@ -337,8 +360,13 @@ class InferentialStats:
             Variable de agrupación (categórica)
         """
         from scipy import stats
+
+        clean_data = self.data[[column, groups]].dropna()

-        groups_data = [group[column].values
+        groups_data = [group[column].values
+                       for _, group in clean_data.groupby(groups)
+                       if len(group) > 1 and group[column].var() > 0
+                       ]
         statistic, pvalue = stats.kruskal(*groups_data)

         return TestResult(
@@ -349,12 +377,14 @@ class InferentialStats:
             params={
                 'groups': len(groups_data),
                 'n_total': sum(len(g) for g in groups_data)
-            }
+            },
+            alpha=alpha
         )

     def normality_test(self, column: str,
                        method: Literal['shapiro', 'ks', 'anderson', 'jarque_bera', 'all'] = 'shapiro',
-                       test_statistic: Literal['mean', 'median', 'mode'] = 'mean'
+                       test_statistic: Literal['mean', 'median', 'mode'] = 'mean',
+                       alpha: float = 0.05) -> Union['TestResult', dict]:
         """
         Prueba de normalidad con múltiples métodos y estadísticos

@@ -396,6 +426,9 @@ class InferentialStats:
             scale = np.std(data, ddof=1)
         else:
             raise ValueError(f"test_statistic '{test_statistic}' no reconocido")
+
+        critical_values = None
+        significance_levels = None

         if method == 'all':
             results = {}
@@ -423,13 +456,13 @@ class InferentialStats:

             # Anderson-Darling
             anderson_result = stats.anderson(data, dist='norm')
-            results['anderson_darling'] =
-
-
-
-
-
-
+            results['anderson_darling'] = TestResult(
+                test_name=f'Anderson-Darling ({test_statistic})',
+                statistic=anderson_result.statistic,
+                critical_values=anderson_result.critical_values,
+                significance_levels=anderson_result.significance_level,
+                params={'n': n, 'test_statistic': test_statistic, 'loc': loc, 'scale': scale}
+            )

             # Jarque-Bera
             stat_jb, p_jb = stats.jarque_bera(data)
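Anderson-Darling results are now wrapped in a TestResult carrying critical values rather than a p-value. A short scipy-only sketch of the quantities involved, on invented data:

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
data = rng.normal(loc=0.0, scale=1.0, size=200)

res = stats.anderson(data, dist="norm")
print(res.statistic)            # A² statistic
print(res.critical_values)      # one critical value per significance level
print(res.significance_level)   # e.g. [15. , 10. ,  5. ,  2.5,  1. ]

# Decision rule mirrored by the TestResult report: reject normality at a given
# level when A² exceeds the corresponding critical value.
for sl, cv in zip(res.significance_level, res.critical_values):
    verdict = "reject" if res.statistic > cv else "do not reject"
    print(f"alpha = {sl}%: {verdict}")
```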
@@ -462,14 +495,12 @@ class InferentialStats:

         elif method == 'anderson':
             anderson_result = stats.anderson(data, dist='norm')
-
-
-
-
-
-
-                'interpretation': self._interpret_anderson(anderson_result)
-            }
+            test_name = f'Anderson-Darling ({test_statistic})'
+            pvalue = None
+            statistic = anderson_result.statistic
+            critical_values = anderson_result.critical_values
+            significance_levels = anderson_result.significance_level
+            params = {'n': n, 'test_statistic': test_statistic, 'loc': loc, 'scale': scale}

         elif method == 'jarque_bera':
             statistic, pvalue = stats.jarque_bera(data)
@@ -489,25 +520,19 @@ class InferentialStats:
             statistic=statistic,
             pvalue=pvalue,
             alternative='two-sided',
-            params=params
+            params=params,
+            critical_values=critical_values,
+            significance_levels=significance_levels,
+            alpha=alpha
         )
-
-    def _interpret_anderson(self, anderson_result):
-        """Interpreta resultados de Anderson-Darling"""
-        interpretations = []
-        for i, (crit_val, sig_level) in enumerate(zip(anderson_result.critical_values,
-                                                      anderson_result.significance_level)):
-            if anderson_result.statistic < crit_val:
-                interpretations.append(f"No se rechaza normalidad al {sig_level}% de significancia")
-            else:
-                interpretations.append(f"Se RECHAZA normalidad al {sig_level}% de significancia")
-        return interpretations

     def hypothesis_test(
         self,
         method: Literal["mean", "difference_mean", "proportion", "variance"] = "mean",
         column1: str = None,
         column2: str = None,
+        pop_mean: float = None,
+        pop_proportion: Union[float, Tuple[float, float]] = 0.5,
         alpha: float = 0.05,
         homoscedasticity: Literal["levene", "bartlett", "var_test"] = "levene") -> Dict[str, Any]:

@@ -522,11 +547,14 @@ class InferentialStats:
            Columnas numéricas a comparar
        alpha : float
            Nivel de significancia (default 0.05)
+        pop_mean : float
+            Media poblacional
+        pop_proportion : float
+            Proporción poblacional (default 0.5)
        homoscedasticity : str
            Método de homocedasticidad
            'levene', 'bartlett' o 'var_test'
        """
-
        data = self.data

        if column1 is None:
@@ -547,7 +575,7 @@ class InferentialStats:
        # --- MAIN HYPOTHESIS TESTS ---
        if method == "mean":
            # One-sample t-test
-            t_stat, p_value = stats.ttest_1samp(x, popmean=
+            t_stat, p_value = stats.ttest_1samp(x, popmean=pop_mean)
            test_name = "One-sample t-test"

        elif method == "difference_mean":
@@ -558,13 +586,46 @@ class InferentialStats:

        elif method == "proportion":
            # Proportion test (z-test)
-
+
+            x = np.asarray(x)
+
+            # Caso 1: datos ya binarios
+            unique_vals = np.unique(x)
+            if set(unique_vals).issubset({0, 1}):
+
+                if pop_proportion is None:
+                    raise ValueError("Debe especificarse pop_proportion")
+
+                pop_p = pop_proportion
+
+            # Caso 2: datos continuos → binarizar
+            else:
+                if not isinstance(pop_proportion, tuple):
+                    raise ValueError(
+                        "Para datos continuos, pop_proportion debe ser (p0, binizar_value)"
+                    )
+
+                pop_p, binizar_value = pop_proportion
+                x = (x > binizar_value).astype(int)
+
+            if not (0 < pop_p < 1):
+                raise ValueError("pop_proportion debe estar entre 0 y 1")
+
            n = len(x)
-
+            p_hat = np.mean(x)
+
+            if n * pop_p < 5 or n * (1 - pop_p) < 5:
+                raise ValueError(
+                    "Condiciones del Z-test no cumplidas: np0 y n(1-p0) deben ser ≥ 5"
+                )
+
+            z_stat = (p_hat - pop_p) / np.sqrt(pop_p * (1 - pop_p) / n)
            p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))
+
            t_stat = z_stat
            test_name = "Proportion Z-test"

+
        elif method == "variance":
            # Classic F-test
            var_x = np.var(x, ddof=1)
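The proportion branch above implements a one-sample z-test. A worked sketch of the same formula with plain numpy/scipy, using an invented binary sample:

```python
import numpy as np
from scipy import stats

# Invented sample: 14 successes out of 20 trials, H0: p = 0.5.
x = np.array([1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1])
p0 = 0.5

n = len(x)
p_hat = x.mean()

# Normal-approximation check used by the library: n*p0 and n*(1-p0) must be >= 5.
assert n * p0 >= 5 and n * (1 - p0) >= 5

z = (p_hat - p0) / np.sqrt(p0 * (1 - p0) / n)
p_value = 2 * (1 - stats.norm.cdf(abs(z)))
print(z, p_value)   # roughly z ≈ 1.79, p ≈ 0.07
```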
@@ -577,15 +638,19 @@ class InferentialStats:
            t_stat = F
            test_name = "Variance F-test"

-
-        "
-
-        "
-
-
-
-
-
+        if p_value < alpha:
+            self.interpretation = "Se RECHAZA la hipótesis nula"
+        else:
+            self.interpretation = ("Se RECHAZA la hipotesis alternativa")
+        return TestResult(
+            test_name=test_name,
+            statistic=t_stat,
+            pvalue=p_value,
+            alternative='two-sided',
+            alpha=alpha,
+            homo_result=homo_result
+        )
+
    def _homoscedasticity_test(
        self,
        x,
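With the pop_mean and pop_proportion parameters added in this version, hypothesis_test can be driven roughly as sketched below. This is a hedged sketch under the assumption that the 0.1.8 code paths shown in these hunks behave as written; the column name and data are invented:

```python
import numpy as np
import pandas as pd

from statslibx.inferential import InferentialStats

df = pd.DataFrame({"nota": np.random.normal(6.0, 1.2, size=120)})
inf = InferentialStats(df)

# One-sample mean test against a hypothesised population mean.
res_mean = inf.hypothesis_test(method="mean", column1="nota", pop_mean=6.5)

# Proportion test on continuous data: pop_proportion is (p0, threshold); the
# column is binarised as (value > threshold) before the z-test.
res_prop = inf.hypothesis_test(method="proportion", column1="nota",
                               pop_proportion=(0.5, 6.0), alpha=0.05)
print(res_mean.interpretation)
print(res_prop.interpretation)
```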
@@ -617,8 +682,8 @@ class InferentialStats:

    def variance_test(self, column1: str, column2: str,
                      method: Literal['levene', 'bartlett', 'var_test'] = 'levene',
-                      center: Literal['mean', 'median', 'trimmed'] = 'median'
-                      ) -> 'TestResult':
+                      center: Literal['mean', 'median', 'trimmed'] = 'median',
+                      alpha: float = 0.05) -> 'TestResult':
        """
        Prueba de igualdad de varianzas entre dos columnas.

@@ -690,23 +755,38 @@ class InferentialStats:
            statistic=statistic,
            pvalue=pvalue,
            alternative='two-sided',
-            params=params
+            params=params,
+            alpha=alpha
        )


    def help(self):
        """
-        Muestra ayuda completa de la clase
+        Muestra ayuda completa de la clase DescriptiveStats
+
+        Parametros / Parameters:
+        ------------------------
+        lang: str
+            Idioma Usuario: Codigo de Idioma (es-Es) o "Español"
+            User Language: Languaje Code (en-Us) or "English"
        """
-
+
+        if self.lang in ["en-US", "English", "english"]:
+            self.lang = "en-US"
+        else:
+            self.lang = "es-ES"
+        help_text = " "
+        match self.lang:
+            case "es-ES":
+                help_text = """
 ╔════════════════════════════════════════════════════════════════════════════╗
 ║                 🔬 CLASE InferentialStats - AYUDA COMPLETA                  ║
 ╚════════════════════════════════════════════════════════════════════════════╝

 📝 DESCRIPCIÓN:
-
-
-
+    Clase para estadística inferencial: pruebas de hipótesis, intervalos de
+    confianza y pruebas de normalidad. Permite realizar inferencias sobre
+    poblaciones a partir de muestras de datos.

 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

@@ -716,291 +796,595 @@ class InferentialStats:
|
|
|
716
796
|
│ 1. 📊 INTERVALOS DE CONFIANZA │
|
|
717
797
|
└────────────────────────────────────────────────────────────────────────────┘
|
|
718
798
|
|
|
719
|
-
|
|
799
|
+
• .confidence_interval(column, confidence=0.95, statistic='mean')
|
|
720
800
|
|
|
721
801
|
Calcula intervalos de confianza para diferentes estadísticos
|
|
722
802
|
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
803
|
+
Parámetros:
|
|
804
|
+
column : Columna a analizar (str)
|
|
805
|
+
confidence : Nivel de confianza (float, default 0.95 = 95%)
|
|
806
|
+
statistic : 'mean', 'median' o 'proportion'
|
|
807
|
+
|
|
808
|
+
Retorna: (lower_bound, upper_bound, point_estimate)
|
|
729
809
|
|
|
730
810
|
┌────────────────────────────────────────────────────────────────────────────┐
|
|
731
811
|
│ 2. 🧪 PRUEBAS DE HIPÓTESIS - UNA MUESTRA │
|
|
732
812
|
└────────────────────────────────────────────────────────────────────────────┘
|
|
733
813
|
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
Prueba t de una muestra (o Wilcoxon para mediana)
|
|
814
|
+
• .t_test_1sample(column, popmean=None, popmedian=None,
|
|
815
|
+
alternative='two-sided')
|
|
738
816
|
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
817
|
+
Prueba t de una muestra (o Wilcoxon para mediana)
|
|
818
|
+
|
|
819
|
+
Parámetros:
|
|
820
|
+
column : Columna a analizar
|
|
821
|
+
popmean : Media poblacional hipotética (para t-test)
|
|
822
|
+
popmedian : Mediana poblacional hipotética (para Wilcoxon)
|
|
823
|
+
alternative : 'two-sided', 'less', 'greater'
|
|
744
824
|
|
|
745
825
|
┌────────────────────────────────────────────────────────────────────────────┐
|
|
746
826
|
│ 3. 🧪 PRUEBAS DE HIPÓTESIS - DOS MUESTRAS │
|
|
747
827
|
└────────────────────────────────────────────────────────────────────────────┘
|
|
748
828
|
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
829
|
+
🔹 Pruebas Paramétricas:
|
|
830
|
+
|
|
831
|
+
• .t_test_2sample(column1, column2, equal_var=True,
|
|
832
|
+
alternative='two-sided')
|
|
833
|
+
Prueba t de dos muestras independientes
|
|
834
|
+
|
|
835
|
+
• .t_test_paired(column1, column2, alternative='two-sided')
|
|
836
|
+
Prueba t pareada (muestras dependientes)
|
|
837
|
+
|
|
838
|
+
🔹 Pruebas No Paramétricas:
|
|
839
|
+
|
|
840
|
+
• .mann_whitney_test(column1, column2, alternative='two-sided')
|
|
841
|
+
Alternativa no paramétrica al t-test de dos muestras
|
|
842
|
+
|
|
843
|
+
🔹 Pruebas Extras:
|
|
844
|
+
• .hypothesis_test(method='mean', column1=None, column2=None,
|
|
845
|
+
alpha=0.05, homoscedasticity='levene')
|
|
846
|
+
• .variance_test(column1, column2, method='levene', center='median')
|
|
767
847
|
|
|
768
848
|
|
|
769
849
|
┌────────────────────────────────────────────────────────────────────────────┐
|
|
770
850
|
│ 4. 🧪 PRUEBAS PARA MÚLTIPLES GRUPOS │
|
|
771
851
|
└────────────────────────────────────────────────────────────────────────────┘
|
|
772
852
|
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
853
|
+
🔹 Pruebas Paramétricas:
|
|
854
|
+
|
|
855
|
+
• .anova_oneway(column, groups)
|
|
856
|
+
ANOVA de un factor para comparar múltiples grupos
|
|
857
|
+
|
|
858
|
+
🔹 Pruebas No Paramétricas:
|
|
859
|
+
|
|
860
|
+
• .kruskal_wallis_test(column, groups)
|
|
861
|
+
Alternativa no paramétrica a ANOVA
|
|
782
862
|
|
|
783
863
|
┌────────────────────────────────────────────────────────────────────────────┐
|
|
784
864
|
│ 5. 🧪 PRUEBAS PARA VARIABLES CATEGÓRICAS │
|
|
785
865
|
└────────────────────────────────────────────────────────────────────────────┘
|
|
786
866
|
|
|
787
|
-
|
|
788
|
-
|
|
867
|
+
• .chi_square_test(column1, column2)
|
|
868
|
+
Prueba Chi-cuadrado de independencia entre variables categóricas
|
|
789
869
|
|
|
790
870
|
┌────────────────────────────────────────────────────────────────────────────┐
|
|
791
871
|
│ 6. 📈 PRUEBAS DE NORMALIDAD │
|
|
792
872
|
└────────────────────────────────────────────────────────────────────────────┘
|
|
793
873
|
|
|
794
|
-
|
|
874
|
+
• .normality_test(column, method='shapiro', test_statistic='mean')
|
|
795
875
|
|
|
796
|
-
|
|
876
|
+
Prueba si los datos siguen una distribución normal
|
|
797
877
|
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
878
|
+
Métodos disponibles:
|
|
879
|
+
'shapiro' : Shapiro-Wilk (mejor para n ≤ 5000)
|
|
880
|
+
'ks' : Kolmogorov-Smirnov
|
|
881
|
+
'anderson' : Anderson-Darling
|
|
882
|
+
'jarque_bera' : Jarque-Bera (basado en asimetría y curtosis)
|
|
883
|
+
'all' : Ejecuta todos los tests
|
|
804
884
|
|
|
805
|
-
|
|
885
|
+
test_statistic: 'mean', 'median' o 'mode' para centrar la distribución
|
|
806
886
|
|
|
807
887
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
808
888
|
|
|
809
889
|
💡 EJEMPLOS DE USO:
|
|
810
890
|
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
891
|
+
┌─ Ejemplo 1: Intervalos de Confianza ────────────────────────────────────┐
|
|
892
|
+
│ from inferential import InferentialStats │
|
|
893
|
+
│ import pandas as pd │
|
|
894
|
+
│ │
|
|
895
|
+
│ df = pd.read_csv('datos.csv') │
|
|
896
|
+
│ inf_stats = InferentialStats(df) │
|
|
897
|
+
│ │
|
|
898
|
+
│ # IC para la media (95%) │
|
|
899
|
+
│ lower, upper, mean = inf_stats.confidence_interval( │
|
|
900
|
+
│ 'salario', │
|
|
901
|
+
│ confidence=0.95, │
|
|
902
|
+
│ statistic='mean' │
|
|
903
|
+
│ ) │
|
|
904
|
+
│ print(f"IC 95%: [{lower:.2f}, {upper:.2f}]") │
|
|
905
|
+
│ │
|
|
906
|
+
│ # IC para la mediana (bootstrap) │
|
|
907
|
+
│ lower, upper, median = inf_stats.confidence_interval( │
|
|
908
|
+
│ 'edad', │
|
|
909
|
+
│ confidence=0.99, │
|
|
910
|
+
│ statistic='median' │
|
|
911
|
+
│ ) │
|
|
912
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
913
|
+
|
|
914
|
+
┌─ Ejemplo 2: Prueba t de Una Muestra ────────────────────────────────────┐
|
|
915
|
+
│ # H0: μ = 50000 (la media salarial es 50000) │
|
|
916
|
+
│ # H1: μ ≠ 50000 (prueba bilateral) │
|
|
917
|
+
│ │
|
|
918
|
+
│ resultado = inf_stats.t_test_1sample( │
|
|
919
|
+
│ column='salario', │
|
|
920
|
+
│ popmean=50000, │
|
|
921
|
+
│ alternative='two-sided' │
|
|
922
|
+
│ ) │
|
|
923
|
+
│ │
|
|
924
|
+
│ print(resultado) │
|
|
925
|
+
│ # Muestra: estadístico t, valor p, interpretación │
|
|
926
|
+
│ │
|
|
927
|
+
│ # Prueba unilateral │
|
|
928
|
+
│ resultado = inf_stats.t_test_1sample( │
|
|
929
|
+
│ column='salario', │
|
|
930
|
+
│ popmean=50000, │
|
|
931
|
+
│ alternative='greater' # H1: μ > 50000 │
|
|
932
|
+
│ ) │
|
|
933
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
934
|
+
|
|
935
|
+
┌─ Ejemplo 3: Comparación de Dos Grupos ──────────────────────────────────┐
|
|
936
|
+
│ # Prueba t independiente │
|
|
937
|
+
│ resultado = inf_stats.t_test_2sample( │
|
|
938
|
+
│ column1='salario_hombres', │
|
|
939
|
+
│ column2='salario_mujeres', │
|
|
940
|
+
│ equal_var=True, │
|
|
941
|
+
│ alternative='two-sided' │
|
|
942
|
+
│ ) │
|
|
943
|
+
│ print(resultado) │
|
|
944
|
+
│ │
|
|
945
|
+
│ # Prueba Mann-Whitney (no paramétrica) │
|
|
946
|
+
│ resultado = inf_stats.mann_whitney_test( │
|
|
947
|
+
│ column1='salario_grupo_a', │
|
|
948
|
+
│ column2='salario_grupo_b', │
|
|
949
|
+
│ alternative='two-sided' │
|
|
950
|
+
│ ) │
|
|
951
|
+
│ │
|
|
952
|
+
│ # Prueba t pareada (mediciones antes/después) │
|
|
953
|
+
│ resultado = inf_stats.t_test_paired( │
|
|
954
|
+
│ column1='peso_antes', │
|
|
955
|
+
│ column2='peso_despues', │
|
|
956
|
+
│ alternative='two-sided' │
|
|
957
|
+
│ ) │
|
|
958
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
959
|
+
|
|
960
|
+
┌─ Ejemplo 4: ANOVA y Kruskal-Wallis ─────────────────────────────────────┐
|
|
961
|
+
│ # ANOVA para comparar múltiples grupos │
|
|
962
|
+
│ resultado = inf_stats.anova_oneway( │
|
|
963
|
+
│ column='rendimiento', │
|
|
964
|
+
│ groups='departamento' │
|
|
965
|
+
│ ) │
|
|
966
|
+
│ print(resultado) │
|
|
967
|
+
│ │
|
|
968
|
+
│ # Kruskal-Wallis (alternativa no paramétrica) │
|
|
969
|
+
│ resultado = inf_stats.kruskal_wallis_test( │
|
|
970
|
+
│ column='satisfaccion', │
|
|
971
|
+
│ groups='categoria' │
|
|
972
|
+
│ ) │
|
|
973
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
974
|
+
|
|
975
|
+
┌─ Ejemplo 5: Chi-Cuadrado ───────────────────────────────────────────────┐
|
|
976
|
+
│ # Probar independencia entre variables categóricas │
|
|
977
|
+
│ resultado = inf_stats.chi_square_test( │
|
|
978
|
+
│ column1='genero', │
|
|
979
|
+
│ column2='preferencia_producto' │
|
|
980
|
+
│ ) │
|
|
981
|
+
│ print(resultado) │
|
|
982
|
+
│ │
|
|
983
|
+
│ # El resultado incluye la tabla de contingencia │
|
|
984
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
985
|
+
|
|
986
|
+
┌─ Ejemplo 6: Pruebas de Normalidad ──────────────────────────────────────┐
|
|
987
|
+
│ # Shapiro-Wilk (recomendado para n ≤ 5000) │
|
|
988
|
+
│ resultado = inf_stats.normality_test( │
|
|
989
|
+
│ column='edad', │
|
|
990
|
+
│ method='shapiro', │
|
|
991
|
+
│ test_statistic='mean' │
|
|
992
|
+
│ ) │
|
|
993
|
+
│ print(resultado) │
|
|
994
|
+
│ │
|
|
995
|
+
│ # Kolmogorov-Smirnov │
|
|
996
|
+
│ resultado = inf_stats.normality_test( │
|
|
997
|
+
│ column='salario', │
|
|
998
|
+
│ method='ks' │
|
|
999
|
+
│ ) │
|
|
1000
|
+
│ │
|
|
1001
|
+
│ # Ejecutar todos los tests │
|
|
1002
|
+
│ resultados = inf_stats.normality_test( │
|
|
1003
|
+
│ column='ingresos', │
|
|
1004
|
+
│ method='all', │
|
|
1005
|
+
│ test_statistic='median' │
|
|
1006
|
+
│ ) │
|
|
1007
|
+
│ │
|
|
1008
|
+
│ # Acceder a cada test │
|
|
1009
|
+
│ print(resultados['shapiro']) │
|
|
1010
|
+
│ print(resultados['kolmogorov_smirnov']) │
|
|
1011
|
+
│ print(resultados['anderson_darling']) │
|
|
1012
|
+
│ print(resultados['jarque_bera']) │
|
|
1013
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
934
1014
|
|
|
935
1015
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
936
1016
|
|
|
937
1017
|
📊 GUÍA DE SELECCIÓN DE PRUEBAS:
|
|
938
1018
|
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
1019
|
+
┌─ Comparar Una Muestra vs Valor de Referencia ───────────────────────────┐
|
|
1020
|
+
│ Datos normales → t_test_1sample (con popmean) │
|
|
1021
|
+
│ Datos no normales → t_test_1sample (con popmedian, usa Wilcoxon) │
|
|
1022
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
943
1023
|
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
1024
|
+
┌─ Comparar Dos Grupos Independientes ────────────────────────────────────┐
|
|
1025
|
+
│ Datos normales → t_test_2sample │
|
|
1026
|
+
│ Datos no normales → mann_whitney_test │
|
|
1027
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
948
1028
|
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
1029
|
+
┌─ Comparar Dos Grupos Pareados ──────────────────────────────────────────┐
|
|
1030
|
+
│ Datos normales → t_test_paired │
|
|
1031
|
+
│ Datos no normales → (use scipy.stats.wilcoxon directamente) │
|
|
1032
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
953
1033
|
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
1034
|
+
┌─ Comparar Múltiples Grupos ─────────────────────────────────────────────┐
|
|
1035
|
+
│ Datos normales → anova_oneway │
|
|
1036
|
+
│ Datos no normales → kruskal_wallis_test │
|
|
1037
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
958
1038
|
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
1039
|
+
┌─ Probar Independencia entre Categóricas ────────────────────────────────┐
|
|
1040
|
+
│ Variables categóricas → chi_square_test │
|
|
1041
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
962
1042
|
|
|
963
1043
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
964
1044
|
|
|
965
1045
|
🎯 CARACTERÍSTICAS CLAVE:
|
|
966
1046
|
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
1047
|
+
✓ Pruebas paramétricas y no paramétricas
|
|
1048
|
+
✓ Intervalos de confianza con múltiples métodos
|
|
1049
|
+
✓ Pruebas de normalidad completas
|
|
1050
|
+
✓ Interpretación automática de resultados
|
|
1051
|
+
✓ Manejo automático de valores faltantes
|
|
1052
|
+
✓ Salidas formateadas profesionales
|
|
1053
|
+
✓ Soporte para análisis bilateral y unilateral
|
|
974
1054
|
|
|
975
1055
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
976
1056
|
|
|
977
1057
|
⚠️ INTERPRETACIÓN DE RESULTADOS:
|
|
978
1058
|
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
1059
|
+
• Valor p < 0.05: Se rechaza H0 (evidencia significativa)
|
|
1060
|
+
• Valor p ≥ 0.05: No se rechaza H0 (evidencia insuficiente)
|
|
1061
|
+
• IC que no incluye el valor nulo: Evidencia contra H0
|
|
982
1062
|
|
|
983
1063
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
984
1064
|
|
|
985
1065
|
📚 DOCUMENTACIÓN ADICIONAL:
|
|
986
|
-
|
|
987
|
-
|
|
1066
|
+
Para más información sobre métodos específicos, use:
|
|
1067
|
+
help(InferentialStats.nombre_metodo)
|
|
988
1068
|
|
|
989
1069
|
╚════════════════════════════════════════════════════════════════════════════╝
|
|
990
1070
|
"""
|
|
1071
|
+
case "en-US":
|
|
1072
|
+
help_text = """
|
|
1073
|
+
╔════════════════════════════════════════════════════════════════════════════╗
|
|
1074
|
+
║ 🔬 CLASS InferentialStats - COMPLETE HELP ║
|
|
1075
|
+
╚════════════════════════════════════════════════════════════════════════════╝
|
|
1076
|
+
|
|
1077
|
+
📝 DESCRIPTION:
|
|
1078
|
+
Class for inferential statistics: hypothesis tests, intervals
|
|
1079
|
+
confidence and normality tests. Allows inferences to be made about
|
|
1080
|
+
populations from data samples.
|
|
1081
|
+
|
|
1082
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1083
|
+
|
|
1084
|
+
📋 MAIN METHODS:
|
|
1085
|
+
|
|
1086
|
+
┌────────────────────────────────────────────────────────────────────────────┐
|
|
1087
|
+
│ 1. 📊 CONFIDENCE INTERVALS │
|
|
1088
|
+
└────────────────────────────────────────────────────────────────────────────┘
|
|
1089
|
+
|
|
1090
|
+
• .confidence_interval(column, confidence=0.95, statistic='mean')
|
|
1091
|
+
|
|
1092
|
+
Calculate confidence intervals for different statistics
|
|
1093
|
+
|
|
1094
|
+
Parameters:
|
|
1095
|
+
column : Column to analyze (str)
|
|
1096
|
+
confidence : Confidence level (float, default 0.95 = 95%)
|
|
1097
|
+
statistic : 'mean', 'median' or 'proportion'
|
|
1098
|
+
|
|
1099
|
+
Return: (lower_bound, upper_bound, point_estimate)
|
|
1100
|
+
|
|
1101
|
+
┌────────────────────────────────────────────────────────────────────────────┐
|
|
1102
|
+
│ 2. 🧪 HYPOTHESIS TESTING - A SAMPLE │
|
|
1103
|
+
└────────────────────────────────────────────────────────────────────────────┘
|
|
1104
|
+
|
|
1105
|
+
• .t_test_1sample(column, popmean=None, popmedian=None,
|
|
1106
|
+
alternative='two-sided')
|
|
1107
|
+
|
|
1108
|
+
One sample t test (or Wilcoxon for median)
|
|
1109
|
+
|
|
1110
|
+
Parameters:
|
|
1111
|
+
column : Column to analyze
|
|
1112
|
+
popmean : Hypothetical population mean (for t-test)
|
|
1113
|
+
popmedian : Hypothetical population median (for Wilcoxon)
|
|
1114
|
+
alternative : 'two-sided', 'less', 'greater'
|
|
1115
|
+
|
|
1116
|
+
┌────────────────────────────────────────────────────────────────────────────┐
|
|
1117
|
+
│ 3. 🧪 HYPOTHESIS TESTING - TWO SAMPLES │
|
|
1118
|
+
└────────────────────────────────────────────────────────────────────────────┘
|
|
1119
|
+
|
|
1120
|
+
🔹 Parametric Tests:
|
|
1121
|
+
|
|
1122
|
+
• .t_test_2sample(column1, column2, equal_var=True,
|
|
1123
|
+
alternative='two-sided')
|
|
1124
|
+
Two independent samples t test
|
|
1125
|
+
|
|
1126
|
+
• .t_test_paired(column1, column2, alternative='two-sided')
|
|
1127
|
+
Paired t test (dependent samples)
|
|
1128
|
+
|
|
1129
|
+
🔹 Non-Parametric Tests:
|
|
1130
|
+
|
|
1131
|
+
• .mann_whitney_test(column1, column2, alternative='two-sided')
|
|
1132
|
+
Non-parametric alternative to the two-sample t-test
|
|
1133
|
+
|
|
1134
|
+
🔹 Extra Tests:
|
|
1135
|
+
• .hypothesis_test(method='mean', column1=None, column2=None,
|
|
1136
|
+
alpha=0.05, homoscedasticity='levene')
|
|
1137
|
+
• .variance_test(column1, column2, method='levene', center='median')
|
|
1138
|
+
|
|
1139
|
+
|
|
1140
|
+
┌────────────────────────────────────────────────────────────────────────────┐
|
|
1141
|
+
│ 4. 🧪 TESTING FOR MULTIPLE GROUPS │
|
|
1142
|
+
└────────────────────────────────────────────────────────────────────────────┘
|
|
1143
|
+
|
|
1144
|
+
🔹 Parametric Tests:
|
|
1145
|
+
|
|
1146
|
+
• .anova_oneway(column, groups)
|
|
1147
|
+
One-way ANOVA to compare multiple groups
|
|
1148
|
+
|
|
1149
|
+
🔹 Non-Parametric Tests:
|
|
1150
|
+
|
|
1151
|
+
• .kruskal_wallis_test(column, groups)
|
|
1152
|
+
Non-parametric alternative to ANOVA
|
|
1153
|
+
|
|
1154
|
+
┌────────────────────────────────────────────────────────────────────────────┐
|
|
1155
|
+
│ 5. 🧪 TESTS FOR CATEGORICAL VARIABLES │
|
|
1156
|
+
└────────────────────────────────────────────────────────────────────────────┘
|
|
1157
|
+
|
|
1158
|
+
• .chi_square_test(column1, column2)
|
|
1159
|
+
Chi-square test of independence between categorical variables
|
|
1160
|
+
|
|
1161
|
+
┌────────────────────────────────────────────────────────────────────────────┐
|
|
1162
|
+
│ 6. 📈 NORMALITY TESTS │
|
|
1163
|
+
└────────────────────────────────────────────────────────────────────────────┘
|
|
1164
|
+
|
|
1165
|
+
• .normality_test(column, method='shapiro', test_statistic='mean')
|
|
1166
|
+
|
|
1167
|
+
Tests whether the data follows a normal distribution
|
|
1168
|
+
|
|
1169
|
+
Available methods:
|
|
1170
|
+
'shapiro' : Shapiro-Wilk (best for n ≤ 5000)
|
|
1171
|
+
'ks' : Kolmogorov-Smirnov
|
|
1172
|
+
'anderson' : Anderson-Darling
|
|
1173
|
+
'jarque_bera' : Jarque-Bera (based on skewness and kurtosis)
|
|
1174
|
+
'all' : Run all tests
|
|
1175
|
+
|
|
1176
|
+
test_statistic: 'mean', 'median' o 'mode' to focus the distribution
|
|
1177
|
+
|
|
1178
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1179
|
+
|
|
1180
|
+
💡 EXAMPLES OF USE:
|
|
1181
|
+
|
|
1182
|
+
┌─ Example 1: Confidence Intervals ───────────────────────────────────────┐
|
|
1183
|
+
│ from inferential import InferentialStats │
|
|
1184
|
+
│ import pandas as pd │
|
|
1185
|
+
│ │
|
|
1186
|
+
│ df = pd.read_csv('data.csv') │
|
|
1187
|
+
│ inf_stats = InferentialStats(df) │
|
|
1188
|
+
│ │
|
|
1189
|
+
│ # CI for mean (95%) │
|
|
1190
|
+
│ lower, upper, mean = inf_stats.confidence_interval( │
|
|
1191
|
+
│ 'salario', │
|
|
1192
|
+
│ confidence=0.95, │
|
|
1193
|
+
│ statistic='mean' │
|
|
1194
|
+
│ ) │
|
|
1195
|
+
│ print(f"IC 95%: [{lower:.2f}, {upper:.2f}]") │
|
|
1196
|
+
│ │
|
|
1197
|
+
│ # CI for the median (bootstrap) │
|
|
1198
|
+
│ lower, upper, median = inf_stats.confidence_interval( │
|
|
1199
|
+
│ 'edad', │
|
|
1200
|
+
│ confidence=0.99, │
|
|
1201
|
+
│ statistic='median' │
|
|
1202
|
+
│ ) │
|
|
1203
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1204
|
+
|
|
1205
|
+
┌─ Example 2: One Sample t-test ──────────────────────────────────────────┐
|
|
1206
|
+
│ # H0: μ = 50000 (the average salary is 50,000) │
|
|
1207
|
+
│ # H1: μ ≠ 50000 (two-sided test) │
|
|
1208
|
+
│ │
|
|
1209
|
+
│ result = inf_stats.t_test_1sample( │
|
|
1210
|
+
│ column='salary', │
|
|
1211
|
+
│ popmean=50000, │
|
|
1212
|
+
│ alternative='two-sided' │
|
|
1213
|
+
│ ) │
|
|
1214
|
+
│ │
|
|
1215
|
+
│ print(result) │
|
|
1216
|
+
│ # Sample: t-statistic, p-value, interpretation │
|
|
1217
|
+
│ │
|
|
1218
|
+
│ # One-sided test │
|
|
1219
|
+
│ result = inf_stats.t_test_1sample( │
|
|
1220
|
+
│ column='salary', │
|
|
1221
|
+
│ popmean=50000, │
|
|
1222
|
+
│ alternative='greater' # H1: μ > 50000 │
|
|
1223
|
+
│ ) │
|
|
1224
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1225
|
+
|
|
1226
|
+
┌─ Example 3: Comparison of Two Groups ───────────────────────────────────┐
|
|
1227
|
+
│ # Independent t test │
|
|
1228
|
+
│ result = inf_stats.t_test_2sample( │
|
|
1229
|
+
│ column1='men_salary', │
|
|
1230
|
+
│ column2='women_salary', │
|
|
1231
|
+
│ equal_var=True, │
|
|
1232
|
+
│ alternative='two-sided' │
|
|
1233
|
+
│ ) │
|
|
1234
|
+
│ print(result) │
|
|
1235
|
+
│ │
|
|
1236
|
+
│ # Mann-Whitney test (non-parametric) │
|
|
1237
|
+
│ result = inf_stats.mann_whitney_test( │
|
|
1238
|
+
│ column1='salary_group_a', │
|
|
1239
|
+
│ column2='salary_group_b', │
|
|
1240
|
+
│ alternative='two-sided' │
|
|
1241
|
+
│ ) │
|
|
1242
|
+
│ │
|
|
1243
|
+
│ # Paired t-test (before/after measurements) │
|
|
1244
|
+
│ result = inf_stats.t_test_paired( │
|
|
1245
|
+
│ column1='weight_before', │
|
|
1246
|
+
│ column2='after_weight', │
|
|
1247
|
+
│ alternative='two-sided' │
|
|
1248
|
+
│) │
|
|
1249
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1250
|
+
|
|
1251
|
+
┌─ Example 4: ANOVA and Kruskal-Wallis ───────────────────────────────────┐
|
|
1252
|
+
│ # ANOVA to compare multiple groups │
|
|
1253
|
+
│ result = inf_stats.anova_oneway( │
|
|
1254
|
+
│ column='performance', │
|
|
1255
|
+
│ groups='department' │
|
|
1256
|
+
│ ) │
|
|
1257
|
+
│ print(result) │
|
|
1258
|
+
│ │
|
|
1259
|
+
│ # Kruskal-Wallis (non-parametric alternative) │
|
|
1260
|
+
│ result = inf_stats.kruskal_wallis_test( │
|
|
1261
|
+
│ column='satisfaction', │
|
|
1262
|
+
│ groups='category' │
|
|
1263
|
+
│) │
|
|
1264
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1265
|
+
|
|
1266
|
+
┌─ Example 5: Chi-Square ─────────────────────────────────────────────────┐
|
|
1267
|
+
│ # Test independence between categorical variables │
|
|
1268
|
+
│ result = inf_stats.chi_square_test( │
|
|
1269
|
+
│ column1='gender', │
|
|
1270
|
+
│ column2='product_preference' │
|
|
1271
|
+
│ ) │
|
|
1272
|
+
│ print(result) │
|
|
1273
|
+
│ │
|
|
1274
|
+
│ # The result includes the contingency table │
|
|
1275
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1276
|
+
|
|
1277
|
+
┌─ Example 6: Normality Tests ────────────────────────────────────────────┐
|
|
1278
|
+
│ # Shapiro-Wilk (recommended for n ≤ 5000) │
|
|
1279
|
+
│ result = inf_stats.normality_test( │
|
|
1280
|
+
│ column='age', │
|
|
1281
|
+
│ method='shapiro', │
|
|
1282
|
+
│ test_statistic='mean' │
|
|
1283
|
+
│ ) │
|
|
1284
|
+
│ print(result) │
|
|
1285
|
+
│ │
|
|
1286
|
+
│ # Kolmogorov-Smirnov │
|
|
1287
|
+
│ result = inf_stats.normality_test( │
|
|
1288
|
+
│ column='salary', │
|
|
1289
|
+
│ method='ks' │
|
|
1290
|
+
│ ) │
|
|
1291
|
+
│ │
|
|
1292
|
+
│ # Run all tests │
|
|
1293
|
+
│ results = inf_stats.normality_test( │
|
|
1294
|
+
│ column='income', │
|
|
1295
|
+
│ method='all', │
|
|
1296
|
+
│ test_statistic='median' │
|
|
1297
|
+
│ ) │
|
|
1298
|
+
│ │
|
|
1299
|
+
│ # Access each test │
|
|
1300
|
+
│ print(results['shapiro']) │
|
|
1301
|
+
│ print(results['kolmogorov_smirnov']) │
|
|
1302
|
+
│ print(results['anderson_darling']) │
|
|
1303
|
+
│ print(results['jarque_bera']) │
|
|
1304
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1305
|
+
|
|
1306
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1307
|
+
|
|
1308
|
+
📊 GUÍA DE SELECCIÓN DE PRUEBAS:
|
|
1309
|
+
|
|
1310
|
+
┌─ Compare A Sample vs Reference Value ───────────────────────────────────┐
|
|
1311
|
+
│ Normal data → t_test_1sample (with mean) │
|
|
1312
|
+
│ Non-normal data → t_test_1sample (with popmedian, uses Wilcoxon) │
|
|
1313
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1314
|
+
|
|
1315
|
+
┌─ Compare Two Independent Groups ────────────────────────────────────────┐
|
|
1316
|
+
│ Normal data → t_test_2sample │
|
|
1317
|
+
│ Non-normal data → mann_whitney_test │
|
|
1318
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1319
|
+
|
|
1320
|
+
┌─ Compare Two Paired Groups ─────────────────────────────────────────────┐
|
|
1321
|
+
│ Normal data → t_test_paired │
|
|
1322
|
+
│ Non-normal data → (use scipy.stats.wilcoxon directly) │
|
|
1323
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1324
|
+
|
|
1325
|
+
┌─ Compare Multiple Groups ───────────────────────────────────────────────┐
|
|
1326
|
+
│ Normal data → anova_oneway │
|
|
1327
|
+
│ Non-normal data → kruskal_wallis_test │
|
|
1328
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1329
|
+
|
|
1330
|
+
┌─ Testing Independence between Categories ───────────────────────────────┐
|
|
1331
|
+
│ Categorical variables → chi_square_test │
|
|
1332
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
1333
|
+
|
|
1334
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1335
|
+
|
|
1336
|
+
🎯 KEY FEATURES:
|
|
1337
|
+
|
|
1338
|
+
✓ Parametric and non-parametric tests
|
|
1339
|
+
✓ Confidence intervals with multiple methods
|
|
1340
|
+
✓ Complete normality tests
|
|
1341
|
+
✓ Automatic interpretation of results
|
|
1342
|
+
✓ Automatic handling of missing values
|
|
1343
|
+
✓ Professional formatted outputs
|
|
1344
|
+
✓ Support for bilateral and unilateral analysis
|
|
1345
|
+
|
|
1346
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1347
|
+
|
|
1348
|
+
⚠️ INTERPRETATION OF RESULTS:
|
|
1349
|
+
|
|
1350
|
+
• P value < 0.05: H0 is rejected (significant evidence)
|
|
1351
|
+
• P value ≥ 0.05: H0 is not rejected (insufficient evidence)
|
|
1352
|
+
• CI that does not include the null value: Evidence against H0
|
|
1353
|
+
|
|
1354
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1355
|
+
|
|
1356
|
+
📚 ADDITIONAL DOCUMENTATION:
|
|
1357
|
+
For more information on specific methods, use:
|
|
1358
|
+
help(InferentialStats.method_name)
|
|
1359
|
+
|
|
1360
|
+
╚════════════════════════════════════════════════════════════════════════════╝
|
|
1361
|
+
"""
         print(help_text)

 @dataclass
 class TestResult:
     """Clase para resultados de pruebas de hipótesis"""

-    def __init__(self, test_name: str, statistic: float,
-
+    def __init__(self, test_name: str, statistic: float, alpha: float = 0.05,
+                 params: dict = None, pvalue: float = None,
+                 alternative: str = None, critical_values=None,
+                 significance_levels=None, homo_result=None):
         self.test_name = test_name
         self.statistic = statistic
         self.pvalue = pvalue
         self.alternative = alternative
         self.params = params
+        self.critical_values = critical_values
+        self.significance_levels = significance_levels
+        self.interpretation = "Aun no hay interpretacion"
+        self.homo_result = homo_result
+        self.alpha = alpha
+
+        if self.pvalue is not None:
+            if self.pvalue < self.alpha:
+                self.interpretation = "Se RECHAZA la hipótesis nula"
+            else:
+                self.interpretation = "Se RECHAZA la hipótesis alternativa"

     def __repr__(self):
         return self._format_output()
@@ -1014,28 +1398,83 @@ class TestResult:
         output.append(f"Fecha: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
         output.append(f"Hipótesis Alternativa: {self.alternative}")
         output.append("-" * 80)
-
+
         output.append("\nRESULTADOS:")
         output.append("-" * 80)
         output.append(f"{'Estadístico':<40} {self.statistic:>20.6f}")
-
-
-
-
-
-
-
-
-
+
+        # Mostrar valores críticos o p-value
+        if self.critical_values is not None and self.significance_levels is not None:
+            output.append("Valores Críticos:")
+            for sl, cv in zip(self.significance_levels, self.critical_values):
+                output.append(f"  α = {sl:>6.3f} → {cv:.6f}")
+        elif self.pvalue is not None:
+            output.append(f"{'Valor p':<40} {self.pvalue:>20.6e}")
+
+        # -------------------------
+        # INTERPRETACIÓN
+        # -------------------------
         output.append("\nINTERPRETACIÓN:")
         output.append("-" * 80)
-
-
-
-
-
-
-
-
+
+        alpha = 0.05
+
+        # Caso tests con p-value
+        if self.pvalue is not None:
+            output.append(f"Alpha = {alpha}")
+
+            if self.pvalue < alpha:
+                output.append("❌ Se RECHAZA la hipótesis nula")
+            else:
+                output.append("✔️ No hay evidencia suficiente para rechazar la hipótesis nula")
+
+        # Caso tests con valores críticos (ej. Anderson-Darling)
+        else:
+            # Protección mínima
+            if self.significance_levels is None or self.critical_values is None:
+                output.append("Resultado no disponible")
+            else:
+                idx = min(
+                    range(len(self.significance_levels)),
+                    key=lambda i: abs(self.significance_levels[i] - alpha)
+                )
+
+                critical_value = self.critical_values[idx]
+
+                output.append(f"Nivel de significancia (α) = {alpha}")
+                output.append(f"Estadístico A² = {self.statistic:.4f}")
+                output.append(f"Valor crítico = {critical_value:.4f}")
+
+                if self.statistic > critical_value:
+                    output.append("❌ Se RECHAZA la hipótesis nula")
+                else:
+                    output.append("✔️ No hay evidencia suficiente para rechazar la hipótesis nula")
+
+        # -------------------------
+        # HOMOCEDASTICIDAD
+        # -------------------------
+        if isinstance(self.homo_result, dict):
+            homo = self.homo_result
+
+            if isinstance(homo, dict):
+                output.append("\nTEST DE HOMOCEDASTICIDAD:")
+                output.append(f"Método: {homo['method']}")
+                output.append(f"Estadístico: {homo['statistic']:.6f}")
+                output.append(f"Valor p: {homo['p_value']:.6e}")
+
+                if homo.get("equal_var") is True:
+                    output.append("✔️ Se asume igualdad de varianzas")
+                elif homo.get("equal_var") is False:
+                    output.append("❌ No se asume igualdad de varianzas")
+
+        # -------------------------
+        # PARÁMETROS
+        # -------------------------
+        if isinstance(self.params, dict):
+            output.append("\nPARÁMETROS:")
+            output.append("-" * 80)
+            for k, v in self.params.items():
+                output.append(f"{k:<40} {str(v):>20}")
+
         output.append("=" * 80)
         return "\n".join(output)
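A hedged sketch of how the reworked TestResult report is consumed, assuming the 0.1.8 API shown in this diff; the column name and data are invented:

```python
import numpy as np
import pandas as pd

from statslibx.inferential import InferentialStats

df = pd.DataFrame({"ingresos": np.random.lognormal(mean=10, sigma=0.4, size=300)})
inf = InferentialStats(df)

# Anderson-Darling: the report lists critical values and compares A² against
# the one whose significance level is closest to alpha.
print(inf.normality_test(column="ingresos", method="anderson"))

# Shapiro-Wilk: p-value based, so the usual reject / fail-to-reject message is shown.
res = inf.normality_test(column="ingresos", method="shapiro")
print(res.pvalue, res.interpretation)
```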