statslibx-0.1.7-py3-none-any.whl → statslibx-0.2.0-py3-none-any.whl
- statslibx/__init__.py +12 -8
- statslibx/computacional.py +126 -0
- statslibx/datasets/__init__.py +243 -54
- statslibx/descriptive.py +80 -15
- statslibx/inferential.py +812 -312
- statslibx/preprocessing/__init__.py +12 -5
- statslibx/utils.py +183 -163
- {statslibx-0.1.7.dist-info → statslibx-0.2.0.dist-info}/METADATA +19 -5
- statslibx-0.2.0.dist-info/RECORD +19 -0
- {statslibx-0.1.7.dist-info → statslibx-0.2.0.dist-info}/WHEEL +1 -1
- statslibx-0.1.7.dist-info/RECORD +0 -18
- {statslibx-0.1.7.dist-info → statslibx-0.2.0.dist-info}/entry_points.txt +0 -0
- {statslibx-0.1.7.dist-info → statslibx-0.2.0.dist-info}/top_level.txt +0 -0
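The headline change in inferential.py for 0.2.0 is that every test method now accepts an alpha significance level (forwarded to TestResult, which derives an interpretation from it), and the constructor gains sep, decimal, thousand and lang options. A minimal usage sketch under those assumptions, based only on the signatures shown in the diff below; the import path is assumed from the file layout, and the DataFrame contents and column name are illustrative:

    import pandas as pd
    from statslibx.inferential import InferentialStats  # path assumed from statslibx/inferential.py below

    # Illustrative data standing in for a real dataset
    df = pd.DataFrame({"salario": [48000, 52000, 51000, 49500, 50700, 53200]})

    inf_stats = InferentialStats(df, backend="pandas", lang="es-ES")

    # New in 0.2.0: alpha is accepted and passed through to TestResult
    resultado = inf_stats.t_test_1sample(column="salario", popmean=50000,
                                         alternative="two-sided", alpha=0.05)
    print(resultado)  # formatted report: statistic, p-value, interpretation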
statslibx/inferential.py
CHANGED
@@ -1,32 +1,96 @@
 from dataclasses import dataclass
 import numpy as np
 import pandas as pd
-
+import polars as pl
+from typing import Optional, Union, Literal, List, Dict, Any, Tuple
 from datetime import datetime
 from scipy import stats
 import os

 class InferentialStats:
-    """
-
+    """
+    InferentialStats
+    A class for performing inferential statistical analysis, including hypothesis tests, confidence intervals,
+    normality tests, and more. This class supports operations on pandas DataFrame or numpy arrays.
+    Attributes:
+    -----------
+    data : pd.DataFrame
+        The dataset to analyze.
+        The backend used for processing ('pandas' or 'polars').
+    sep : str
+        Separator for reading files.
+    decimal : str
+        Decimal separator for reading files.
+    thousand : str
+        Thousand separator for reading files.
+    lang : str
+        Language for help and error messages ('es-ES' or 'en-US').
+
+    Methods:
+    --------
+    from_file(path: str):
+        Load data from a file and return an instance of InferentialStats.
+
+    confidence_interval(column: str, confidence: float = 0.95, statistic: Literal['mean', 'median', 'proportion'] = 'mean') -> tuple:
+        Calculate confidence intervals for mean, median, or proportion.
+
+    t_test_1sample(column: str, popmean: float = None, popmedian: float = None, alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
+        Perform a one-sample t-test or Wilcoxon signed-rank test for median.
+
+    t_test_2sample(column1: str, column2: str, equal_var: bool = True, alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
+        Perform a two-sample independent t-test.
+
+    t_test_paired(column1: str, column2: str, alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
+        Perform a paired t-test for dependent samples.
+
+    mann_whitney_test(column1: str, column2: str, alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
+        Perform the Mann-Whitney U test, a non-parametric alternative to the two-sample t-test.
+
+    chi_square_test(column1: str, column2: str, alpha: float = 0.05) -> 'TestResult':
+        Perform a Chi-square test of independence between two categorical variables.
+
+    anova_oneway(column: str, groups: str, alpha: float = 0.05) -> 'TestResult':
+        Perform a one-way ANOVA test to compare means across multiple groups.
+
+    kruskal_wallis_test(column: str, groups: str, alpha: float = 0.05) -> 'TestResult':
+        Perform the Kruskal-Wallis test, a non-parametric alternative to one-way ANOVA.
+
+    normality_test(column: str, method: Literal['shapiro', 'ks', 'anderson', 'jarque_bera', 'all'] = 'shapiro', test_statistic: Literal['mean', 'median', 'mode'] = 'mean', alpha: float = 0.05) -> Union['TestResult', dict]:
+        Perform normality tests using various methods.
+
+    hypothesis_test(method: Literal["mean", "difference_mean", "proportion", "variance"] = "mean", column1: str = None, column2: str = None, pop_mean: float = None, pop_proportion: Union[float, Tuple[float, float]] = 0.5, alpha: float = 0.05, homoscedasticity: Literal["levene", "bartlett", "var_test"] = "levene") -> Dict[str, Any]:
+        Perform hypothesis testing for mean, difference of means, proportion, or variance.
+
+    variance_test(column1: str, column2: str, method: Literal['levene', 'bartlett', 'var_test'] = 'levene', center: Literal['mean', 'median', 'trimmed'] = 'median', alpha: float = 0.05) -> 'TestResult':
+        Perform a test for equality of variances between two columns.
+
+    help():
+        Display a detailed help guide for the InferentialStats class and its methods.
     """

     def __init__(self, data: Union[pd.DataFrame, np.ndarray],
-                 backend: Literal['pandas', 'polars'] = 'pandas'
+                 backend: Literal['pandas', 'polars'] = 'pandas',
+                 sep: str = None, decimal: str = None, thousand: str = None,
+                 lang: Literal['es-ES', 'en-US'] = 'es-ES'):
         """
-
+        Initialize DataFrame

         Parameters:
         -----------
         data : DataFrame o ndarray
-
+            Data to analyze
         backend : str
-            'pandas'
+            'pandas' or 'polars' for processing
         """

         if isinstance(data, str) and os.path.exists(data):
             data = InferentialStats.from_file(data).data

+        if isinstance(data, pl.DataFrame):
+            raise TypeError(
+                "Polars aún no soportado. Use pandas.DataFrame."
+            )
+
         if isinstance(data, np.ndarray):
             if data.ndim == 1:
                 data = pd.DataFrame({'var': data})
@@ -36,26 +100,31 @@ class InferentialStats:
         self.data = data
         self.backend = backend
         self._numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
+        self.sep = sep
+        self.decimal = decimal
+        self.thousand = thousand
+        self.lang = lang

-    @
+    @classmethod
     def from_file(path: str):
         """
         Carga automática de archivos y devuelve instancia de Intelligence.
         Soporta CSV, Excel, TXT, JSON, Parquet, Feather, TSV.
         """
+
         if not os.path.exists(path):
-            raise FileNotFoundError(f"Archivo no encontrado: {path}")
+            raise FileNotFoundError(f"Archivo no encontrado / File not found: {path}")

         ext = os.path.splitext(path)[1].lower()

         if ext == ".csv":
-            df = pd.read_csv(path)
+            df = pd.read_csv(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)

         elif ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(path)
+            df = pd.read_excel(path, decimal=self.decimal, thousand=self.thousand)

         elif ext in [".txt", ".tsv"]:
-            df = pd.read_table(path)
+            df = pd.read_table(path, sep=self.sep, decimal=self.decimal, thousand=self.thousand)

         elif ext == ".json":
            df = pd.read_json(path)
@@ -76,14 +145,14 @@ class InferentialStats:
     def confidence_interval(self, column: str, confidence: float = 0.95,
                             statistic: Literal['mean', 'median', 'proportion'] = 'mean') -> tuple:
         """
-
+        Confidence interval for different statistics

         Parameters:
         -----------
         column : str
-
+            Column to analyze
         confidence : float
-
+            Confidence level (default 0.95 = 95%)
         statistic : str
             'mean', 'median' o 'proportion'

@@ -128,9 +197,10 @@ class InferentialStats:

     def t_test_1sample(self, column: str, popmean: float = None,
                        popmedian: float = None,
-                       alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided'
+                       alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided',
+                       alpha: float = 0.05) -> 'TestResult':
         """
-
+        One sample t test (for mean or median)

         Parameters:
         -----------
@@ -149,7 +219,7 @@ class InferentialStats:

         if popmean is not None:
             statistic, pvalue = stats.ttest_1samp(data, popmean, alternative=alternative)
-
+
             return TestResult(
                 test_name='T-Test de Una Muestra (Media)',
                 statistic=statistic,
@@ -160,13 +230,14 @@ class InferentialStats:
                     'sample_mean': data.mean(),
                     'n': len(data),
                     'df': len(data) - 1
-                }
+                },
+                alpha=alpha
             )

         elif popmedian is not None:
             # Wilcoxon signed-rank test para mediana
             statistic, pvalue = stats.wilcoxon(data - popmedian, alternative=alternative)
-
+
             return TestResult(
                 test_name='Wilcoxon Signed-Rank Test (Mediana)',
                 statistic=statistic,
@@ -184,7 +255,7 @@ class InferentialStats:

     def t_test_2sample(self, column1: str, column2: str,
                        equal_var: bool = True,
-                       alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided') -> 'TestResult':
+                       alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
         """
         Prueba t de dos muestras independientes

@@ -214,11 +285,12 @@ class InferentialStats:
                     'std1': data1.std(), 'std2': data2.std(),
                     'n1': len(data1), 'n2': len(data2),
                     'equal_var': equal_var
-                }
+                },
+                alpha=alpha
             )

     def t_test_paired(self, column1: str, column2: str,
-                      alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided') -> 'TestResult':
+                      alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
         """
         Prueba t pareada

@@ -241,11 +313,12 @@ class InferentialStats:
                 statistic=statistic,
                 pvalue=pvalue,
                 alternative=alternative,
-                params={'mean_diff': (data1 - data2).mean(), 'n': len(data1)}
+                params={'mean_diff': (data1 - data2).mean(), 'n': len(data1)},
+                alpha=alpha
             )

     def mann_whitney_test(self, column1: str, column2: str,
-                          alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided') -> 'TestResult':
+                          alternative: Literal['two-sided', 'less', 'greater'] = 'two-sided', alpha: float = 0.05) -> 'TestResult':
         """
         Prueba de Mann-Whitney U (alternativa no paramétrica al t-test)

@@ -273,10 +346,12 @@ class InferentialStats:
                     'median2': data2.median(),
                     'n1': len(data1),
                     'n2': len(data2)
-                }
+                },
+                alpha=alpha
             )

-    def chi_square_test(self, column1: str, column2: str
+    def chi_square_test(self, column1: str, column2: str,
+                        alpha: float = 0.05) -> 'TestResult':
         """
         Prueba Chi-cuadrado de independencia

@@ -295,10 +370,12 @@ class InferentialStats:
                 statistic=chi2,
                 pvalue=pvalue,
                 alternative='two-sided',
-                params={'dof': dof, 'contingency_table': contingency_table}
+                params={'dof': dof, 'contingency_table': contingency_table},
+                alpha=alpha
             )

-    def anova_oneway(self, column: str, groups: str
+    def anova_oneway(self, column: str, groups: str,
+                     alpha: float = 0.05) -> 'TestResult':
         """
         ANOVA de un factor

@@ -310,11 +387,16 @@ class InferentialStats:
            Variable de agrupación (categórica)
         """
         from scipy import stats
+        clean_data = self.data[[column, groups]].dropna()

-        groups_data = [group[column].values
+        groups_data = [group[column].values
+                       for _, group in clean_data.groupby(groups)
+                       if len(group) > 1 and group[column].var() > 0
+                       ]
+
         statistic, pvalue = stats.f_oneway(*groups_data)

-        return TestResult(
+        return TestResult(
             test_name='ANOVA de Un Factor',
             statistic=statistic,
             pvalue=pvalue,
@@ -322,10 +404,12 @@ class InferentialStats:
             params={
                 'groups': len(groups_data),
                 'n_total': sum(len(g) for g in groups_data)
-            }
+            },
+            alpha=alpha
         )

-    def kruskal_wallis_test(self, column: str, groups: str
+    def kruskal_wallis_test(self, column: str, groups: str,
+                            alpha: float = 0.05) -> 'TestResult':
         """
         Prueba de Kruskal-Wallis (ANOVA no paramétrico)

@@ -337,8 +421,13 @@ class InferentialStats:
            Variable de agrupación (categórica)
         """
         from scipy import stats
+
+        clean_data = self.data[[column, groups]].dropna()

-        groups_data = [group[column].values
+        groups_data = [group[column].values
+                       for _, group in clean_data.groupby(groups)
+                       if len(group) > 1 and group[column].var() > 0
+                       ]
         statistic, pvalue = stats.kruskal(*groups_data)

         return TestResult(
@@ -349,12 +438,14 @@ class InferentialStats:
             params={
                 'groups': len(groups_data),
                 'n_total': sum(len(g) for g in groups_data)
-            }
+            },
+            alpha=alpha
         )

     def normality_test(self, column: str,
                        method: Literal['shapiro', 'ks', 'anderson', 'jarque_bera', 'all'] = 'shapiro',
-                       test_statistic: Literal['mean', 'median', 'mode'] = 'mean'
+                       test_statistic: Literal['mean', 'median', 'mode'] = 'mean',
+                       alpha: float = 0.05) -> Union['TestResult', dict]:
         """
         Prueba de normalidad con múltiples métodos y estadísticos

@@ -396,6 +487,9 @@ class InferentialStats:
             scale = np.std(data, ddof=1)
         else:
             raise ValueError(f"test_statistic '{test_statistic}' no reconocido")
+
+        critical_values = None
+        significance_levels = None

         if method == 'all':
             results = {}
@@ -423,13 +517,13 @@ class InferentialStats:

             # Anderson-Darling
             anderson_result = stats.anderson(data, dist='norm')
-            results['anderson_darling'] =
-
-
-
-
-
-
+            results['anderson_darling'] = TestResult(
+                test_name=f'Anderson-Darling ({test_statistic})',
+                statistic=anderson_result.statistic,
+                critical_values=anderson_result.critical_values,
+                significance_levels=anderson_result.significance_level,
+                params={'n': n, 'test_statistic': test_statistic, 'loc': loc, 'scale': scale}
+            )

             # Jarque-Bera
             stat_jb, p_jb = stats.jarque_bera(data)
@@ -462,14 +556,12 @@ class InferentialStats:

         elif method == 'anderson':
             anderson_result = stats.anderson(data, dist='norm')
-
-
-
-
-
-
-            'interpretation': self._interpret_anderson(anderson_result)
-            }
+            test_name = f'Anderson-Darling ({test_statistic})'
+            pvalue = None
+            statistic = anderson_result.statistic
+            critical_values = anderson_result.critical_values
+            significance_levels = anderson_result.significance_level
+            params = {'n': n, 'test_statistic': test_statistic, 'loc': loc, 'scale': scale}

         elif method == 'jarque_bera':
             statistic, pvalue = stats.jarque_bera(data)
@@ -489,25 +581,19 @@ class InferentialStats:
             statistic=statistic,
             pvalue=pvalue,
             alternative='two-sided',
-            params=params
+            params=params,
+            critical_values=critical_values,
+            significance_levels=significance_levels,
+            alpha=alpha
         )
-
-    def _interpret_anderson(self, anderson_result):
-        """Interpreta resultados de Anderson-Darling"""
-        interpretations = []
-        for i, (crit_val, sig_level) in enumerate(zip(anderson_result.critical_values,
-                                                      anderson_result.significance_level)):
-            if anderson_result.statistic < crit_val:
-                interpretations.append(f"No se rechaza normalidad al {sig_level}% de significancia")
-            else:
-                interpretations.append(f"Se RECHAZA normalidad al {sig_level}% de significancia")
-        return interpretations

     def hypothesis_test(
         self,
         method: Literal["mean", "difference_mean", "proportion", "variance"] = "mean",
         column1: str = None,
         column2: str = None,
+        pop_mean: float = None,
+        pop_proportion: Union[float, Tuple[float, float]] = 0.5,
         alpha: float = 0.05,
         homoscedasticity: Literal["levene", "bartlett", "var_test"] = "levene") -> Dict[str, Any]:

@@ -522,11 +608,14 @@ class InferentialStats:
            Columnas numéricas a comparar
         alpha : float
            Nivel de significancia (default 0.05)
+        pop_mean : float
+           Media poblacional
+        pop_proportion : float
+           Proporción poblacional (default 0.5)
         homoscedasticity : str
            Método de homocedasticidad
            'levene', 'bartlett' o 'var_test'
         """
-
         data = self.data

         if column1 is None:
@@ -547,7 +636,7 @@ class InferentialStats:
         # --- MAIN HYPOTHESIS TESTS ---
         if method == "mean":
             # One-sample t-test
-            t_stat, p_value = stats.ttest_1samp(x, popmean=
+            t_stat, p_value = stats.ttest_1samp(x, popmean=pop_mean)
             test_name = "One-sample t-test"

         elif method == "difference_mean":
@@ -558,13 +647,46 @@ class InferentialStats:

         elif method == "proportion":
             # Proportion test (z-test)
-
+
+            x = np.asarray(x)
+
+            # Caso 1: datos ya binarios
+            unique_vals = np.unique(x)
+            if set(unique_vals).issubset({0, 1}):
+
+                if pop_proportion is None:
+                    raise ValueError("Debe especificarse pop_proportion")
+
+                pop_p = pop_proportion
+
+            # Caso 2: datos continuos → binarizar
+            else:
+                if not isinstance(pop_proportion, tuple):
+                    raise ValueError(
+                        "Para datos continuos, pop_proportion debe ser (p0, binizar_value)"
+                    )
+
+                pop_p, binizar_value = pop_proportion
+                x = (x > binizar_value).astype(int)
+
+            if not (0 < pop_p < 1):
+                raise ValueError("pop_proportion debe estar entre 0 y 1")
+
             n = len(x)
-
+            p_hat = np.mean(x)
+
+            if n * pop_p < 5 or n * (1 - pop_p) < 5:
+                raise ValueError(
+                    "Condiciones del Z-test no cumplidas: np0 y n(1-p0) deben ser ≥ 5"
+                )
+
+            z_stat = (p_hat - pop_p) / np.sqrt(pop_p * (1 - pop_p) / n)
             p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))
+
             t_stat = z_stat
             test_name = "Proportion Z-test"

+
         elif method == "variance":
             # Classic F-test
             var_x = np.var(x, ddof=1)
@@ -577,15 +699,19 @@ class InferentialStats:
             t_stat = F
             test_name = "Variance F-test"

-
-        "
-
-        "
-
-
-
-
-
+        if p_value < alpha:
+            self.interpretation = "Se RECHAZA la hipótesis nula"
+        else:
+            self.interpretation = ("Se RECHAZA la hipotesis alternativa")
+        return TestResult(
+            test_name=test_name,
+            statistic=t_stat,
+            pvalue=p_value,
+            alternative='two-sided',
+            alpha=alpha,
+            homo_result=homo_result
+        )
+
     def _homoscedasticity_test(
         self,
         x,
@@ -617,8 +743,8 @@ class InferentialStats:

     def variance_test(self, column1: str, column2: str,
                       method: Literal['levene', 'bartlett', 'var_test'] = 'levene',
-                      center: Literal['mean', 'median', 'trimmed'] = 'median'
-                      ) -> 'TestResult':
+                      center: Literal['mean', 'median', 'trimmed'] = 'median',
+                      alpha: float = 0.05) -> 'TestResult':
         """
         Prueba de igualdad de varianzas entre dos columnas.

@@ -690,23 +816,38 @@ class InferentialStats:
             statistic=statistic,
             pvalue=pvalue,
             alternative='two-sided',
-            params=params
+            params=params,
+            alpha=alpha
         )


     def help(self):
         """
-        Muestra ayuda completa de la clase
+        Muestra ayuda completa de la clase DescriptiveStats
+
+        Parametros / Parameters:
+        ------------------------
+        lang: str
+            Idioma Usuario: Codigo de Idioma (es-Es) o "Español"
+            User Language: Languaje Code (en-Us) or "English"
         """
-
+
+        if self.lang in ["en-US", "English", "english"]:
+            self.lang = "en-US"
+        else:
+            self.lang = "es-ES"
+        help_text = " "
+        match self.lang:
+            case "es-ES":
+                help_text = """
 ╔══════════════════════════════════════════════════╗
 ║   🔬 CLASE InferentialStats - AYUDA COMPLETA      ║
 ╚══════════════════════════════════════════════════╝

 📝 DESCRIPCIÓN:
-
-
-
+    Clase para estadística inferencial: pruebas de hipótesis, intervalos de
+    confianza y pruebas de normalidad. Permite realizar inferencias sobre
+    poblaciones a partir de muestras de datos.

 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

@@ -716,291 +857,595 @@ class InferentialStats:
 1. 📊 INTERVALOS DE CONFIANZA

+    • .confidence_interval(column, confidence=0.95, statistic='mean')
     Calcula intervalos de confianza para diferentes estadísticos
+    Parámetros:
+        column : Columna a analizar (str)
+        confidence : Nivel de confianza (float, default 0.95 = 95%)
+        statistic : 'mean', 'median' o 'proportion'
+    Retorna: (lower_bound, upper_bound, point_estimate)

 2. 🧪 PRUEBAS DE HIPÓTESIS - UNA MUESTRA

-    Prueba t de una muestra (o Wilcoxon para mediana)
+    • .t_test_1sample(column, popmean=None, popmedian=None, alternative='two-sided')
+    Prueba t de una muestra (o Wilcoxon para mediana)
+    Parámetros:
+        column : Columna a analizar
+        popmean : Media poblacional hipotética (para t-test)
+        popmedian : Mediana poblacional hipotética (para Wilcoxon)
+        alternative : 'two-sided', 'less', 'greater'

 3. 🧪 PRUEBAS DE HIPÓTESIS - DOS MUESTRAS

+    🔹 Pruebas Paramétricas:
+    • .t_test_2sample(column1, column2, equal_var=True, alternative='two-sided')
+        Prueba t de dos muestras independientes
+    • .t_test_paired(column1, column2, alternative='two-sided')
+        Prueba t pareada (muestras dependientes)
+    🔹 Pruebas No Paramétricas:
+    • .mann_whitney_test(column1, column2, alternative='two-sided')
+        Alternativa no paramétrica al t-test de dos muestras
+    🔹 Pruebas Extras:
+    • .hypothesis_test(method='mean', column1=None, column2=None, alpha=0.05, homoscedasticity='levene')
+    • .variance_test(column1, column2, method='levene', center='median')

 4. 🧪 PRUEBAS PARA MÚLTIPLES GRUPOS

+    🔹 Pruebas Paramétricas:
+    • .anova_oneway(column, groups)
+        ANOVA de un factor para comparar múltiples grupos
+    🔹 Pruebas No Paramétricas:
+    • .kruskal_wallis_test(column, groups)
+        Alternativa no paramétrica a ANOVA

 5. 🧪 PRUEBAS PARA VARIABLES CATEGÓRICAS

+    • .chi_square_test(column1, column2)
+        Prueba Chi-cuadrado de independencia entre variables categóricas

 6. 📈 PRUEBAS DE NORMALIDAD

+    • .normality_test(column, method='shapiro', test_statistic='mean')
+        Prueba si los datos siguen una distribución normal
+    Métodos disponibles:
+        'shapiro'     : Shapiro-Wilk (mejor para n ≤ 5000)
+        'ks'          : Kolmogorov-Smirnov
+        'anderson'    : Anderson-Darling
+        'jarque_bera' : Jarque-Bera (basado en asimetría y curtosis)
+        'all'         : Ejecuta todos los tests
+    test_statistic: 'mean', 'median' o 'mode' para centrar la distribución

 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

 💡 EJEMPLOS DE USO:

+    Ejemplo 1: Intervalos de Confianza
+        from inferential import InferentialStats
+        import pandas as pd
+        df = pd.read_csv('datos.csv')
+        inf_stats = InferentialStats(df)
+        # IC para la media (95%)
+        lower, upper, mean = inf_stats.confidence_interval('salario', confidence=0.95, statistic='mean')
+        print(f"IC 95%: [{lower:.2f}, {upper:.2f}]")
+        # IC para la mediana (bootstrap)
+        lower, upper, median = inf_stats.confidence_interval('edad', confidence=0.99, statistic='median')
+
+    Ejemplo 2: Prueba t de Una Muestra
+        # H0: μ = 50000 (la media salarial es 50000)
+        # H1: μ ≠ 50000 (prueba bilateral)
+        resultado = inf_stats.t_test_1sample(column='salario', popmean=50000, alternative='two-sided')
+        print(resultado)
+        # Muestra: estadístico t, valor p, interpretación
+        # Prueba unilateral
+        resultado = inf_stats.t_test_1sample(column='salario', popmean=50000, alternative='greater')  # H1: μ > 50000
+
+    Ejemplo 3: Comparación de Dos Grupos
+        # Prueba t independiente
+        resultado = inf_stats.t_test_2sample(column1='salario_hombres', column2='salario_mujeres', equal_var=True, alternative='two-sided')
+        print(resultado)
+        # Prueba Mann-Whitney (no paramétrica)
+        resultado = inf_stats.mann_whitney_test(column1='salario_grupo_a', column2='salario_grupo_b', alternative='two-sided')
+        # Prueba t pareada (mediciones antes/después)
+        resultado = inf_stats.t_test_paired(column1='peso_antes', column2='peso_despues', alternative='two-sided')
+
+    Ejemplo 4: ANOVA y Kruskal-Wallis
+        # ANOVA para comparar múltiples grupos
+        resultado = inf_stats.anova_oneway(column='rendimiento', groups='departamento')
+        print(resultado)
+        # Kruskal-Wallis (alternativa no paramétrica)
+        resultado = inf_stats.kruskal_wallis_test(column='satisfaccion', groups='categoria')
+
+    Ejemplo 5: Chi-Cuadrado
+        # Probar independencia entre variables categóricas
+        resultado = inf_stats.chi_square_test(column1='genero', column2='preferencia_producto')
+        print(resultado)
+        # El resultado incluye la tabla de contingencia
+
+    Ejemplo 6: Pruebas de Normalidad
+        # Shapiro-Wilk (recomendado para n ≤ 5000)
+        resultado = inf_stats.normality_test(column='edad', method='shapiro', test_statistic='mean')
+        print(resultado)
+        # Kolmogorov-Smirnov
+        resultado = inf_stats.normality_test(column='salario', method='ks')
+        # Ejecutar todos los tests
+        resultados = inf_stats.normality_test(column='ingresos', method='all', test_statistic='median')
+        # Acceder a cada test
+        print(resultados['shapiro'])
+        print(resultados['kolmogorov_smirnov'])
+        print(resultados['anderson_darling'])
+        print(resultados['jarque_bera'])

 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

 📊 GUÍA DE SELECCIÓN DE PRUEBAS:

+    Comparar Una Muestra vs Valor de Referencia
+        Datos normales     → t_test_1sample (con popmean)
+        Datos no normales  → t_test_1sample (con popmedian, usa Wilcoxon)

+    Comparar Dos Grupos Independientes
+        Datos normales     → t_test_2sample
+        Datos no normales  → mann_whitney_test

+    Comparar Dos Grupos Pareados
+        Datos normales     → t_test_paired
+        Datos no normales  → (use scipy.stats.wilcoxon directamente)

+    Comparar Múltiples Grupos
+        Datos normales     → anova_oneway
+        Datos no normales  → kruskal_wallis_test

+    Probar Independencia entre Categóricas
+        Variables categóricas → chi_square_test

 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

 🎯 CARACTERÍSTICAS CLAVE:

+    ✓ Pruebas paramétricas y no paramétricas
+    ✓ Intervalos de confianza con múltiples métodos
+    ✓ Pruebas de normalidad completas
+    ✓ Interpretación automática de resultados
+    ✓ Manejo automático de valores faltantes
+    ✓ Salidas formateadas profesionales
+    ✓ Soporte para análisis bilateral y unilateral

 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

 ⚠️ INTERPRETACIÓN DE RESULTADOS:

+    • Valor p < 0.05: Se rechaza H0 (evidencia significativa)
+    • Valor p ≥ 0.05: No se rechaza H0 (evidencia insuficiente)
+    • IC que no incluye el valor nulo: Evidencia contra H0

 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

 📚 DOCUMENTACIÓN ADICIONAL:
+    Para más información sobre métodos específicos, use:
+    help(InferentialStats.nombre_metodo)

 ╚══════════════════════════════════════════════════╝
 """
+            case "en-US":
+                help_text = """
+╔══════════════════════════════════════════════════╗
+║   🔬 CLASS InferentialStats - COMPLETE HELP       ║
+╚══════════════════════════════════════════════════╝
+
+📝 DESCRIPTION:
+    Class for inferential statistics: hypothesis tests, intervals
+    confidence and normality tests. Allows inferences to be made about
+    populations from data samples.
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+📋 MAIN METHODS:
+
+1. 📊 CONFIDENCE INTERVALS
+    • .confidence_interval(column, confidence=0.95, statistic='mean')
+    Calculate confidence intervals for different statistics
+    Parameters:
+        column : Column to analyze (str)
+        confidence : Confidence level (float, default 0.95 = 95%)
+        statistic : 'mean', 'median' or 'proportion'
+    Return: (lower_bound, upper_bound, point_estimate)
+
+2. 🧪 HYPOTHESIS TESTING - A SAMPLE
+    • .t_test_1sample(column, popmean=None, popmedian=None, alternative='two-sided')
+    One sample t test (or Wilcoxon for median)
+    Parameters:
+        column : Column to analyze
+        popmean : Hypothetical population mean (for t-test)
+        popmedian : Hypothetical population median (for Wilcoxon)
+        alternative : 'two-sided', 'less', 'greater'
+
+3. 🧪 HYPOTHESIS TESTING - TWO SAMPLES
+    🔹 Parametric Tests:
+    • .t_test_2sample(column1, column2, equal_var=True, alternative='two-sided')
+        Two independent samples t test
+    • .t_test_paired(column1, column2, alternative='two-sided')
+        Paired t test (dependent samples)
+    🔹 Non-Parametric Tests:
+    • .mann_whitney_test(column1, column2, alternative='two-sided')
+        Non-parametric alternative to the two-sample t-test
+    🔹 Extra Tests:
+    • .hypothesis_test(method='mean', column1=None, column2=None, alpha=0.05, homoscedasticity='levene')
+    • .variance_test(column1, column2, method='levene', center='median')
+
+4. 🧪 TESTING FOR MULTIPLE GROUPS
+    🔹 Parametric Tests:
+    • .anova_oneway(column, groups)
+        One-way ANOVA to compare multiple groups
+    🔹 Non-Parametric Tests:
+    • .kruskal_wallis_test(column, groups)
+        Non-parametric alternative to ANOVA
+
+5. 🧪 TESTS FOR CATEGORICAL VARIABLES
+    • .chi_square_test(column1, column2)
+        Chi-square test of independence between categorical variables
+
+6. 📈 NORMALITY TESTS
+    • .normality_test(column, method='shapiro', test_statistic='mean')
+        Tests whether the data follows a normal distribution
+    Available methods:
+        'shapiro'     : Shapiro-Wilk (best for n ≤ 5000)
+        'ks'          : Kolmogorov-Smirnov
+        'anderson'    : Anderson-Darling
+        'jarque_bera' : Jarque-Bera (based on skewness and kurtosis)
+        'all'         : Run all tests
+    test_statistic: 'mean', 'median' o 'mode' to focus the distribution
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+💡 EXAMPLES OF USE:
+
+    Example 1: Confidence Intervals
+        from inferential import InferentialStats
+        import pandas as pd
+        df = pd.read_csv('data.csv')
+        inf_stats = InferentialStats(df)
+        # CI for mean (95%)
+        lower, upper, mean = inf_stats.confidence_interval('salario', confidence=0.95, statistic='mean')
+        print(f"IC 95%: [{lower:.2f}, {upper:.2f}]")
+        # CI for the median (bootstrap)
+        lower, upper, median = inf_stats.confidence_interval('edad', confidence=0.99, statistic='median')
+
+    Example 2: One Sample t-test
+        # H0: μ = 50000 (the average salary is 50,000)
+        # H1: μ ≠ 50000 (two-sided test)
+        result = inf_stats.t_test_1sample(column='salary', popmean=50000, alternative='two-sided')
+        print(result)
+        # Sample: t-statistic, p-value, interpretation
+        # One-sided test
+        result = inf_stats.t_test_1sample(column='salary', popmean=50000, alternative='greater')  # H1: μ > 50000
+
+    Example 3: Comparison of Two Groups
+        # Independent t test
+        result = inf_stats.t_test_2sample(column1='men_salary', column2='women_salary', equal_var=True, alternative='two-sided')
+        print(result)
+        # Mann-Whitney test (non-parametric)
+        result = inf_stats.mann_whitney_test(column1='salary_group_a', column2='salary_group_b', alternative='two-sided')
+        # Paired t-test (before/after measurements)
+        result = inf_stats.t_test_paired(column1='weight_before', column2='after_weight', alternative='two-sided')
+
+    Example 4: ANOVA and Kruskal-Wallis
+        # ANOVA to compare multiple groups
+        result = inf_stats.anova_oneway(column='performance', groups='department')
+        print(result)
+        # Kruskal-Wallis (non-parametric alternative)
+        result = inf_stats.kruskal_wallis_test(column='satisfaction', groups='category')
+
+    Example 5: Chi-Square
+        # Test independence between categorical variables
+        result = inf_stats.chi_square_test(column1='gender', column2='product_preference')
+        print(result)
+        # The result includes the contingency table
+
+    Example 6: Normality Tests
+        # Shapiro-Wilk (recommended for n ≤ 5000)
+        result = inf_stats.normality_test(column='age', method='shapiro', test_statistic='mean')
+        print(result)
+        # Kolmogorov-Smirnov
+        result = inf_stats.normality_test(column='salary', method='ks')
+        # Run all tests
+        results = inf_stats.normality_test(column='income', method='all', test_statistic='median')
+        # Access each test
+        print(results['shapiro'])
+        print(results['kolmogorov_smirnov'])
+        print(results['anderson_darling'])
+        print(results['jarque_bera'])
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+📊 GUÍA DE SELECCIÓN DE PRUEBAS:
+
+    Compare A Sample vs Reference Value
+        Normal data     → t_test_1sample (with mean)
+        Non-normal data → t_test_1sample (with popmedian, uses Wilcoxon)
+
+    Compare Two Independent Groups
+        Normal data     → t_test_2sample
+        Non-normal data → mann_whitney_test
+
+    Compare Two Paired Groups
+        Normal data     → t_test_paired
+        Non-normal data → (use scipy.stats.wilcoxon directly)
+
+    Compare Multiple Groups
+        Normal data     → anova_oneway
+        Non-normal data → kruskal_wallis_test
+
+    Testing Independence between Categories
+        Categorical variables → chi_square_test
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+🎯 KEY FEATURES:
+
+    ✓ Parametric and non-parametric tests
+    ✓ Confidence intervals with multiple methods
+    ✓ Complete normality tests
+    ✓ Automatic interpretation of results
+    ✓ Automatic handling of missing values
+    ✓ Professional formatted outputs
+    ✓ Support for bilateral and unilateral analysis
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+⚠️ INTERPRETATION OF RESULTS:
+
+    • P value < 0.05: H0 is rejected (significant evidence)
+    • P value ≥ 0.05: H0 is not rejected (insufficient evidence)
+    • CI that does not include the null value: Evidence against H0
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+📚 ADDITIONAL DOCUMENTATION:
+    For more information on specific methods, use:
+    help(InferentialStats.method_name)
+
+╚══════════════════════════════════════════════════╝
+"""
         print(help_text)

 @dataclass
 class TestResult:
     """Clase para resultados de pruebas de hipótesis"""

-    def __init__(self, test_name: str, statistic: float,
-
+    def __init__(self, test_name: str, statistic: float, alpha: float = 0.05,
+                 params: dict = None, pvalue: float = None,
+                 alternative: str = None, critical_values=None,
+                 significance_levels=None, homo_result=None):
         self.test_name = test_name
         self.statistic = statistic
         self.pvalue = pvalue
         self.alternative = alternative
         self.params = params
+        self.critical_values = critical_values
+        self.significance_levels = significance_levels
+        self.interpretation = "Aun no hay interpretacion"
+        self.homo_result = homo_result
+        self.alpha = alpha
+
+        if self.pvalue is not None:
+            if self.pvalue < self.alpha:
+                self.interpretation = "Se RECHAZA la hipótesis nula"
+            else:
+                self.interpretation = "Se RECHAZA la hipótesis alternativa"

     def __repr__(self):
         return self._format_output()
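The TestResult constructor at the end of the hunk above now stores alpha and derives self.interpretation by comparing the p-value against it. A minimal sketch of that decision rule with illustrative numbers, not the class itself; note that, statistically, the else branch means "fail to reject H0" rather than rejecting the alternative, which is the wording used here instead of the library's literal string:

    alpha = 0.05
    pvalue = 0.012  # illustrative p-value from any of the tests above

    if pvalue < alpha:
        interpretation = "Se RECHAZA la hipótesis nula"      # reject H0 at the 5% level
    else:
        interpretation = "No se rechaza la hipótesis nula"   # insufficient evidence against H0

    print(interpretation)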
@@ -1014,28 +1459,83 @@ class TestResult:
         output.append(f"Fecha: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
         output.append(f"Hipótesis Alternativa: {self.alternative}")
         output.append("-" * 80)
-
+
         output.append("\nRESULTADOS:")
         output.append("-" * 80)
         output.append(f"{'Estadístico':<40} {self.statistic:>20.6f}")
-
-
-
-
-
-
-
-
-
+
+        # Mostrar valores críticos o p-value
+        if self.critical_values is not None and self.significance_levels is not None:
+            output.append("Valores Críticos:")
+            for sl, cv in zip(self.significance_levels, self.critical_values):
+                output.append(f"  α = {sl:>6.3f} → {cv:.6f}")
+        elif self.pvalue is not None:
+            output.append(f"{'Valor p':<40} {self.pvalue:>20.6e}")
+
+        # -------------------------
+        # INTERPRETACIÓN
+        # -------------------------
         output.append("\nINTERPRETACIÓN:")
         output.append("-" * 80)
-
-
-
-
-
-
-
-
+
+        alpha = 0.05
+
+        # Caso tests con p-value
+        if self.pvalue is not None:
+            output.append(f"Alpha = {alpha}")
+
+            if self.pvalue < alpha:
+                output.append("❌ Se RECHAZA la hipótesis nula")
+            else:
+                output.append("✔️ No hay evidencia suficiente para rechazar la hipótesis nula")
+
+        # Caso tests con valores críticos (ej. Anderson-Darling)
+        else:
+            # Protección mínima
+            if self.significance_levels is None or self.critical_values is None:
+                output.append("Resultado no disponible")
+            else:
+                idx = min(
+                    range(len(self.significance_levels)),
+                    key=lambda i: abs(self.significance_levels[i] - alpha)
+                )
+
+                critical_value = self.critical_values[idx]
+
+                output.append(f"Nivel de significancia (α) = {alpha}")
+                output.append(f"Estadístico A² = {self.statistic:.4f}")
+                output.append(f"Valor crítico = {critical_value:.4f}")
+
+                if self.statistic > critical_value:
+                    output.append("❌ Se RECHAZA la hipótesis nula")
+                else:
+                    output.append("✔️ No hay evidencia suficiente para rechazar la hipótesis nula")
+
+        # -------------------------
+        # HOMOCEDASTICIDAD
+        # -------------------------
+        if isinstance(self.homo_result, dict):
+            homo = self.homo_result
+
+            if isinstance(homo, dict):
+                output.append("\nTEST DE HOMOCEDASTICIDAD:")
+                output.append(f"Método: {homo['method']}")
+                output.append(f"Estadístico: {homo['statistic']:.6f}")
+                output.append(f"Valor p: {homo['p_value']:.6e}")
+
+                if homo.get("equal_var") is True:
+                    output.append("✔️ Se asume igualdad de varianzas")
+                elif homo.get("equal_var") is False:
+                    output.append("❌ No se asume igualdad de varianzas")
+
+        # -------------------------
+        # PARÁMETROS
+        # -------------------------
+        if isinstance(self.params, dict):
+            output.append("\nPARÁMETROS:")
+            output.append("-" * 80)
+            for k, v in self.params.items():
+                output.append(f"{k:<40} {str(v):>20}")
+
         output.append("=" * 80)
         return "\n".join(output)