statslibx 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statslibx/__init__.py +2 -2
- statslibx/descriptive.py +235 -148
- statslibx/inferential.py +100 -72
- statslibx/utils.py +427 -60
- {statslibx-0.1.5.dist-info → statslibx-0.1.6.dist-info}/METADATA +1 -1
- {statslibx-0.1.5.dist-info → statslibx-0.1.6.dist-info}/RECORD +8 -8
- {statslibx-0.1.5.dist-info → statslibx-0.1.6.dist-info}/WHEEL +0 -0
- {statslibx-0.1.5.dist-info → statslibx-0.1.6.dist-info}/top_level.txt +0 -0
statslibx/__init__.py
CHANGED
statslibx/descriptive.py
CHANGED
|
@@ -2,7 +2,13 @@ import numpy as np
|
|
|
2
2
|
import pandas as pd
|
|
3
3
|
from typing import Optional, Union, Literal, List
|
|
4
4
|
from datetime import datetime
|
|
5
|
+
import flet as ft
|
|
5
6
|
import os
|
|
7
|
+
import matplotlib.pyplot as plt
|
|
8
|
+
import seaborn as sns
|
|
9
|
+
import io
|
|
10
|
+
import base64
|
|
11
|
+
import plotly.express as px
|
|
6
12
|
|
|
7
13
|
class DescriptiveStats:
|
|
8
14
|
"""
|
|
@@ -213,137 +219,58 @@ class DescriptiveStats:
|
|
|
213
219
|
# ============= REGRESIÓN LINEAL =============
|
|
214
220
|
|
|
215
221
|
def linear_regression(self,
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
222
|
+
X: Union[str, List[str]],
|
|
223
|
+
y: str,
|
|
224
|
+
engine: Literal['statsmodels', 'scikit-learn'] = 'statsmodels',
|
|
225
|
+
fit_intercept: bool = True,
|
|
226
|
+
show_plot: bool = False,
|
|
227
|
+
plot_backend: str = 'seaborn',
|
|
228
|
+
handle_missing: Literal['drop', 'error', 'warn'] = 'drop') -> tuple:
|
|
223
229
|
"""
|
|
224
|
-
Regresión lineal simple o múltiple
|
|
225
|
-
|
|
226
|
-
Parameters:
|
|
227
|
-
-----------
|
|
228
|
-
y : str
|
|
229
|
-
Variable dependiente
|
|
230
|
-
X : str o list
|
|
231
|
-
Variable(s) independiente(s)
|
|
232
|
-
engine : str
|
|
233
|
-
'statsmodels' o 'scikit-learn'
|
|
234
|
-
fit_intercept : bool
|
|
235
|
-
Si incluir intercepto
|
|
236
|
-
show_plot : bool
|
|
237
|
-
Mostrar gráficos diagnósticos
|
|
238
|
-
plot_backend : str
|
|
239
|
-
Backend para visualización
|
|
240
|
-
|
|
241
|
-
Returns:
|
|
242
|
-
--------
|
|
243
|
-
LinearRegressionResult
|
|
244
|
-
Objeto con resultados y método summary()
|
|
230
|
+
Regresión lineal simple o múltiple con opción de mostrar gráfico.
|
|
231
|
+
Siempre devuelve un tuple: (LinearRegressionResult, figura o None)
|
|
245
232
|
"""
|
|
246
233
|
if isinstance(X, str):
|
|
247
234
|
X = [X]
|
|
248
|
-
|
|
249
|
-
# Verificar
|
|
250
|
-
missing_columns = []
|
|
251
|
-
if y not in self.data.columns:
|
|
252
|
-
missing_columns.append(y)
|
|
253
|
-
for x_col in X:
|
|
254
|
-
if x_col not in self.data.columns:
|
|
255
|
-
missing_columns.append(x_col)
|
|
256
|
-
|
|
235
|
+
|
|
236
|
+
# Verificar columnas
|
|
237
|
+
missing_columns = [col for col in [y] + X if col not in self.data.columns]
|
|
257
238
|
if missing_columns:
|
|
258
239
|
raise ValueError(f"Columnas no encontradas: {missing_columns}")
|
|
259
|
-
|
|
260
|
-
#
|
|
240
|
+
|
|
241
|
+
# Preparar datos
|
|
261
242
|
regression_data = self.data[[y] + X].copy()
|
|
262
|
-
|
|
263
|
-
# Manejar valores infinitos
|
|
264
243
|
numeric_cols = regression_data.select_dtypes(include=[np.number]).columns
|
|
265
244
|
for col in numeric_cols:
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
regression_data[col] = regression_data[col].replace([np.inf, -np.inf], np.nan)
|
|
271
|
-
|
|
272
|
-
# Manejar valores faltantes
|
|
273
|
-
missing_before = regression_data.isnull().sum()
|
|
274
|
-
total_missing = missing_before.sum()
|
|
275
|
-
|
|
276
|
-
if total_missing > 0:
|
|
277
|
-
missing_info = "\n".join([f" - {col}: {missing_before[col]} missing"
|
|
278
|
-
for col in missing_before[missing_before > 0].index])
|
|
279
|
-
|
|
245
|
+
regression_data[col] = regression_data[col].replace([np.inf, -np.inf], np.nan)
|
|
246
|
+
|
|
247
|
+
# Manejo de valores faltantes
|
|
248
|
+
if regression_data.isnull().any().any():
|
|
280
249
|
if handle_missing == 'error':
|
|
281
|
-
raise ValueError(
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
regression_data_clean = regression_data.dropna()
|
|
290
|
-
|
|
291
|
-
else:
|
|
292
|
-
raise ValueError(f"Método de manejo de missing values no reconocido: {handle_missing}")
|
|
293
|
-
|
|
294
|
-
# Informar sobre la limpieza
|
|
295
|
-
rows_before = len(regression_data)
|
|
296
|
-
rows_after = len(regression_data_clean)
|
|
297
|
-
rows_removed = rows_before - rows_after
|
|
298
|
-
|
|
299
|
-
if rows_removed > 0:
|
|
300
|
-
print(f"Limpieza de datos: {rows_removed} filas eliminadas ({rows_after} filas restantes)")
|
|
301
|
-
|
|
302
|
-
if rows_after < len(X) + 1: # +1 para el intercepto
|
|
303
|
-
raise ValueError(
|
|
304
|
-
f"Muy pocas filas después de limpieza: {rows_after}. "
|
|
305
|
-
f"Se necesitan al menos {len(X) + 1} filas para regresión."
|
|
306
|
-
)
|
|
307
|
-
else:
|
|
308
|
-
regression_data_clean = regression_data
|
|
309
|
-
|
|
310
|
-
# Extraer datos limpios
|
|
311
|
-
X_data = regression_data_clean[X].values
|
|
312
|
-
y_data = regression_data_clean[y].values
|
|
313
|
-
|
|
314
|
-
# Validar que los datos son numéricos
|
|
315
|
-
if not np.issubdtype(X_data.dtype, np.number):
|
|
316
|
-
raise ValueError("Las variables independientes deben ser numéricas")
|
|
317
|
-
if not np.issubdtype(y_data.dtype, np.number):
|
|
318
|
-
raise ValueError("La variable dependiente debe ser numérica")
|
|
319
|
-
|
|
320
|
-
# Validar que no hay más missing values
|
|
321
|
-
if np.isnan(X_data).any() or np.isnan(y_data).any():
|
|
322
|
-
raise ValueError("Todavía hay valores NaN después de la limpieza")
|
|
323
|
-
|
|
324
|
-
# Validar que no hay valores infinitos
|
|
325
|
-
if np.isinf(X_data).any() or np.isinf(y_data).any():
|
|
326
|
-
raise ValueError("Todavía hay valores infinitos después de la limpieza")
|
|
327
|
-
|
|
328
|
-
# Crear y ajustar el modelo
|
|
329
|
-
result = LinearRegressionResult(
|
|
330
|
-
X_data, y_data, X, y,
|
|
331
|
-
engine=engine,
|
|
332
|
-
fit_intercept=fit_intercept
|
|
333
|
-
)
|
|
250
|
+
raise ValueError("Datos contienen valores faltantes")
|
|
251
|
+
regression_data = regression_data.dropna()
|
|
252
|
+
|
|
253
|
+
X_data = regression_data[X].values
|
|
254
|
+
y_data = regression_data[y].values
|
|
255
|
+
|
|
256
|
+
# Ajustar modelo
|
|
257
|
+
result = LinearRegressionResult(X_data, y_data, X, y, engine=engine, fit_intercept=fit_intercept)
|
|
334
258
|
result.fit()
|
|
335
259
|
result.show_plot = show_plot
|
|
336
260
|
result.plot_backend = plot_backend
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
261
|
+
|
|
262
|
+
figura = None
|
|
263
|
+
# Graficar si es regresión simple
|
|
264
|
+
if show_plot and len(X) == 1 and plot_backend.lower() == 'seaborn':
|
|
265
|
+
import matplotlib.pyplot as plt
|
|
266
|
+
g = sns.lmplot(x=X[0], y=y, data=regression_data, ci=None)
|
|
267
|
+
g.figure.suptitle(f"Regresión lineal: {y} ~ {X[0]}", y=1.02)
|
|
268
|
+
plt.tight_layout()
|
|
269
|
+
figura = g.figure
|
|
270
|
+
|
|
271
|
+
return result, figura
|
|
272
|
+
|
|
273
|
+
|
|
347
274
|
|
|
348
275
|
def help(self):
|
|
349
276
|
"""
|
|
@@ -405,6 +332,15 @@ class DescriptiveStats:
|
|
|
405
332
|
|
|
406
333
|
Incluye: conteo, media, mediana, moda, desv. est., varianza,
|
|
407
334
|
mínimo, Q1, Q3, máximo, IQR, asimetría, curtosis
|
|
335
|
+
• .summary().to_dataframe(format)
|
|
336
|
+
Format:
|
|
337
|
+
- Wide
|
|
338
|
+
- Long
|
|
339
|
+
- Compact
|
|
340
|
+
|
|
341
|
+
• .summary().to_categorical_summary()
|
|
342
|
+
• .summary().to_styled_df()
|
|
343
|
+
|
|
408
344
|
|
|
409
345
|
┌────────────────────────────────────────────────────────────────────────────┐
|
|
410
346
|
│ 4. 📈 REGRESIÓN LINEAL │
|
|
@@ -599,14 +535,151 @@ class DescriptiveSummary:
|
|
|
599
535
|
output.append("=" * 100)
|
|
600
536
|
return "\n".join(output)
|
|
601
537
|
|
|
538
|
+
def to_dataframe(self, format='wide'):
|
|
539
|
+
"""
|
|
540
|
+
Convierte los resultados a DataFrame.
|
|
541
|
+
|
|
542
|
+
Parameters:
|
|
543
|
+
-----------
|
|
544
|
+
format : str, default 'wide'
|
|
545
|
+
- 'wide': Variables en columnas, estadísticas en filas
|
|
546
|
+
- 'long': Formato largo (variable, estadística, valor)
|
|
547
|
+
- 'compact': Variables en filas, estadísticas en columnas
|
|
548
|
+
"""
|
|
549
|
+
if format == 'wide':
|
|
550
|
+
return self._to_wide_df()
|
|
551
|
+
elif format == 'long':
|
|
552
|
+
return self._to_long_df()
|
|
553
|
+
elif format == 'compact':
|
|
554
|
+
return self._to_compact_df()
|
|
555
|
+
else:
|
|
556
|
+
raise ValueError("format debe ser 'wide', 'long' o 'compact'")
|
|
557
|
+
|
|
558
|
+
def _to_wide_df(self):
|
|
559
|
+
"""
|
|
560
|
+
Formato ancho: Variables en columnas, estadísticas en filas.
|
|
561
|
+
|
|
562
|
+
Ejemplo:
|
|
563
|
+
Variable1 Variable2 Variable3
|
|
564
|
+
count 150.0 150.0 150.0
|
|
565
|
+
mean 5.8 3.1 3.8
|
|
566
|
+
median 5.8 3.0 4.0
|
|
567
|
+
...
|
|
568
|
+
"""
|
|
569
|
+
df = pd.DataFrame(self.results)
|
|
570
|
+
|
|
571
|
+
# Ordenar índice por categorías
|
|
572
|
+
order = [
|
|
573
|
+
'count', 'mean', 'median', 'mode', # Tendencia central
|
|
574
|
+
'std', 'variance', 'iqr', # Dispersión
|
|
575
|
+
'min', 'q1', 'q3', 'max', # Cuartiles
|
|
576
|
+
'skewness', 'kurtosis' # Forma
|
|
577
|
+
]
|
|
578
|
+
|
|
579
|
+
# Reordenar filas según el orden definido
|
|
580
|
+
df = df.reindex([stat for stat in order if stat in df.index])
|
|
581
|
+
|
|
582
|
+
return df
|
|
583
|
+
|
|
584
|
+
def _to_compact_df(self):
|
|
585
|
+
"""
|
|
586
|
+
Formato compacto: Variables en filas, estadísticas en columnas.
|
|
587
|
+
|
|
588
|
+
Ejemplo:
|
|
589
|
+
count mean median mode std variance ...
|
|
590
|
+
Var1 150.0 5.8 5.8 5.0 0.8 0.68 ...
|
|
591
|
+
Var2 150.0 3.1 3.0 3.0 0.4 0.19 ...
|
|
592
|
+
Var3 150.0 3.8 4.0 1.0 1.8 3.11 ...
|
|
593
|
+
"""
|
|
594
|
+
df_data = []
|
|
595
|
+
|
|
596
|
+
for var_name, stats in self.results.items():
|
|
597
|
+
row = {'Variable': var_name}
|
|
598
|
+
row.update(stats)
|
|
599
|
+
df_data.append(row)
|
|
600
|
+
|
|
601
|
+
df = pd.DataFrame(df_data)
|
|
602
|
+
df = df.set_index('Variable')
|
|
603
|
+
|
|
604
|
+
# Ordenar columnas por categorías
|
|
605
|
+
order = [
|
|
606
|
+
'count', 'mean', 'median', 'mode',
|
|
607
|
+
'std', 'variance', 'iqr',
|
|
608
|
+
'min', 'q1', 'q3', 'max',
|
|
609
|
+
'skewness', 'kurtosis'
|
|
610
|
+
]
|
|
611
|
+
|
|
612
|
+
df = df[[col for col in order if col in df.columns]]
|
|
613
|
+
|
|
614
|
+
return df
|
|
615
|
+
|
|
616
|
+
def _to_long_df(self):
|
|
617
|
+
"""
|
|
618
|
+
Formato largo: Una fila por cada combinación variable-estadística.
|
|
619
|
+
|
|
620
|
+
Ejemplo:
|
|
621
|
+
Variable Estadistica Valor
|
|
622
|
+
0 Var1 count 150.00
|
|
623
|
+
1 Var1 mean 5.84
|
|
624
|
+
2 Var1 median 5.80
|
|
625
|
+
...
|
|
626
|
+
"""
|
|
627
|
+
data = []
|
|
628
|
+
|
|
629
|
+
for var_name, stats in self.results.items():
|
|
630
|
+
for stat_name, value in stats.items():
|
|
631
|
+
data.append({
|
|
632
|
+
'Variable': var_name,
|
|
633
|
+
'Estadistica': stat_name,
|
|
634
|
+
'Valor': value
|
|
635
|
+
})
|
|
636
|
+
|
|
637
|
+
return pd.DataFrame(data)
|
|
638
|
+
|
|
639
|
+
def to_styled_df(self):
|
|
640
|
+
"""
|
|
641
|
+
Devuelve un DataFrame con formato wide y estilo aplicado.
|
|
642
|
+
Útil para notebooks de Jupyter.
|
|
643
|
+
"""
|
|
644
|
+
df = self._to_wide_df()
|
|
645
|
+
|
|
646
|
+
styled = df.style.format("{:.4f}") \
|
|
647
|
+
.background_gradient(cmap='YlOrRd', axis=1) \
|
|
648
|
+
.set_caption(f"Estadística Descriptiva - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
|
649
|
+
|
|
650
|
+
return styled
|
|
651
|
+
|
|
652
|
+
def to_categorical_summary(self):
|
|
653
|
+
"""
|
|
654
|
+
Crea un resumen organizado por categorías de estadísticas.
|
|
655
|
+
|
|
656
|
+
Returns:
|
|
657
|
+
--------
|
|
658
|
+
dict of DataFrames
|
|
659
|
+
"""
|
|
660
|
+
df_wide = self._to_wide_df()
|
|
661
|
+
|
|
662
|
+
return {
|
|
663
|
+
'Tendencia Central': df_wide.loc[['count', 'mean', 'median', 'mode']],
|
|
664
|
+
'Dispersión': df_wide.loc[['std', 'variance', 'iqr']],
|
|
665
|
+
'Cuartiles': df_wide.loc[['min', 'q1', 'q3', 'max']],
|
|
666
|
+
'Forma': df_wide.loc[['skewness', 'kurtosis']]
|
|
667
|
+
}
|
|
668
|
+
|
|
602
669
|
|
|
603
670
|
import numpy as np
|
|
604
671
|
from datetime import datetime
|
|
605
672
|
|
|
606
673
|
|
|
674
|
+
import numpy as np
|
|
675
|
+
import pandas as pd
|
|
676
|
+
from datetime import datetime
|
|
677
|
+
import matplotlib.pyplot as plt
|
|
678
|
+
import seaborn as sns
|
|
679
|
+
|
|
607
680
|
class LinearRegressionResult:
|
|
608
681
|
"""Clase para resultados de regresión lineal"""
|
|
609
|
-
|
|
682
|
+
|
|
610
683
|
def __init__(self, X, y, X_names, y_name, engine='statsmodels', fit_intercept=True):
|
|
611
684
|
self.X = X
|
|
612
685
|
self.y = y
|
|
@@ -618,7 +691,7 @@ class LinearRegressionResult:
|
|
|
618
691
|
self.results = None
|
|
619
692
|
self.show_plot = False
|
|
620
693
|
self.plot_backend = 'seaborn'
|
|
621
|
-
|
|
694
|
+
|
|
622
695
|
# Atributos que se llenarán después del fit
|
|
623
696
|
self.coef_ = None
|
|
624
697
|
self.intercept_ = None
|
|
@@ -633,7 +706,7 @@ class LinearRegressionResult:
|
|
|
633
706
|
self.std_errors = None
|
|
634
707
|
self.t_values = None
|
|
635
708
|
self.p_values = None
|
|
636
|
-
|
|
709
|
+
|
|
637
710
|
def fit(self):
|
|
638
711
|
"""Ajustar el modelo"""
|
|
639
712
|
if self.engine == 'statsmodels':
|
|
@@ -643,7 +716,7 @@ class LinearRegressionResult:
|
|
|
643
716
|
X = sm.add_constant(X)
|
|
644
717
|
self.model = sm.OLS(self.y, X)
|
|
645
718
|
self.results = self.model.fit()
|
|
646
|
-
|
|
719
|
+
|
|
647
720
|
# Extraer atributos
|
|
648
721
|
if self.fit_intercept:
|
|
649
722
|
self.intercept_ = self.results.params[0]
|
|
@@ -657,7 +730,7 @@ class LinearRegressionResult:
|
|
|
657
730
|
self.std_errors = self.results.bse
|
|
658
731
|
self.t_values = self.results.tvalues
|
|
659
732
|
self.p_values = self.results.pvalues
|
|
660
|
-
|
|
733
|
+
|
|
661
734
|
self.r_squared = self.results.rsquared
|
|
662
735
|
self.adj_r_squared = self.results.rsquared_adj
|
|
663
736
|
self.f_statistic = self.results.fvalue
|
|
@@ -666,24 +739,24 @@ class LinearRegressionResult:
|
|
|
666
739
|
self.bic = self.results.bic
|
|
667
740
|
self.residuals = self.results.resid
|
|
668
741
|
self.predictions = self.results.fittedvalues
|
|
669
|
-
|
|
742
|
+
|
|
670
743
|
else: # scikit-learn
|
|
671
744
|
from sklearn.linear_model import LinearRegression
|
|
672
745
|
self.model = LinearRegression(fit_intercept=self.fit_intercept)
|
|
673
746
|
self.model.fit(self.X, self.y)
|
|
674
|
-
|
|
747
|
+
|
|
675
748
|
self.coef_ = self.model.coef_
|
|
676
749
|
self.intercept_ = self.model.intercept_
|
|
677
|
-
self.r_squared = self.model.score(self.X, self.y)
|
|
678
750
|
self.predictions = self.model.predict(self.X)
|
|
679
751
|
self.residuals = self.y - self.predictions
|
|
680
|
-
|
|
681
|
-
|
|
752
|
+
self.r_squared = self.model.score(self.X, self.y)
|
|
753
|
+
|
|
754
|
+
# Calcular R^2 ajustado
|
|
682
755
|
n, k = self.X.shape
|
|
683
756
|
self.adj_r_squared = 1 - (1 - self.r_squared) * (n - 1) / (n - k - 1)
|
|
684
|
-
|
|
757
|
+
|
|
685
758
|
return self
|
|
686
|
-
|
|
759
|
+
|
|
687
760
|
def predict(self, X_new):
|
|
688
761
|
"""Hacer predicciones con nuevos datos"""
|
|
689
762
|
if self.engine == 'statsmodels':
|
|
@@ -693,16 +766,12 @@ class LinearRegressionResult:
|
|
|
693
766
|
return self.results.predict(X_new)
|
|
694
767
|
else:
|
|
695
768
|
return self.model.predict(X_new)
|
|
696
|
-
|
|
769
|
+
|
|
697
770
|
def summary(self):
|
|
698
771
|
"""Mostrar resumen estilo OLS"""
|
|
699
772
|
return self.__repr__()
|
|
700
|
-
|
|
773
|
+
|
|
701
774
|
def __repr__(self):
|
|
702
|
-
return self._format_output()
|
|
703
|
-
|
|
704
|
-
def _format_output(self):
|
|
705
|
-
"""Formato estilo OLS de statsmodels"""
|
|
706
775
|
output = []
|
|
707
776
|
output.append("=" * 100)
|
|
708
777
|
output.append("RESULTADOS DE REGRESIÓN LINEAL".center(100))
|
|
@@ -712,7 +781,7 @@ class LinearRegressionResult:
|
|
|
712
781
|
output.append(f"Motor: {self.engine}")
|
|
713
782
|
output.append(f"Fecha: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
|
714
783
|
output.append("-" * 100)
|
|
715
|
-
|
|
784
|
+
|
|
716
785
|
# Información del modelo
|
|
717
786
|
output.append("\nINFORMACIÓN DEL MODELO:")
|
|
718
787
|
output.append("-" * 100)
|
|
@@ -720,24 +789,22 @@ class LinearRegressionResult:
|
|
|
720
789
|
output.append("-" * 100)
|
|
721
790
|
output.append(f"{'R-cuadrado':<50} {self.r_squared:>20.6f}")
|
|
722
791
|
output.append(f"{'R-cuadrado Ajustado':<50} {self.adj_r_squared:>20.6f}")
|
|
723
|
-
|
|
792
|
+
|
|
724
793
|
if self.f_statistic is not None:
|
|
725
794
|
output.append(f"{'Estadístico F':<50} {self.f_statistic:>20.6f}")
|
|
726
795
|
output.append(f"{'Prob (F-estadístico)':<50} {self.f_pvalue:>20.6e}")
|
|
727
|
-
|
|
796
|
+
|
|
728
797
|
if self.aic is not None:
|
|
729
798
|
output.append(f"{'AIC':<50} {self.aic:>20.6f}")
|
|
730
799
|
output.append(f"{'BIC':<50} {self.bic:>20.6f}")
|
|
731
|
-
|
|
800
|
+
|
|
732
801
|
# Coeficientes
|
|
733
802
|
output.append("\nCOEFICIENTES:")
|
|
734
803
|
output.append("-" * 100)
|
|
735
|
-
|
|
736
804
|
if self.std_errors is not None:
|
|
737
805
|
output.append(f"{'Variable':<20} {'Coef.':>15} {'Std Err':>15} {'t':>15} {'P>|t|':>15}")
|
|
738
806
|
output.append("-" * 100)
|
|
739
807
|
output.append(f"{'const':<20} {self.intercept_:>15.6f} {'-':>15} {'-':>15} {'-':>15}")
|
|
740
|
-
|
|
741
808
|
for i, name in enumerate(self.X_names):
|
|
742
809
|
output.append(
|
|
743
810
|
f"{name:<20} {self.coef_[i]:>15.6f} {self.std_errors[i]:>15.6f} "
|
|
@@ -747,10 +814,9 @@ class LinearRegressionResult:
|
|
|
747
814
|
output.append(f"{'Variable':<20} {'Coeficiente':>20}")
|
|
748
815
|
output.append("-" * 100)
|
|
749
816
|
output.append(f"{'const':<20} {self.intercept_:>20.6f}")
|
|
750
|
-
|
|
751
817
|
for i, name in enumerate(self.X_names):
|
|
752
818
|
output.append(f"{name:<20} {self.coef_[i]:>20.6f}")
|
|
753
|
-
|
|
819
|
+
|
|
754
820
|
# Análisis de residuos
|
|
755
821
|
output.append("\nANÁLISIS DE RESIDUOS:")
|
|
756
822
|
output.append("-" * 100)
|
|
@@ -760,10 +826,31 @@ class LinearRegressionResult:
|
|
|
760
826
|
output.append(f"{'Desv. Std. de Residuos':<50} {np.std(self.residuals):>20.6f}")
|
|
761
827
|
output.append(f"{'Mínimo Residuo':<50} {np.min(self.residuals):>20.6f}")
|
|
762
828
|
output.append(f"{'Máximo Residuo':<50} {np.max(self.residuals):>20.6f}")
|
|
763
|
-
|
|
764
829
|
output.append("=" * 100)
|
|
765
|
-
|
|
830
|
+
|
|
766
831
|
if self.show_plot:
|
|
832
|
+
self.plot()
|
|
767
833
|
output.append("\n[Gráficos diagnósticos generados]")
|
|
768
|
-
|
|
769
|
-
return "\n".join(output)
|
|
834
|
+
|
|
835
|
+
return "\n".join(output)
|
|
836
|
+
|
|
837
|
+
def plot(self):
|
|
838
|
+
"""Generar gráficos de regresión y residuales"""
|
|
839
|
+
if len(self.X_names) == 1:
|
|
840
|
+
# Scatter + línea de regresión
|
|
841
|
+
df_plot = pd.DataFrame({
|
|
842
|
+
self.X_names[0]: self.X.flatten(),
|
|
843
|
+
self.y_name: self.y,
|
|
844
|
+
'Predicciones': self.predictions
|
|
845
|
+
})
|
|
846
|
+
sns.lmplot(x=self.X_names[0], y=self.y_name, data=df_plot, ci=None)
|
|
847
|
+
plt.title(f"Regresión lineal: {self.y_name} ~ {self.X_names[0]}")
|
|
848
|
+
plt.show()
|
|
849
|
+
else:
|
|
850
|
+
# Para regresión múltiple, solo gráfico residuos vs predicciones
|
|
851
|
+
plt.scatter(self.predictions, self.residuals)
|
|
852
|
+
plt.axhline(0, color='red', linestyle='--')
|
|
853
|
+
plt.xlabel("Predicciones")
|
|
854
|
+
plt.ylabel("Residuos")
|
|
855
|
+
plt.title("Residuos vs Predicciones")
|
|
856
|
+
plt.show()
|