tsadmetrics 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. entorno/bin/activate_this.py +32 -0
  2. entorno/bin/rst2html.py +23 -0
  3. entorno/bin/rst2html4.py +26 -0
  4. entorno/bin/rst2html5.py +33 -0
  5. entorno/bin/rst2latex.py +26 -0
  6. entorno/bin/rst2man.py +27 -0
  7. entorno/bin/rst2odt.py +28 -0
  8. entorno/bin/rst2odt_prepstyles.py +20 -0
  9. entorno/bin/rst2pseudoxml.py +23 -0
  10. entorno/bin/rst2s5.py +24 -0
  11. entorno/bin/rst2xetex.py +27 -0
  12. entorno/bin/rst2xml.py +23 -0
  13. entorno/bin/rstpep2html.py +25 -0
  14. experiments/scripts/compute_metrics.py +187 -0
  15. experiments/scripts/metrics_complexity_analysis.py +109 -0
  16. experiments/scripts/metro_experiment.py +133 -0
  17. experiments/scripts/opt_metro_experiment.py +343 -0
  18. tests/__init__.py +0 -0
  19. tests/test_binary.py +759 -0
  20. tests/test_non_binary.py +371 -0
  21. tsadmetrics/_tsadeval/affiliation/__init__.py +0 -0
  22. tsadmetrics/_tsadeval/affiliation/_affiliation_zone.py +86 -0
  23. tsadmetrics/_tsadeval/affiliation/_integral_interval.py +464 -0
  24. tsadmetrics/_tsadeval/affiliation/_single_ground_truth_event.py +68 -0
  25. tsadmetrics/_tsadeval/affiliation/generics.py +135 -0
  26. tsadmetrics/_tsadeval/affiliation/metrics.py +114 -0
  27. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/File_IO.py +175 -0
  28. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Range.py +50 -0
  29. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Time_Plot.py +184 -0
  30. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/__init__.py +0 -0
  31. tsadmetrics/_tsadeval/eTaPR_pkg/__init__.py +0 -0
  32. tsadmetrics/_tsadeval/eTaPR_pkg/etapr.py +386 -0
  33. tsadmetrics/_tsadeval/eTaPR_pkg/tapr.py +362 -0
  34. tsadmetrics/_tsadeval/prts/__init__.py +0 -0
  35. tsadmetrics/_tsadeval/prts/base/__init__.py +0 -0
  36. tsadmetrics/_tsadeval/prts/base/time_series_metrics.py +165 -0
  37. tsadmetrics/_tsadeval/prts/basic_metrics_ts.py +121 -0
  38. tsadmetrics/_tsadeval/prts/time_series_metrics/__init__.py +0 -0
  39. tsadmetrics/_tsadeval/prts/time_series_metrics/fscore.py +61 -0
  40. tsadmetrics/_tsadeval/prts/time_series_metrics/precision.py +86 -0
  41. tsadmetrics/_tsadeval/prts/time_series_metrics/precision_recall.py +21 -0
  42. tsadmetrics/_tsadeval/prts/time_series_metrics/recall.py +85 -0
  43. {tsadmetrics-0.1.3.dist-info → tsadmetrics-0.1.5.dist-info}/METADATA +1 -1
  44. tsadmetrics-0.1.5.dist-info/RECORD +62 -0
  45. tsadmetrics-0.1.5.dist-info/top_level.txt +4 -0
  46. tsadmetrics-0.1.3.dist-info/RECORD +0 -20
  47. tsadmetrics-0.1.3.dist-info/top_level.txt +0 -1
  48. {tsadmetrics-0.1.3.dist-info → tsadmetrics-0.1.5.dist-info}/WHEEL +0 -0
experiments/scripts/metro_experiment.py ADDED
@@ -0,0 +1,133 @@
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ import tsadmetrics as tm
+ import time
+ from sklearn.metrics import f1_score
+
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import MinMaxScaler
+
+
+ df_analog = pd.read_csv('../preprocessed_data/MetroPT3_analogic.csv')
+ df_analog = pd.DataFrame(df_analog).set_index('timestamp')
+
+ # Separate the features (X) and the target variable (y)
+ X = df_analog.drop(columns='anomaly')  # Features
+ y = df_analog['anomaly']  # Target variable
+
+ # Scale the features to [0, 1]
+ scaler = MinMaxScaler(feature_range=(0, 1))
+ X_normalized = scaler.fit_transform(X)
+
+ # Convert the normalized result back to a DataFrame
+ X_normalized = pd.DataFrame(X_normalized, columns=X.columns, index=X.index)
+
+ # Split the normalized dataset into train and test sets
+ train_df_analog, test_df_analog = train_test_split(
+     X_normalized.join(y),  # join the normalized features with the target variable
+     test_size=0.4,
+     random_state=42
+ )
+
+ X_train_analog = train_df_analog.drop(columns='anomaly')
+ y_train_analog = train_df_analog['anomaly']
+ X_test_analog = test_df_analog.drop(columns='anomaly')
+ y_test_analog = test_df_analog['anomaly']
+
+
+ # Distance-based models
+ from pyod.models.lof import LOF
+ from pyod.models.cblof import CBLOF
+ from pyod.models.knn import KNN
+ from pyod.models.abod import ABOD
+
+ modelos_distancia = [
+     LOF(n_neighbors=35, contamination=np.sum(y_train_analog)/len(y_train_analog), n_jobs=-1),
+     # COF(contamination=np.sum(y_train_analog)/len(y_train_analog), method='memory'),
+     CBLOF(contamination=np.sum(y_train_analog)/len(y_train_analog), n_jobs=-1),
+     KNN(n_neighbors=35, contamination=np.sum(y_train_analog)/len(y_train_analog), n_jobs=-1),
+     ABOD(contamination=np.sum(y_train_analog)/len(y_train_analog))
+ ]
+
+ # Isolation-forest (tree-based) models
+ from pyod.models.iforest import IForest
+ modelos_arboles = [
+     IForest(contamination=np.sum(y_train_analog)/len(y_train_analog), n_jobs=-1, random_state=42)
+ ]
+
+ # Reconstruction-based models
+ from pyod.models.ae1svm import AE1SVM
+ from pyod.models.alad import ALAD
+ from pyod.models.auto_encoder import AutoEncoder
+
+ modelos_reconstruccion = [
+     AE1SVM(contamination=np.sum(y_train_analog)/len(y_train_analog)),
+     ALAD(contamination=np.sum(y_train_analog)/len(y_train_analog)),
+     AutoEncoder(contamination=np.sum(y_train_analog)/len(y_train_analog))
+ ]
+
+
+ # Run the models
+ distancia_results = pd.DataFrame(columns=['nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento'])
+ for modelo in modelos_distancia:
+     nombre_modelo = modelo.__class__.__name__
+     inicio = time.time()
+     try:
+         modelo.fit(X_train_analog)
+         t = time.time() - inicio
+         y_pred = modelo.predict(X_test_analog)
+         f1 = f1_score(y_test_analog, y_pred)
+         sw_f1 = tm.segment_wise_f_score(y_test_analog, y_pred)
+         print(f'Modelo: {nombre_modelo} - F1: {f1} - Segment-wise F1: {sw_f1} - Tiempo: {t}')
+         # Append the results to the DataFrame (only for models that ran successfully)
+         distancia_results.loc[len(distancia_results)] = [nombre_modelo, f1, sw_f1, t]
+     except Exception as e:
+         print(f'Error en el modelo {nombre_modelo}: {e}')
+
+ distancia_results.to_csv('../results/distancia_results.csv')
+
+ arbol_results = pd.DataFrame(columns=['nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento'])
+ for modelo in modelos_arboles:
+     nombre_modelo = modelo.__class__.__name__
+     inicio = time.time()
+     try:
+         modelo.fit(X_train_analog)
+         t = time.time() - inicio
+         y_pred = modelo.predict(X_test_analog)
+         f1 = f1_score(y_test_analog, y_pred)
+         sw_f1 = tm.segment_wise_f_score(y_test_analog, y_pred)
+         print(f'Modelo: {nombre_modelo} - F1: {f1} - Segment-wise F1: {sw_f1} - Tiempo: {t}')
+         # Append the results to the DataFrame
+         arbol_results.loc[len(arbol_results)] = [nombre_modelo, f1, sw_f1, t]
+     except Exception as e:
+         print(f'Error en el modelo {nombre_modelo}: {e}')
+
+ arbol_results.to_csv('../results/arbol_results.csv')
+
+ reconstruccion_results = pd.DataFrame(columns=['nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento'])
+ for modelo in modelos_reconstruccion:
+     nombre_modelo = modelo.__class__.__name__
+     inicio = time.time()
+     try:
+         # Reconstruction models are fitted on normal (non-anomalous) samples only
+         modelo.fit(X_train_analog[y_train_analog == 0])
+         t = time.time() - inicio
+         y_pred = modelo.predict(X_test_analog)
+         f1 = f1_score(y_test_analog, y_pred)
+         sw_f1 = tm.segment_wise_f_score(y_test_analog, y_pred)
+         print(f'Modelo: {nombre_modelo} - F1: {f1} - Segment-wise F1: {sw_f1} - Tiempo: {t}')
+         # Append the results to the DataFrame
+         reconstruccion_results.loc[len(reconstruccion_results)] = [nombre_modelo, f1, sw_f1, t]
+     except Exception as e:
+         print(f'Error en el modelo {nombre_modelo}: {e}')
+
+ reconstruccion_results.to_csv('../results/reconstruccion_results.csv')
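Both experiment scripts score every detector twice: once with scikit-learn's point-wise f1_score and once with tsadmetrics' segment_wise_f_score, which evaluates detections at the level of contiguous anomalous segments rather than individual points. The following minimal sketch shows that comparison in isolation; the tm.segment_wise_f_score(y_true, y_pred) call mirrors the scripts above, while the toy labels are made up purely for illustration.

import numpy as np
from sklearn.metrics import f1_score
import tsadmetrics as tm

# Toy labels (hypothetical): two ground-truth anomaly segments, each only partially detected.
y_true = np.array([0, 0, 1, 1, 1, 0, 0, 1, 1, 0])
y_pred = np.array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0])

# Point-wise F1 penalizes every missed point; the segment-wise score works on whole segments.
print('point-wise F1  :', f1_score(y_true, y_pred))
print('segment-wise F1:', tm.segment_wise_f_score(y_true, y_pred))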
experiments/scripts/opt_metro_experiment.py ADDED
@@ -0,0 +1,343 @@
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ import tsadmetrics as tm
+ import time
+ from sklearn.metrics import f1_score, recall_score, precision_score
+ import optuna
+ from optuna.samplers import TPESampler
+ from functools import partial
+ import warnings
+ warnings.filterwarnings('ignore')
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import MinMaxScaler
+ import os
+ import json
+ import torch
+ import torch.nn as nn
+ from torch.utils.data import DataLoader, TensorDataset
+ from sklearn.metrics import classification_report
+
+ def simplify_dataset(
+     data: pd.DataFrame,
+     window_size: int = 2,
+     time_col: str = None,
+     anomaly_col: str = 'anomaly',
+     agg_func: str = 'mean'
+ ) -> pd.DataFrame:
+     """
+     Reduce a dataset by aggregating it over fixed-size time windows.
+     """
+     simplified_data = data.rolling(window_size, step=window_size).mean()
+     simplified_data = simplified_data.dropna()
+     simplified_data[anomaly_col] = (simplified_data[anomaly_col] > 0.1).astype(int)
+     return simplified_data.reset_index(drop=True)
+
+ # Initial PyTorch configuration
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ def guardar_prediccion(modelo_nombre, y_true, y_pred_binario, y_pred_continuo=None, timestamps=None):
+     """
+     Save the predictions sorted by timestamp.
+     """
+     os.makedirs('../results/predictions', exist_ok=True)
+
+     resultados = pd.DataFrame({
+         'timestamp': timestamps if timestamps is not None else np.arange(len(y_true)),
+         'ground_truth': np.array(y_true).flatten(),
+         'prediction_binary': np.array(y_pred_binario).flatten()
+     })
+
+     if y_pred_continuo is not None:
+         resultados['prediction_continuous'] = np.array(y_pred_continuo).flatten()
+
+     if 'timestamp' in resultados.columns:
+         resultados = resultados.sort_values('timestamp')
+
+     nombre_archivo = f'../results/predictions/{modelo_nombre}_pred.csv'
+     resultados.to_csv(nombre_archivo, index=False)
+     return nombre_archivo
+
+ # -------------------------------
+ # Load and prepare the data
+ df_analog = pd.read_csv('../preprocessed_data/MetroPT3_analogic.csv')
+ df_analog = pd.DataFrame(df_analog).set_index('timestamp')
+ df_analog = df_analog.sort_index()
+
+ # Size reduction
+ print('Tamaño inicial del dataset:', df_analog.shape)
+ print(f'Proporción de anomalías: {df_analog["anomaly"].mean():.2f}')
+ df_analog = simplify_dataset(df_analog, window_size=10, time_col='timestamp')
+ print('Tamaño del dataset:', df_analog.shape)
+ print(f'Proporción de anomalías: {df_analog["anomaly"].mean():.2f}')
+
+ # Separate the features from the target and normalize
+ X = df_analog.drop(columns='anomaly')
+ y = df_analog['anomaly']
+ scaler = MinMaxScaler(feature_range=(0, 1))
+ X_normalized = scaler.fit_transform(X)
+ X_normalized = pd.DataFrame(X_normalized, columns=X.columns, index=X.index)
+
+ # -------------------------------
+ # DATA SPLITTING
+ # 1. For non-LSTM models (shuffle=True)
+ train_df_shuf, test_df_shuf = train_test_split(
+     X_normalized.join(y),
+     test_size=0.4,
+     random_state=42,
+     shuffle=True
+ )
+
+ # 2. For the LSTM (shuffle=False to preserve temporal order)
+ train_df_noshuf, test_df_noshuf = train_test_split(
+     X_normalized.join(y),
+     test_size=0.4,
+     random_state=42,
+     shuffle=False
+ )
+
+ # Prepare data for non-LSTM models
+ X_train_shuf = train_df_shuf.drop(columns='anomaly')
+ y_train_shuf = train_df_shuf['anomaly']
+ X_test_shuf = test_df_shuf.drop(columns='anomaly')
+ y_test_shuf = test_df_shuf['anomaly']
+
+ # Prepare data for the LSTM
+ X_train_noshuf = train_df_noshuf.drop(columns='anomaly')
+ y_train_noshuf = train_df_noshuf['anomaly']
+ X_test_noshuf = test_df_noshuf.drop(columns='anomaly')
+ y_test_noshuf = test_df_noshuf['anomaly']
+
+ contamination = np.sum(y_train_shuf)/len(y_train_shuf)
+
+ # -------------------------------
+ # LSTM model definition
+ class AnomalyLSTM(nn.Module):
+     def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.2):
+         super(AnomalyLSTM, self).__init__()
+         self.lstm = nn.LSTM(
+             input_size=input_size,
+             hidden_size=hidden_size,
+             num_layers=num_layers,
+             batch_first=True,
+             dropout=dropout if num_layers > 1 else 0
+         )
+         self.fc = nn.Linear(hidden_size, 1)
+         self.sigmoid = nn.Sigmoid()
+
+     def forward(self, x):
+         lstm_out, _ = self.lstm(x)
+         last_time_step = lstm_out[:, -1, :]
+         output = self.fc(last_time_step)
+         return self.sigmoid(output)
+
+ def train_lstm(X_train, y_train, X_test, y_test, timestamps_test, sequence_length=10, epochs=20, batch_size=16):
+     # Build the sequential (windowed) data
+     def create_sequences(data, targets, seq_length):
+         xs, ys = [], []
+         for i in range(len(data)-seq_length):
+             xs.append(data[i:(i+seq_length)])
+             ys.append(targets[i+seq_length])
+         return np.array(xs), np.array(ys)
+
+     X_train_seq, y_train_seq = create_sequences(X_train.values, y_train.values, sequence_length)
+     X_test_seq, y_test_seq = create_sequences(X_test.values, y_test.values, sequence_length)
+
+     # Convert to PyTorch tensors
+     train_data = TensorDataset(
+         torch.FloatTensor(X_train_seq),
+         torch.FloatTensor(y_train_seq).unsqueeze(1)
+     )
+     test_data = TensorDataset(
+         torch.FloatTensor(X_test_seq),
+         torch.FloatTensor(y_test_seq).unsqueeze(1)
+     )
+
+     # IMPORTANT: shuffle=False for the DataLoader
+     train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)
+     test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
+
+     # Initialize the model
+     model = AnomalyLSTM(input_size=X_train.shape[1]).to(device)
+     criterion = nn.BCELoss()
+     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
+
+     # Training
+     train_start = time.time()
+     for epoch in range(epochs):
+         model.train()
+         for batch_x, batch_y in train_loader:
+             batch_x, batch_y = batch_x.to(device), batch_y.to(device)
+             optimizer.zero_grad()
+             outputs = model(batch_x)
+             loss = criterion(outputs, batch_y)
+             loss.backward()
+             optimizer.step()
+
+     # Evaluation
+     model.eval()
+     test_preds, test_true, test_scores = [], [], []
+     with torch.no_grad():
+         for batch_x, batch_y in test_loader:
+             batch_x, batch_y = batch_x.to(device), batch_y.to(device)
+             outputs = model(batch_x)
+             predicted = (outputs > 0.5).float()
+             test_preds.extend(predicted.cpu().numpy())
+             test_scores.extend(outputs.cpu().numpy())
+             test_true.extend(batch_y.cpu().numpy())
+
+     train_time = time.time() - train_start
+
+     # Pad the predictions back to the original length and sort by timestamp
+     full_preds = np.concatenate([np.zeros(sequence_length), np.array(test_preds).flatten()])
+     full_scores = np.concatenate([np.zeros(sequence_length), np.array(test_scores).flatten()])
+     full_preds = full_preds[:len(y_test)]
+     full_scores = full_scores[:len(y_test)]
+
+     # Build a DataFrame with timestamps for sorting
+     pred_df = pd.DataFrame({
+         'timestamp': timestamps_test[-len(full_preds):],
+         'y_true': y_test[-len(full_preds):],
+         'y_pred': full_preds,
+         'y_scores': full_scores
+     }).sort_values('timestamp')
+
+     # Compute metrics on the sorted predictions
+     f1 = f1_score(pred_df['y_true'], pred_df['y_pred'])
+     sw_f1 = tm.segment_wise_f_score(pred_df['y_true'], pred_df['y_pred'])
+
+     guardar_prediccion("LSTM", pred_df['y_true'], pred_df['y_pred'], pred_df['y_scores'], pred_df['timestamp'])
+
+     return model, f1, sw_f1, train_time, pred_df['y_pred']
+
+ # [The remaining functions (objective, optimize_model, evaluate_models) are unchanged...]
+
+ # -------------------------------
+ # Models
+ from pyod.models.lof import LOF
+ from pyod.models.cblof import CBLOF
+ from pyod.models.knn import KNN
+ from pyod.models.iforest import IForest
+ from pyod.models.ae1svm import AE1SVM
+ from pyod.models.auto_encoder import AutoEncoder
+
+ modelos_distancia = [LOF, CBLOF, KNN]
+ modelos_arboles = [IForest]
+ modelos_machine_learning = []
+ modelos_reconstruccion = [AE1SVM, AutoEncoder]
+
+ # -------------------------------
+ # Modified evaluate_models: sorts every prediction by timestamp before computing metrics
+ def evaluate_models(model_classes, best_params_dict, results_filename, include_lstm=False):
+     results_df = pd.DataFrame(columns=[
+         'nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento', 'best_params'
+     ])
+
+     # Evaluate non-LSTM models (using the shuffled split)
+     for model_class in model_classes:
+         nombre_modelo = model_class.__name__
+         params = best_params_dict.get(nombre_modelo, {})
+         params['contamination'] = contamination
+
+         if model_class.__name__ in ['LOF', 'CBLOF', 'KNN', 'IForest']:
+             params['n_jobs'] = -1
+
+         inicio = time.time()
+         try:
+             model = model_class(**params)
+
+             if nombre_modelo in ['AutoEncoder', 'AE1SVM']:
+                 model.fit(X_train_shuf[y_train_shuf == 0])
+             else:
+                 model.fit(X_train_shuf)
+
+             t = time.time() - inicio
+             y_pred = model.predict(X_test_shuf)
+             y_scores = model.decision_function(X_test_shuf) if hasattr(model, 'decision_function') else None
+
+             # Temporary DataFrame with timestamps, used for sorting
+             pred_df = pd.DataFrame({
+                 'timestamp': X_test_shuf.index,
+                 'y_true': y_test_shuf,
+                 'y_pred': y_pred,
+                 'y_scores': y_scores if y_scores is not None else np.nan
+             }).sort_values('timestamp')
+
+             # Compute metrics on the sorted data
+             f1 = f1_score(pred_df['y_true'], pred_df['y_pred'])
+             sw_f1 = tm.segment_wise_f_score(pred_df['y_true'], pred_df['y_pred'])
+
+             # Save the sorted predictions
+             guardar_prediccion(
+                 nombre_modelo,
+                 pred_df['y_true'],
+                 pred_df['y_pred'],
+                 pred_df['y_scores'] if 'y_scores' in pred_df.columns else None,
+                 pred_df['timestamp']
+             )
+
+             print(f'Modelo: {nombre_modelo} - F1: {f1:.4f} - Segment-wise F1: {sw_f1:.4f} - Tiempo: {t:.2f}s')
+
+             results_df.loc[len(results_df)] = [
+                 nombre_modelo, f1, sw_f1, t, json.dumps(params, ensure_ascii=False)
+             ]
+         except Exception as e:
+             print(f'Error en el modelo {nombre_modelo}: {e}')
+
+     # Evaluate the LSTM (using the unshuffled split)
+     if include_lstm:
+         inicio_lstm = time.time()
+         print("\nEntrenando modelo LSTM (sin shuffle)...")
+
+         lstm_model, lstm_f1, lstm_sw_f1, lstm_time, lstm_preds = train_lstm(
+             X_train_noshuf, y_train_noshuf,
+             X_test_noshuf, y_test_noshuf,
+             timestamps_test=X_test_noshuf.index
+         )
+
+         print(f'Modelo: LSTM - F1: {lstm_f1:.4f} - Segment-wise F1: {lstm_sw_f1:.4f} - Tiempo: {lstm_time:.2f}s')
+
+         results_df.loc[len(results_df)] = [
+             "LSTM", lstm_f1, lstm_sw_f1, lstm_time,
+             json.dumps({
+                 "sequence_length": 10,
+                 "epochs": 20,
+                 "batch_size": 16,
+                 "hidden_size": 64,
+                 "num_layers": 2,
+                 "dropout": 0.2
+             }, ensure_ascii=False)
+         ]
+
+     # Save the results
+     os.makedirs('../results', exist_ok=True)
+     results_df.to_csv(f'../results/{results_filename}', index=False)
+     print(f'Resultados guardados en {results_filename}')
+     return results_df
+
+ # -------------------------------
+ # Parameters and execution
+ best_params = {
+     'LOF': {"n_neighbors": 62, "metric": "minkowski", "contamination": contamination, "n_jobs": -1},
+     'CBLOF': {"n_clusters": 8, "alpha": 0.87571, "beta": 6, "contamination": contamination, "n_jobs": -1},
+     'KNN': {"n_neighbors": 5, "method": "mean", "contamination": contamination, "n_jobs": -1},
+     'IForest': {'n_jobs': -1, "contamination": contamination},
+     'AutoEncoder': {},
+     'AE1SVM': {}
+ }
+
+ print("\nEvaluando modelos basados en distancia...")
+ distancia_results = evaluate_models(modelos_distancia, best_params, 'distancia_results.csv')
+
+ print("\nEvaluando modelos basados en árboles...")
+ arbol_results = evaluate_models(modelos_arboles, best_params, 'arbol_results.csv')
+
+ print("\nEvaluando modelos de reconstrucción...")
+ reconstruccion_results = evaluate_models(modelos_reconstruccion, best_params, 'reconstruccion_results.csv')
+
+ print("\nEvaluando LSTM...")
+ ml_results = evaluate_models(modelos_machine_learning, best_params, 'ml_results.csv', include_lstm=True)
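For reference, the per-model prediction files written by guardar_prediccion can be reloaded later to recompute both metrics offline. A small sketch, assuming a run has actually produced a file such as ../results/predictions/LOF_pred.csv; the path and column names follow guardar_prediccion above, and nothing beyond that is guaranteed.

import pandas as pd
from sklearn.metrics import f1_score
import tsadmetrics as tm

# Load one of the prediction files written by guardar_prediccion (hypothetical run output).
pred = pd.read_csv('../results/predictions/LOF_pred.csv')
y_true = pred['ground_truth']
y_pred = pred['prediction_binary']

print('point-wise F1  :', f1_score(y_true, y_pred))
print('segment-wise F1:', tm.segment_wise_f_score(y_true, y_pred))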
tests/__init__.py ADDED
File without changes