tsadmetrics 0.1.5__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. {tsadmetrics-0.1.5/tsadmetrics.egg-info → tsadmetrics-0.1.6}/PKG-INFO +1 -1
  2. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2html.py +1 -1
  3. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2html4.py +1 -1
  4. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2html5.py +1 -1
  5. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2latex.py +1 -1
  6. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2man.py +1 -1
  7. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2odt.py +1 -1
  8. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2odt_prepstyles.py +1 -1
  9. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2pseudoxml.py +1 -1
  10. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2s5.py +1 -1
  11. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2xetex.py +1 -1
  12. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2xml.py +1 -1
  13. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rstpep2html.py +1 -1
  14. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/pyproject.toml +1 -1
  15. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/utils.py +10 -4
  16. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6/tsadmetrics.egg-info}/PKG-INFO +1 -1
  17. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics.egg-info/SOURCES.txt +0 -4
  18. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics.egg-info/top_level.txt +0 -1
  19. tsadmetrics-0.1.5/experiments/scripts/compute_metrics.py +0 -187
  20. tsadmetrics-0.1.5/experiments/scripts/metrics_complexity_analysis.py +0 -109
  21. tsadmetrics-0.1.5/experiments/scripts/metro_experiment.py +0 -133
  22. tsadmetrics-0.1.5/experiments/scripts/opt_metro_experiment.py +0 -343
  23. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/MANIFEST.in +0 -0
  24. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/README.md +0 -0
  25. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/activate_this.py +0 -0
  26. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/setup.cfg +0 -0
  27. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/setup.py +0 -0
  28. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tests/__init__.py +0 -0
  29. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tests/test_binary.py +0 -0
  30. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tests/test_non_binary.py +0 -0
  31. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/__init__.py +0 -0
  32. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/__init__.py +0 -0
  33. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/__init__.py +0 -0
  34. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/_affiliation_zone.py +0 -0
  35. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/_integral_interval.py +0 -0
  36. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/_single_ground_truth_event.py +0 -0
  37. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/generics.py +0 -0
  38. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/metrics.py +0 -0
  39. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/auc_roc_pr_plot.py +0 -0
  40. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/discontinuity_graph.py +0 -0
  41. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/File_IO.py +0 -0
  42. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Range.py +0 -0
  43. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Time_Plot.py +0 -0
  44. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/__init__.py +0 -0
  45. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/__init__.py +0 -0
  46. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/etapr.py +0 -0
  47. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/tapr.py +0 -0
  48. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/latency_sparsity_aware.py +0 -0
  49. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/metrics.py +0 -0
  50. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/nabscore.py +0 -0
  51. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/__init__.py +0 -0
  52. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/base/__init__.py +0 -0
  53. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/base/time_series_metrics.py +0 -0
  54. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/basic_metrics_ts.py +0 -0
  55. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/time_series_metrics/__init__.py +0 -0
  56. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/time_series_metrics/fscore.py +0 -0
  57. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/time_series_metrics/precision.py +0 -0
  58. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/time_series_metrics/precision_recall.py +0 -0
  59. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/time_series_metrics/recall.py +0 -0
  60. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/tests.py +0 -0
  61. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/threshold_plt.py +0 -0
  62. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/time_tolerant.py +0 -0
  63. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/vus_utils.py +0 -0
  64. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/binary_metrics.py +0 -0
  65. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/metric_utils.py +0 -0
  66. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/non_binary_metrics.py +0 -0
  67. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/py.typed +0 -0
  68. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics.egg-info/dependency_links.txt +0 -0
  69. {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics.egg-info/requires.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tsadmetrics
3
- Version: 0.1.5
3
+ Version: 0.1.6
4
4
  Summary: Librería para evaluación de detección de anomalías en series temporales
5
5
  Home-page: https://github.com/pathsko/TSADmetrics
6
6
  Author: Pedro Rafael Velasco Priego
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # $Id: rst2html.py 9115 2022-07-28 17:06:24Z milde $
4
4
  # Author: David Goodger <goodger@python.org>
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # $Id: rst2html4.py 9115 2022-07-28 17:06:24Z milde $
4
4
  # Author: David Goodger <goodger@python.org>
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
  # :Copyright: © 2015 Günter Milde.
3
3
  # :License: Released under the terms of the `2-Clause BSD license`_, in short:
4
4
  #
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # $Id: rst2latex.py 9115 2022-07-28 17:06:24Z milde $
4
4
  # Author: David Goodger <goodger@python.org>
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # Author:
4
4
  # Contact: grubert@users.sf.net
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # $Id: rst2odt.py 9115 2022-07-28 17:06:24Z milde $
4
4
  # Author: Dave Kuhlman <dkuhlman@rexx.com>
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # Copyright: This module has been placed in the public domain.
4
4
 
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # $Id: rst2pseudoxml.py 9115 2022-07-28 17:06:24Z milde $
4
4
  # Author: David Goodger <goodger@python.org>
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # $Id: rst2s5.py 9115 2022-07-28 17:06:24Z milde $
4
4
  # Author: Chris Liechti <cliechti@gmx.net>
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # $Id: rst2xetex.py 9115 2022-07-28 17:06:24Z milde $
4
4
  # Author: Guenter Milde
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # $Id: rst2xml.py 9115 2022-07-28 17:06:24Z milde $
4
4
  # Author: David Goodger <goodger@python.org>
@@ -1,4 +1,4 @@
1
- #!/home/linux/Documentos/TSADmetrics/entorno/bin/python
1
+ #!/home/linux/Documentos/TFG/TSADmetrics/entorno/bin/python
2
2
 
3
3
  # $Id: rstpep2html.py 9115 2022-07-28 17:06:24Z milde $
4
4
  # Author: David Goodger <goodger@python.org>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "tsadmetrics"
3
- version = "0.1.5"
3
+ version = "0.1.6"
4
4
  description = "Librería para evaluación de detección de anomalías en series temporales"
5
5
  authors = [
6
6
  { name = "Pedro Rafael Velasco Priego", email = "i12veprp@uco.es" }
@@ -1,7 +1,8 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
+ import time
3
4
 
4
- def compute_metrics(y_true: np.array,y_pred: np.array,metrics: list, metrics_params: dict, is_anomaly_score = False):
5
+ def compute_metrics(y_true: np.array,y_pred: np.array,metrics: list, metrics_params: dict, is_anomaly_score = False, verbose = False):
5
6
  """
6
7
  Computes the specified metrics for the given true and predicted values.
7
8
 
@@ -11,7 +12,7 @@ def compute_metrics(y_true: np.array,y_pred: np.array,metrics: list, metrics_par
11
12
  - metrics (list): List of metric names to compute.
12
13
  - metrics_params (dict): Dictionary of parameters for each metric.
13
14
  - is_anomaly_score (bool): Flag indicating if y_true and y_pred are anomaly scores. Otherwise, they are treated as binary labels.
14
-
15
+ - verbose (bool): Flag to print additional information.
15
16
  Returns:
16
17
  - metrics_df (DataFrame): DataFrame containing the computed metrics and their values.
17
18
  """
@@ -29,9 +30,14 @@ def compute_metrics(y_true: np.array,y_pred: np.array,metrics: list, metrics_par
29
30
  for metric in metrics:
30
31
  metric_name = metric[0]
31
32
  metric_func = metric[1]
32
-
33
+ if verbose:
34
+ print(f"Calculating metric: {metric_name}")
35
+ t0 = time.time()
33
36
  metric_value = metric_func(y_true, y_pred, **metrics_params.get(metric_name, {}))
34
-
37
+ if verbose:
38
+ t1 = time.time()
39
+ print(f"Metric {metric_name} calculated in {t1 - t0:.4f} seconds")
40
+ print(f"Metric {metric_name} value: {metric_value}")
35
41
  # Store the result in the DataFrame
36
42
  results[metric_name] = metric_value
37
43
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tsadmetrics
3
- Version: 0.1.5
3
+ Version: 0.1.6
4
4
  Summary: Librería para evaluación de detección de anomalías en series temporales
5
5
  Home-page: https://github.com/pathsko/TSADmetrics
6
6
  Author: Pedro Rafael Velasco Priego
@@ -15,10 +15,6 @@ entorno/bin/rst2s5.py
15
15
  entorno/bin/rst2xetex.py
16
16
  entorno/bin/rst2xml.py
17
17
  entorno/bin/rstpep2html.py
18
- experiments/scripts/compute_metrics.py
19
- experiments/scripts/metrics_complexity_analysis.py
20
- experiments/scripts/metro_experiment.py
21
- experiments/scripts/opt_metro_experiment.py
22
18
  tests/__init__.py
23
19
  tests/test_binary.py
24
20
  tests/test_non_binary.py
@@ -1,4 +1,3 @@
1
1
  entorno
2
- experiments
3
2
  tests
4
3
  tsadmetrics
@@ -1,187 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- import time
4
- import os
5
- from tsadmetrics import *
6
-
7
- def cargar_prediccion(modelo_nombre):
8
- """
9
- Carga las predicciones guardadas previamente.
10
- """
11
- nombre_archivo = f'../results/predictions/{modelo_nombre}_pred.csv'
12
- resultados = pd.read_csv(nombre_archivo)
13
-
14
- y_true = resultados['ground_truth'].values
15
- y_pred_binary = resultados['prediction_binary'].values
16
- y_pred_continuous = resultados['prediction_continuous'].values
17
- return y_true, y_pred_binary, y_pred_continuous
18
-
19
- # Lista de modelos y métricas
20
- nombre_modelos = ['CBLOF', 'IForest', 'KNN', 'LOF','AE1SVM','AutoEncoder','LSTM']
21
- metrics = [
22
- ('point_wise_f_score', point_wise_f_score),
23
- ('point_adjusted_f_score', point_adjusted_f_score),
24
- ('delay_th_point_adjusted_f_score', delay_th_point_adjusted_f_score),
25
- ('point_adjusted_at_k_f_score', point_adjusted_at_k_f_score),
26
- ('latency_sparsity_aw_f_score', latency_sparsity_aw_f_score),
27
- ('segment_wise_f_score', segment_wise_f_score),
28
- ('composite_f_score', composite_f_score),
29
- ('time_tolerant_f_score', time_tolerant_f_score),
30
- ('range_based_f_score',range_based_f_score),
31
- ('ts_aware_f_score',ts_aware_f_score),
32
- ('enhanced_ts_aware_f_score', enhanced_ts_aware_f_score),
33
- ('affiliation_based_f_score', affiliation_based_f_score),
34
- ('nab_score', nab_score),
35
- ('temporal_distance', temporal_distance),
36
- ('average_detection_count', average_detection_count),
37
- ('absolute_detection_distance',absolute_detection_distance),
38
- ('total_detected_in_range',total_detected_in_range),
39
- ('detection_accuracy_in_range',detection_accuracy_in_range),
40
- ('weighted_detection_difference',weighted_detection_difference),
41
- ('binary_pate', binary_pate),
42
- ('mean_time_to_detect', mean_time_to_detect),
43
- ]
44
-
45
- metrics_params = {
46
- 'delay_th_point_adjusted_f_score': {'k': 10},
47
- 'point_adjusted_at_k_f_score': {'k': 0.7},
48
- 'latency_sparsity_aw_f_score': {'ni': 2},
49
- 'time_tolerant_f_score': {'t': 30},
50
- 'range_based_f_score': {'p_alpha': 0, 'r_alpha':0},
51
- 'ts_aware_f_score': {'theta': 0.5, 'alpha':0.5, 'delta': 0, 'beta':1},
52
- 'enhanced_ts_aware_f_score': {'beta':1,'theta_p': 0.5, 'theta_r':0.1},
53
- 'total_detected_in_range': {'k': 30},
54
- 'detection_accuracy_in_range': {'k': 30},
55
- 'weighted_detection_difference':{'k': 30},
56
- 'binary_pate': {'early': 20, 'delay': 20}
57
- }
58
-
59
- # Crear directorio si no existe
60
- os.makedirs('../results/computed_metrics', exist_ok=True)
61
-
62
- # Rutas de los archivos
63
- results_path = '../results/computed_metrics/resultados.csv'
64
- times_path = '../results/computed_metrics/tiempos.csv'
65
-
66
- # Intentar cargar resultados existentes o crear nuevos DataFrames
67
- try:
68
- all_results_df = pd.read_csv(results_path)
69
- all_times_df = pd.read_csv(times_path)
70
- print("Cargados resultados previos encontrados.")
71
- except:
72
- all_results_df = pd.DataFrame(columns=['modelo'] + [m[0] for m in metrics])
73
- all_times_df = pd.DataFrame(columns=['modelo', 'metrica', 'tiempo'])
74
- print("No se encontraron resultados previos, comenzando desde cero.")
75
-
76
- # Función para guardar el progreso
77
- def guardar_progreso():
78
- all_results_df.to_csv(results_path, index=False)
79
- all_times_df.to_csv(times_path, index=False)
80
- print(f"\nProgreso guardado a las {time.strftime('%H:%M:%S')}")
81
-
82
- # Tiempo de inicio y última vez que se guardó
83
- start_time = time.time()
84
- last_save_time = start_time
85
-
86
- # Bucle principal de cálculo
87
- for modelo in nombre_modelos:
88
- # Verificar si el modelo ya está completo en los resultados
89
- if modelo in all_results_df['modelo'].values:
90
- print(f"\nModelo {modelo} ya calculado, saltando...")
91
- continue
92
-
93
- print(f"\nComenzando cálculo para modelo: {modelo}")
94
-
95
- try:
96
- # Cargar predicciones
97
- y_true, y_pred, _ = cargar_prediccion(modelo)
98
-
99
- # Diccionario para almacenar resultados del modelo actual
100
- model_results = {'modelo': modelo}
101
- model_times = []
102
-
103
- for metric_name, metric_func in metrics:
104
- # Verificar si esta métrica ya está calculada para este modelo
105
- if not all_results_df.empty and modelo in all_results_df['modelo'].values:
106
- existing_row = all_results_df[all_results_df['modelo'] == modelo].iloc[0]
107
- if not pd.isna(existing_row[metric_name]):
108
- print(f"Métrica {metric_name} ya calculada para {modelo}, saltando...")
109
- continue
110
-
111
- print(f"Calculando métrica: {metric_name}, modelo: {modelo}...")
112
-
113
- try:
114
- # Calcular métrica y tiempo de ejecución
115
- start_metric_time = time.time()
116
-
117
- if metric_name in metrics_params:
118
- params = metrics_params[metric_name]
119
- metric_value = metric_func(y_true, y_pred, **params)
120
- else:
121
- metric_value = metric_func(y_true, y_pred)
122
-
123
- computation_time = time.time() - start_metric_time
124
-
125
- # Actualizar resultados
126
- model_results[metric_name] = metric_value
127
- model_times.append({
128
- 'modelo': modelo,
129
- 'metrica': metric_name,
130
- 'tiempo': computation_time
131
- })
132
-
133
- print(f"Valor: {metric_value:.4f}, tiempo: {computation_time:.4f}s")
134
-
135
- # Guardar progreso cada hora
136
- current_time = time.time()
137
- if current_time - last_save_time > 3600: # 3600 segundos = 1 hora
138
- # Añadir resultados parciales
139
- if modelo not in all_results_df['modelo'].values:
140
- all_results_df = pd.concat([all_results_df, pd.DataFrame([model_results])], ignore_index=True)
141
- else:
142
- idx = all_results_df.index[all_results_df['modelo'] == modelo][0]
143
- all_results_df.loc[idx, metric_name] = metric_value
144
-
145
- all_times_df = pd.concat([all_times_df, pd.DataFrame(model_times)], ignore_index=True)
146
- model_times = [] # Resetear tiempos para no duplicar
147
-
148
- guardar_progreso()
149
- last_save_time = current_time
150
-
151
- except Exception as e:
152
- print(f"Error calculando {metric_name} para {modelo}: {str(e)}")
153
- model_results[metric_name] = np.nan
154
- model_times.append({
155
- 'modelo': modelo,
156
- 'metrica': metric_name,
157
- 'tiempo': np.nan
158
- })
159
-
160
- # Añadir resultados completos del modelo a los DataFrames principales
161
- if modelo not in all_results_df['modelo'].values:
162
- all_results_df = pd.concat([all_results_df, pd.DataFrame([model_results])], ignore_index=True)
163
- else:
164
- # Actualizar fila existente
165
- idx = all_results_df.index[all_results_df['modelo'] == modelo][0]
166
- for metric_name in model_results:
167
- if metric_name != 'modelo':
168
- all_results_df.loc[idx, metric_name] = model_results[metric_name]
169
-
170
- all_times_df = pd.concat([all_times_df, pd.DataFrame(model_times)], ignore_index=True)
171
-
172
- # Guardar después de completar cada modelo
173
- guardar_progreso()
174
- last_save_time = time.time()
175
-
176
- except Exception as e:
177
- print(f"Error procesando modelo {modelo}: {str(e)}")
178
- # Añadir filas con NaN para este modelo
179
- model_results = {'modelo': modelo}
180
- for m in metrics:
181
- model_results[m[0]] = np.nan
182
- all_results_df = pd.concat([all_results_df, pd.DataFrame([model_results])], ignore_index=True)
183
- guardar_progreso()
184
-
185
- # Guardar resultados finales
186
- guardar_progreso()
187
- print("\nProceso completado. Resultados finales guardados.")
@@ -1,109 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- import time
4
- import os
5
- from tsadmetrics import *
6
-
7
- # Lista de modelos y métricas
8
- binary_metrics = [
9
- ('point_wise_f_score', point_wise_f_score),
10
- ('point_adjusted_f_score', point_adjusted_f_score),
11
- ('delay_th_point_adjusted_f_score', delay_th_point_adjusted_f_score),
12
- ('point_adjusted_at_k_f_score', point_adjusted_at_k_f_score),
13
- ('latency_sparsity_aw_f_score', latency_sparsity_aw_f_score),
14
- ('segment_wise_f_score', segment_wise_f_score),
15
- ('composite_f_score', composite_f_score),
16
- ('time_tolerant_f_score', time_tolerant_f_score),
17
- ('range_based_f_score',range_based_f_score),
18
- ('ts_aware_f_score',ts_aware_f_score),
19
- ('enhanced_ts_aware_f_score', enhanced_ts_aware_f_score),
20
- ('affiliation_based_f_score', affiliation_based_f_score),
21
- ('nab_score', nab_score),
22
- ('temporal_distance', temporal_distance),
23
- ('average_detection_count', average_detection_count),
24
- ('absolute_detection_distance',absolute_detection_distance),
25
- ('total_detected_in_range',total_detected_in_range),
26
- ('detection_accuracy_in_range',detection_accuracy_in_range),
27
- ('weighted_detection_difference',weighted_detection_difference),
28
- ('binary_pate', binary_pate),
29
- ('mean_time_to_detect', mean_time_to_detect),
30
-
31
- ]
32
- binary_metrics_params ={
33
- 'delay_th_point_adjusted_f_score': {'k': 10},
34
- 'point_adjusted_at_k_f_score': {'k': 0.7},
35
- 'latency_sparsity_aw_f_score': {'ni': 2},
36
- 'time_tolerant_f_score': {'t': 30},
37
- 'range_based_f_score': {'p_alpha': 0, 'r_alpha':0}, #Valor por defecto
38
- 'ts_aware_f_score': {'theta': 0.5, 'alpha':0.5, 'delta': 0, 'beta':1}, #Valor por defecto
39
- 'enhanced_ts_aware_f_score': {'beta':1,'theta_p': 0.5, 'theta_r':0.1}, #Valor por defecto
40
- 'total_detected_in_range': {'k': 30},
41
- 'detection_accuracy_in_range': {'k': 30},
42
- 'weighted_detection_difference':{'k': 3},
43
- 'binary_pate': {'early': 20, 'delay': 20}
44
-
45
- }
46
-
47
- continuous_metrics = [
48
- ('precision_at_k', precision_at_k),
49
- ('auc_roc_pw', auc_roc_pw),
50
- ('auc_pr_pw', auc_pr_pw),
51
- ('auc_pr_pa', auc_pr_pa),
52
- ('auc_pr_sw', auc_pr_sw),
53
- ('vus_roc', vus_roc),
54
- ('vus_pr', vus_pr),
55
- ('real_pate', real_pate)]
56
-
57
- continuous_metrics_params ={
58
- 'vus_roc': {'window': 4},
59
- 'vus_pr': {'window': 4},
60
- 'real_pate': {'early': 3, 'delay': 3},
61
-
62
- }
63
- SIZE = 1000
64
- # Cargar predicciones
65
- y_true, y_pred = np.random.choice([0, 1], size=SIZE), np.random.choice([0, 1], size=SIZE)
66
-
67
- Binary_mode = 1
68
-
69
- if Binary_mode == 0:
70
- for metric_name, metric_func in binary_metrics:
71
-
72
- # Calcular métrica y tiempo de ejecución
73
-
74
-
75
- start_time = time.time()
76
- if metric_name in binary_metrics_params:
77
- params = binary_metrics_params[metric_name]
78
- metric_value = metric_func(y_true, y_pred, **params)
79
- else:
80
- metric_value = metric_func(y_true, y_pred)
81
- computation_time = time.time() - start_time
82
-
83
-
84
-
85
- print(f"Métrica: {metric_name} - Valor: {metric_value:.4f} - Tiempo: {computation_time:.4f}s")
86
-
87
- else:
88
- y_true = np.random.choice([0, 1], size=SIZE)
89
- y_pred = np.random.rand(SIZE) # Predicciones continuas
90
-
91
- # Calcular métrica y tiempo de ejecución
92
-
93
- for metric_name, metric_func in continuous_metrics:
94
-
95
- start_time = time.time()
96
- if metric_name in continuous_metrics_params:
97
- params = continuous_metrics_params[metric_name]
98
- metric_value = metric_func(y_true, y_pred, **params)
99
- else:
100
- metric_value = metric_func(y_true, y_pred)
101
- computation_time = time.time() - start_time
102
-
103
-
104
-
105
- print(f"Métrica: {metric_name} - Valor: {metric_value:.4f} - Tiempo: {computation_time:.4f}s")
106
-
107
-
108
-
109
-
@@ -1,133 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- import matplotlib.pyplot as plt
4
- import seaborn as sns
5
- import tsadmetrics as tm
6
- import time
7
- from sklearn.metrics import f1_score
8
-
9
- from sklearn.model_selection import train_test_split
10
- from sklearn.preprocessing import MinMaxScaler
11
-
12
-
13
- df_analog = pd.read_csv('../preprocessed_data/MetroPT3_analogic.csv')
14
- df_analog = pd.DataFrame(df_analog).set_index('timestamp')
15
-
16
- # Separar las características (X) y la variable objetivo (y)
17
- X = df_analog.drop(columns='anomaly') # Características
18
- y = df_analog['anomaly'] # Variable objetivo
19
-
20
- # Normalizar las características entre [0, 1]
21
- scaler = MinMaxScaler(feature_range=(0, 1))
22
- X_normalized = scaler.fit_transform(X)
23
-
24
- # Convertir el resultado normalizado de nuevo a un DataFrame
25
- X_normalized = pd.DataFrame(X_normalized, columns=X.columns, index=X.index)
26
-
27
- # Dividir el conjunto de datos normalizado en entrenamiento y prueba
28
- train_df_analog, test_df_analog = train_test_split(
29
- X_normalized.join(y), # Unir las características normalizadas con la variable objetivo
30
- test_size=0.4,
31
- random_state=42
32
- )
33
-
34
- X_train_analog = train_df_analog.drop(columns='anomaly')
35
- y_train_analog = train_df_analog['anomaly']
36
- X_test_analog = test_df_analog.drop(columns='anomaly')
37
- y_test_analog = test_df_analog['anomaly']
38
-
39
-
40
- #Modelos basados en distancia
41
- from pyod.models.lof import LOF
42
- from pyod.models.cblof import CBLOF
43
- from pyod.models.knn import KNN
44
- from pyod.models.abod import ABOD
45
-
46
- modelos_distancia = [
47
- LOF(n_neighbors=35, contamination=np.sum(y_train_analog)/len(y_train_analog), n_jobs=-1),
48
- #COF(contamination=np.sum(y_train_analog)/len(y_train_analog),method='memory'),
49
- CBLOF(contamination=np.sum(y_train_analog)/len(y_train_analog),n_jobs=-1),
50
- KNN(n_neighbors=35, contamination=np.sum(y_train_analog)/len(y_train_analog),n_jobs=-1),
51
- ABOD(contamination=np.sum(y_train_analog)/len(y_train_analog))
52
- ]
53
-
54
- #Modelos basados en árboles de aislamiento
55
- from pyod.models.iforest import IForest
56
- modelos_arboles = [
57
- IForest(contamination=np.sum(y_train_analog)/len(y_train_analog),n_jobs=-1, random_state=42)
58
- ]
59
-
60
- #Modelos basados en Reconstrucción
61
- from pyod.models.ae1svm import AE1SVM
62
- from pyod.models.alad import ALAD
63
- from pyod.models.auto_encoder import AutoEncoder
64
-
65
- modelos_reconstruccion = [
66
- AE1SVM(contamination=np.sum(y_train_analog)/len(y_train_analog)),
67
- ALAD(contamination=np.sum(y_train_analog)/len(y_train_analog)),
68
- AutoEncoder(contamination=np.sum(y_train_analog)/len(y_train_analog))
69
- ]
70
-
71
-
72
-
73
- #ejecucion de los modelos
74
-
75
- distancia_results = pd.DataFrame(columns=['nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento'])
76
- for modelo in modelos_distancia:
77
-
78
- nombre_modelo = modelo.__class__.__name__
79
-
80
- inicio = time.time()
81
- try:
82
- modelo.fit(X_train_analog)
83
- t = time.time() - inicio
84
- y_pred = modelo.predict(X_test_analog)
85
- f1 = f1_score(y_test_analog, y_pred)
86
- sw_f1 = tm.segment_wise_f_score(y_test_analog, y_pred)
87
- print(f'Modelo: {nombre_modelo} - F1: {f1} - Segment-wise F1: {sw_f1} - Tiempo: {t}')
88
- except Exception as e:
89
- print(f'Error en el modelo {nombre_modelo}: {e}')
90
- # Añadir los resultados al DataFrame
91
- distancia_results.loc[len(distancia_results)] = [nombre_modelo, f1, sw_f1, t]
92
-
93
- distancia_results.to_csv('../results/distancia_results.csv')
94
-
95
- arbol_results = pd.DataFrame(columns=['nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento'])
96
- for modelo in modelos_arboles:
97
-
98
- nombre_modelo = modelo.__class__.__name__
99
-
100
- inicio = time.time()
101
- try:
102
- modelo.fit(X_train_analog)
103
- t = time.time() - inicio
104
- y_pred = modelo.predict(X_test_analog)
105
- f1 = f1_score(y_test_analog, y_pred)
106
- sw_f1 = tm.segment_wise_f_score(y_test_analog, y_pred)
107
- print(f'Modelo: {nombre_modelo} - F1: {f1} - Segment-wise F1: {sw_f1} - Tiempo: {t}')
108
- except Exception as e:
109
- print(f'Error en el modelo {nombre_modelo}: {e}')
110
- # Añadir los resultados al DataFrame
111
- arbol_results.loc[len(arbol_results)] = [nombre_modelo, f1, sw_f1, t]
112
-
113
- arbol_results.to_csv('../results/arbol_results.csv')
114
-
115
- reconstruccion_results = pd.DataFrame(columns=['nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento'])
116
- for modelo in modelos_reconstruccion:
117
-
118
- nombre_modelo = modelo.__class__.__name__
119
-
120
- inicio = time.time()
121
- try:
122
- modelo.fit(X_train_analog[y_train_analog == 0])
123
- t = time.time() - inicio
124
- y_pred = modelo.predict(X_test_analog)
125
- f1 = f1_score(y_test_analog, y_pred)
126
- sw_f1 = tm.segment_wise_f_score(y_test_analog, y_pred)
127
- print(f'Modelo: {nombre_modelo} - F1: {f1} - Segment-wise F1: {sw_f1} - Tiempo: {t}')
128
- except Exception as e:
129
- print(f'Error en el modelo {nombre_modelo}: {e}')
130
- # Añadir los resultados al DataFrame
131
- reconstruccion_results.loc[len(reconstruccion_results)] = [nombre_modelo, f1, sw_f1, t]
132
-
133
- reconstruccion_results.to_csv('../results/reconstruccion_results.csv')
@@ -1,343 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- import matplotlib.pyplot as plt
4
- import seaborn as sns
5
- import tsadmetrics as tm
6
- import time
7
- from sklearn.metrics import f1_score, recall_score, precision_score
8
- import optuna
9
- from optuna.samplers import TPESampler
10
- from functools import partial
11
- import warnings
12
- warnings.filterwarnings('ignore')
13
- from sklearn.model_selection import train_test_split
14
- from sklearn.preprocessing import MinMaxScaler
15
- import os
16
- import json
17
- import torch
18
- import torch.nn as nn
19
- from torch.utils.data import DataLoader, TensorDataset
20
- from sklearn.metrics import classification_report
21
-
22
- def simplify_dataset(
23
- data: pd.DataFrame,
24
- window_size: int = 2,
25
- time_col: str = None,
26
- anomaly_col: str = 'anomaly',
27
- agg_func: str = 'mean'
28
- ) -> pd.DataFrame:
29
- """
30
- Reduce un dataset aplicando agregación en ventanas temporales.
31
- """
32
- simplified_data = data.rolling(window_size, step=window_size).mean()
33
- simplified_data = simplified_data.dropna()
34
- simplified_data[anomaly_col] = (simplified_data[anomaly_col] > 0.1).astype(int)
35
- return simplified_data.reset_index(drop=True)
36
-
37
- # Configuración inicial para PyTorch
38
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
39
- print(f"Using device: {device}")
40
-
41
- def guardar_prediccion(modelo_nombre, y_true, y_pred_binario, y_pred_continuo=None, timestamps=None):
42
- """
43
- Guarda las predicciones ordenadas por timestamp.
44
- """
45
- os.makedirs('../results/predictions', exist_ok=True)
46
-
47
- resultados = pd.DataFrame({
48
- 'timestamp': timestamps if timestamps is not None else np.arange(len(y_true)),
49
- 'ground_truth': np.array(y_true).flatten(),
50
- 'prediction_binary': np.array(y_pred_binario).flatten()
51
- })
52
-
53
- if y_pred_continuo is not None:
54
- resultados['prediction_continuous'] = np.array(y_pred_continuo).flatten()
55
-
56
- if 'timestamp' in resultados.columns:
57
- resultados = resultados.sort_values('timestamp')
58
-
59
- nombre_archivo = f'../results/predictions/{modelo_nombre}_pred.csv'
60
- resultados.to_csv(nombre_archivo, index=False)
61
- return nombre_archivo
62
-
63
- # -------------------------------
64
- # Cargar y preparar datos
65
- df_analog = pd.read_csv('../preprocessed_data/MetroPT3_analogic.csv')
66
- df_analog = pd.DataFrame(df_analog).set_index('timestamp')
67
- df_analog = df_analog.sort_index()
68
-
69
- # Reducción de tamaño
70
- print('Tamaño inicial del dataset:', df_analog.shape)
71
- print(f'Proporción de anomalías: {df_analog["anomaly"].mean():.2f}')
72
- df_analog = simplify_dataset(df_analog, window_size=10, time_col='timestamp')
73
- print('Tamaño del dataset:', df_analog.shape)
74
- print(f'Proporción de anomalías: {df_analog["anomaly"].mean():.2f}')
75
-
76
- # Separar y normalizar datos
77
- X = df_analog.drop(columns='anomaly')
78
- y = df_analog['anomaly']
79
- scaler = MinMaxScaler(feature_range=(0, 1))
80
- X_normalized = scaler.fit_transform(X)
81
- X_normalized = pd.DataFrame(X_normalized, columns=X.columns, index=X.index)
82
-
83
- # -------------------------------
84
- # DIVISIÓN DE DATOS
85
- # 1. Para modelos no-LSTM (shuffle=True)
86
- train_df_shuf, test_df_shuf = train_test_split(
87
- X_normalized.join(y),
88
- test_size=0.4,
89
- random_state=42,
90
- shuffle=True
91
- )
92
-
93
- # 2. Para LSTM (shuffle=False para mantener orden temporal)
94
- train_df_noshuf, test_df_noshuf = train_test_split(
95
- X_normalized.join(y),
96
- test_size=0.4,
97
- random_state=42,
98
- shuffle=False
99
- )
100
-
101
- # Preparar datos para modelos no-LSTM
102
- X_train_shuf = train_df_shuf.drop(columns='anomaly')
103
- y_train_shuf = train_df_shuf['anomaly']
104
- X_test_shuf = test_df_shuf.drop(columns='anomaly')
105
- y_test_shuf = test_df_shuf['anomaly']
106
-
107
- # Preparar datos para LSTM
108
- X_train_noshuf = train_df_noshuf.drop(columns='anomaly')
109
- y_train_noshuf = train_df_noshuf['anomaly']
110
- X_test_noshuf = test_df_noshuf.drop(columns='anomaly')
111
- y_test_noshuf = test_df_noshuf['anomaly']
112
-
113
- contamination = np.sum(y_train_shuf)/len(y_train_shuf)
114
-
115
- # -------------------------------
116
- # Definición del modelo LSTM
117
- class AnomalyLSTM(nn.Module):
118
- def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.2):
119
- super(AnomalyLSTM, self).__init__()
120
- self.lstm = nn.LSTM(
121
- input_size=input_size,
122
- hidden_size=hidden_size,
123
- num_layers=num_layers,
124
- batch_first=True,
125
- dropout=dropout if num_layers > 1 else 0
126
- )
127
- self.fc = nn.Linear(hidden_size, 1)
128
- self.sigmoid = nn.Sigmoid()
129
-
130
- def forward(self, x):
131
- lstm_out, _ = self.lstm(x)
132
- last_time_step = lstm_out[:, -1, :]
133
- output = self.fc(last_time_step)
134
- return self.sigmoid(output)
135
-
136
- def train_lstm(X_train, y_train, X_test, y_test, timestamps_test, sequence_length=10, epochs=20, batch_size=16):
137
- # Preparar datos secuenciales
138
- def create_sequences(data, targets, seq_length):
139
- xs, ys = [], []
140
- for i in range(len(data)-seq_length):
141
- xs.append(data[i:(i+seq_length)])
142
- ys.append(targets[i+seq_length])
143
- return np.array(xs), np.array(ys)
144
-
145
- X_train_seq, y_train_seq = create_sequences(X_train.values, y_train.values, sequence_length)
146
- X_test_seq, y_test_seq = create_sequences(X_test.values, y_test.values, sequence_length)
147
-
148
- # Convertir a tensores PyTorch
149
- train_data = TensorDataset(
150
- torch.FloatTensor(X_train_seq),
151
- torch.FloatTensor(y_train_seq).unsqueeze(1)
152
- )
153
- test_data = TensorDataset(
154
- torch.FloatTensor(X_test_seq),
155
- torch.FloatTensor(y_test_seq).unsqueeze(1)
156
- )
157
-
158
- # IMPORTANTE: shuffle=False para DataLoader
159
- train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)
160
- test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
161
-
162
- # Inicializar modelo
163
- model = AnomalyLSTM(input_size=X_train.shape[1]).to(device)
164
- criterion = nn.BCELoss()
165
- optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
166
-
167
- # Entrenamiento
168
- train_start = time.time()
169
- for epoch in range(epochs):
170
- model.train()
171
- for batch_x, batch_y in train_loader:
172
- batch_x, batch_y = batch_x.to(device), batch_y.to(device)
173
- optimizer.zero_grad()
174
- outputs = model(batch_x)
175
- loss = criterion(outputs, batch_y)
176
- loss.backward()
177
- optimizer.step()
178
-
179
- # Evaluación
180
- model.eval()
181
- test_preds, test_true, test_scores = [], [], []
182
- with torch.no_grad():
183
- for batch_x, batch_y in test_loader:
184
- batch_x, batch_y = batch_x.to(device), batch_y.to(device)
185
- outputs = model(batch_x)
186
- predicted = (outputs > 0.5).float()
187
- test_preds.extend(predicted.cpu().numpy())
188
- test_scores.extend(outputs.cpu().numpy())
189
- test_true.extend(batch_y.cpu().numpy())
190
-
191
- train_time = time.time() - train_start
192
-
193
- # Ajustar predicciones al tamaño original y ordenar por timestamp
194
- full_preds = np.concatenate([np.zeros(sequence_length), np.array(test_preds).flatten()])
195
- full_scores = np.concatenate([np.zeros(sequence_length), np.array(test_scores).flatten()])
196
- full_preds = full_preds[:len(y_test)]
197
- full_scores = full_scores[:len(y_test)]
198
-
199
- # Crear DataFrame con timestamps para ordenar
200
- pred_df = pd.DataFrame({
201
- 'timestamp': timestamps_test[-len(full_preds):],
202
- 'y_true': y_test[-len(full_preds):],
203
- 'y_pred': full_preds,
204
- 'y_scores': full_scores
205
- }).sort_values('timestamp')
206
-
207
- # Calcular métricas ordenadas
208
- f1 = f1_score(pred_df['y_true'], pred_df['y_pred'])
209
- sw_f1 = tm.segment_wise_f_score(pred_df['y_true'], pred_df['y_pred'])
210
-
211
- guardar_prediccion("LSTM", pred_df['y_true'], pred_df['y_pred'], pred_df['y_scores'], pred_df['timestamp'])
212
-
213
- return model, f1, sw_f1, train_time, pred_df['y_pred']
214
-
215
- # [Resto de las funciones (objective, optimize_model, evaluate_models) permanecen iguales...]
216
-
217
- # -------------------------------
218
- # Modelos
219
- from pyod.models.lof import LOF
220
- from pyod.models.cblof import CBLOF
221
- from pyod.models.knn import KNN
222
- from pyod.models.iforest import IForest
223
- from pyod.models.ae1svm import AE1SVM
224
- from pyod.models.auto_encoder import AutoEncoder
225
-
226
- modelos_distancia = [LOF, CBLOF, KNN]
227
- modelos_arboles = [IForest]
228
- modelos_machine_learning = []
229
- modelos_reconstruccion = [AE1SVM, AutoEncoder]
230
-
231
- # -------------------------------
232
- # Función evaluate_models modificada
233
- # -------------------------------
234
- # Función evaluate_models modificada para ordenar todas las predicciones
235
- def evaluate_models(model_classes, best_params_dict, results_filename, include_lstm=False):
236
- results_df = pd.DataFrame(columns=[
237
- 'nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento', 'best_params'
238
- ])
239
-
240
- # Evaluar modelos no-LSTM (usando datos con shuffle)
241
- for model_class in model_classes:
242
- nombre_modelo = model_class.__name__
243
- params = best_params_dict.get(nombre_modelo, {})
244
- params['contamination'] = contamination
245
-
246
- if model_class.__name__ in ['LOF', 'CBLOF', 'KNN', 'IForest']:
247
- params['n_jobs'] = -1
248
-
249
- inicio = time.time()
250
- try:
251
- model = model_class(**params)
252
-
253
- if nombre_modelo in ['AutoEncoder', 'AE1SVM']:
254
- model.fit(X_train_shuf[y_train_shuf == 0])
255
- else:
256
- model.fit(X_train_shuf)
257
-
258
- t = time.time() - inicio
259
- y_pred = model.predict(X_test_shuf)
260
- y_scores = model.decision_function(X_test_shuf) if hasattr(model, 'decision_function') else None
261
-
262
- # Crear DataFrame temporal con timestamps para ordenar
263
- pred_df = pd.DataFrame({
264
- 'timestamp': X_test_shuf.index,
265
- 'y_true': y_test_shuf,
266
- 'y_pred': y_pred,
267
- 'y_scores': y_scores if y_scores is not None else np.nan
268
- }).sort_values('timestamp')
269
-
270
- # Calcular métricas sobre datos ordenados
271
- f1 = f1_score(pred_df['y_true'], pred_df['y_pred'])
272
- sw_f1 = tm.segment_wise_f_score(pred_df['y_true'], pred_df['y_pred'])
273
-
274
- # Guardar predicciones ordenadas
275
- guardar_prediccion(
276
- nombre_modelo,
277
- pred_df['y_true'],
278
- pred_df['y_pred'],
279
- pred_df['y_scores'] if 'y_scores' in pred_df.columns else None,
280
- pred_df['timestamp']
281
- )
282
-
283
- print(f'Modelo: {nombre_modelo} - F1: {f1:.4f} - Segment-wise F1: {sw_f1:.4f} - Tiempo: {t:.2f}s')
284
-
285
- results_df.loc[len(results_df)] = [
286
- nombre_modelo, f1, sw_f1, t, json.dumps(params, ensure_ascii=False)
287
- ]
288
- except Exception as e:
289
- print(f'Error en el modelo {nombre_modelo}: {e}')
290
-
291
- # Evaluar LSTM (usando datos sin shuffle)
292
- if include_lstm:
293
- inicio_lstm = time.time()
294
- print("\nEntrenando modelo LSTM (sin shuffle)...")
295
-
296
- lstm_model, lstm_f1, lstm_sw_f1, lstm_time, lstm_preds = train_lstm(
297
- X_train_noshuf, y_train_noshuf,
298
- X_test_noshuf, y_test_noshuf,
299
- timestamps_test=X_test_noshuf.index
300
- )
301
-
302
- print(f'Modelo: LSTM - F1: {lstm_f1:.4f} - Segment-wise F1: {lstm_sw_f1:.4f} - Tiempo: {lstm_time:.2f}s')
303
-
304
- results_df.loc[len(results_df)] = [
305
- "LSTM", lstm_f1, lstm_sw_f1, lstm_time,
306
- json.dumps({
307
- "sequence_length": 10,
308
- "epochs": 20,
309
- "batch_size": 16,
310
- "hidden_size": 64,
311
- "num_layers": 2,
312
- "dropout": 0.2
313
- }, ensure_ascii=False)
314
- ]
315
-
316
- # Guardar resultados
317
- os.makedirs('../results', exist_ok=True)
318
- results_df.to_csv(f'../results/{results_filename}', index=False)
319
- print(f'Resultados guardados en {results_filename}')
320
- return results_df
321
-
322
- # -------------------------------
323
- # Parámetros y ejecución
324
- best_params = {
325
- 'LOF': {"n_neighbors": 62, "metric":"minkowski", "contamination":contamination, "n_jobs":-1},
326
- 'CBLOF': {"n_clusters": 8, "alpha": 0.87571, "beta": 6, "contamination":contamination, "n_jobs":-1},
327
- 'KNN': {"n_neighbors": 5, "method":"mean", "contamination":contamination, "n_jobs":-1},
328
- 'IForest': {'n_jobs':-1, "contamination":contamination},
329
- 'AutoEncoder': {},
330
- 'AE1SVM': {}
331
- }
332
-
333
- print("\nEvaluando modelos basados en distancia...")
334
- distancia_results = evaluate_models(modelos_distancia, best_params, 'distancia_results.csv')
335
-
336
- print("\nEvaluando modelos basados en árboles...")
337
- arbol_results = evaluate_models(modelos_arboles, best_params, 'arbol_results.csv')
338
-
339
- print("\nEvaluando modelos de reconstrucción...")
340
- reconstruccion_results = evaluate_models(modelos_reconstruccion, best_params, 'reconstruccion_results.csv')
341
-
342
- print("\nEvaluando LSTM...")
343
- ml_results = evaluate_models(modelos_machine_learning, best_params, 'ml_results.csv', include_lstm=True)
File without changes
File without changes
File without changes
File without changes