tsadmetrics 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tsadmetrics-0.1.5/tsadmetrics.egg-info → tsadmetrics-0.1.6}/PKG-INFO +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2html.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2html4.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2html5.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2latex.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2man.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2odt.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2odt_prepstyles.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2pseudoxml.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2s5.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2xetex.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rst2xml.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/rstpep2html.py +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/pyproject.toml +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/utils.py +10 -4
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6/tsadmetrics.egg-info}/PKG-INFO +1 -1
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics.egg-info/SOURCES.txt +0 -4
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics.egg-info/top_level.txt +0 -1
- tsadmetrics-0.1.5/experiments/scripts/compute_metrics.py +0 -187
- tsadmetrics-0.1.5/experiments/scripts/metrics_complexity_analysis.py +0 -109
- tsadmetrics-0.1.5/experiments/scripts/metro_experiment.py +0 -133
- tsadmetrics-0.1.5/experiments/scripts/opt_metro_experiment.py +0 -343
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/MANIFEST.in +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/README.md +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/entorno/bin/activate_this.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/setup.cfg +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/setup.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tests/__init__.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tests/test_binary.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tests/test_non_binary.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/__init__.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/__init__.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/__init__.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/_affiliation_zone.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/_integral_interval.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/_single_ground_truth_event.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/generics.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/affiliation/metrics.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/auc_roc_pr_plot.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/discontinuity_graph.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/File_IO.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Range.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Time_Plot.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/__init__.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/__init__.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/etapr.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/eTaPR_pkg/tapr.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/latency_sparsity_aware.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/metrics.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/nabscore.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/__init__.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/base/__init__.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/base/time_series_metrics.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/basic_metrics_ts.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/time_series_metrics/__init__.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/time_series_metrics/fscore.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/time_series_metrics/precision.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/time_series_metrics/precision_recall.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/prts/time_series_metrics/recall.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/tests.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/threshold_plt.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/time_tolerant.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/_tsadeval/vus_utils.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/binary_metrics.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/metric_utils.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/non_binary_metrics.py +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/py.typed +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics.egg-info/dependency_links.txt +0 -0
- {tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics.egg-info/requires.txt +0 -0
{tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics/utils.py

@@ -1,7 +1,8 @@
 import numpy as np
 import pandas as pd
+import time

-def compute_metrics(y_true: np.array,y_pred: np.array,metrics: list, metrics_params: dict, is_anomaly_score = False):
+def compute_metrics(y_true: np.array,y_pred: np.array,metrics: list, metrics_params: dict, is_anomaly_score = False, verbose = False):
     """
     Computes the specified metrics for the given true and predicted values.

@@ -11,7 +12,7 @@ def compute_metrics(y_true: np.array,y_pred: np.array,metrics: list, metrics_par
     - metrics (list): List of metric names to compute.
     - metrics_params (dict): Dictionary of parameters for each metric.
     - is_anomaly_score (bool): Flag indicating if y_true and y_pred are anomaly scores. Otherwise, they are treated as binary labels.
-
+    - verbose (bool): Flag to print additional information.
     Returns:
     - metrics_df (DataFrame): DataFrame containing the computed metrics and their values.
     """
@@ -29,9 +30,14 @@ def compute_metrics(y_true: np.array,y_pred: np.array,metrics: list, metrics_par
     for metric in metrics:
         metric_name = metric[0]
         metric_func = metric[1]
-
+        if verbose:
+            print(f"Calculating metric: {metric_name}")
+            t0 = time.time()
         metric_value = metric_func(y_true, y_pred, **metrics_params.get(metric_name, {}))
-
+        if verbose:
+            t1 = time.time()
+            print(f"Metric {metric_name} calculated in {t1 - t0:.4f} seconds")
+            print(f"Metric {metric_name} value: {metric_value}")
         # Store the result in the DataFrame
         results[metric_name] = metric_value
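For reference, the new verbose flag can be exercised as in the minimal sketch below. It relies only on what the hunk above establishes: the (name, function) metric tuples, the metrics_params lookup, and the DataFrame return value. The import path tsadmetrics.utils is assumed from the file layout shown here, and the random inputs are purely illustrative.

    # Minimal usage sketch for the new verbose flag (assumes compute_metrics
    # is importable from tsadmetrics.utils, matching the file path above).
    import numpy as np
    from tsadmetrics import point_wise_f_score, segment_wise_f_score
    from tsadmetrics.utils import compute_metrics

    y_true = np.random.choice([0, 1], size=1000)  # illustrative binary labels
    y_pred = np.random.choice([0, 1], size=1000)  # illustrative binary predictions

    # Metrics are passed as (name, function) tuples, as in the removed
    # experiment scripts further down in this diff.
    metrics = [
        ('point_wise_f_score', point_wise_f_score),
        ('segment_wise_f_score', segment_wise_f_score),
    ]

    # With verbose=True, each metric's name, runtime, and value are printed
    # while the returned DataFrame is built.
    metrics_df = compute_metrics(y_true, y_pred, metrics, metrics_params={}, verbose=True)
    print(metrics_df)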
{tsadmetrics-0.1.5 → tsadmetrics-0.1.6}/tsadmetrics.egg-info/SOURCES.txt

@@ -15,10 +15,6 @@ entorno/bin/rst2s5.py
 entorno/bin/rst2xetex.py
 entorno/bin/rst2xml.py
 entorno/bin/rstpep2html.py
-experiments/scripts/compute_metrics.py
-experiments/scripts/metrics_complexity_analysis.py
-experiments/scripts/metro_experiment.py
-experiments/scripts/opt_metro_experiment.py
 tests/__init__.py
 tests/test_binary.py
 tests/test_non_binary.py
tsadmetrics-0.1.5/experiments/scripts/compute_metrics.py

@@ -1,187 +0,0 @@
-import pandas as pd
-import numpy as np
-import time
-import os
-from tsadmetrics import *
-
-def cargar_prediccion(modelo_nombre):
-    """
-    Loads the previously saved predictions.
-    """
-    nombre_archivo = f'../results/predictions/{modelo_nombre}_pred.csv'
-    resultados = pd.read_csv(nombre_archivo)
-
-    y_true = resultados['ground_truth'].values
-    y_pred_binary = resultados['prediction_binary'].values
-    y_pred_continuous = resultados['prediction_continuous'].values
-    return y_true, y_pred_binary, y_pred_continuous
-
-# List of models and metrics
-nombre_modelos = ['CBLOF', 'IForest', 'KNN', 'LOF', 'AE1SVM', 'AutoEncoder', 'LSTM']
-metrics = [
-    ('point_wise_f_score', point_wise_f_score),
-    ('point_adjusted_f_score', point_adjusted_f_score),
-    ('delay_th_point_adjusted_f_score', delay_th_point_adjusted_f_score),
-    ('point_adjusted_at_k_f_score', point_adjusted_at_k_f_score),
-    ('latency_sparsity_aw_f_score', latency_sparsity_aw_f_score),
-    ('segment_wise_f_score', segment_wise_f_score),
-    ('composite_f_score', composite_f_score),
-    ('time_tolerant_f_score', time_tolerant_f_score),
-    ('range_based_f_score', range_based_f_score),
-    ('ts_aware_f_score', ts_aware_f_score),
-    ('enhanced_ts_aware_f_score', enhanced_ts_aware_f_score),
-    ('affiliation_based_f_score', affiliation_based_f_score),
-    ('nab_score', nab_score),
-    ('temporal_distance', temporal_distance),
-    ('average_detection_count', average_detection_count),
-    ('absolute_detection_distance', absolute_detection_distance),
-    ('total_detected_in_range', total_detected_in_range),
-    ('detection_accuracy_in_range', detection_accuracy_in_range),
-    ('weighted_detection_difference', weighted_detection_difference),
-    ('binary_pate', binary_pate),
-    ('mean_time_to_detect', mean_time_to_detect),
-]
-
-metrics_params = {
-    'delay_th_point_adjusted_f_score': {'k': 10},
-    'point_adjusted_at_k_f_score': {'k': 0.7},
-    'latency_sparsity_aw_f_score': {'ni': 2},
-    'time_tolerant_f_score': {'t': 30},
-    'range_based_f_score': {'p_alpha': 0, 'r_alpha': 0},
-    'ts_aware_f_score': {'theta': 0.5, 'alpha': 0.5, 'delta': 0, 'beta': 1},
-    'enhanced_ts_aware_f_score': {'beta': 1, 'theta_p': 0.5, 'theta_r': 0.1},
-    'total_detected_in_range': {'k': 30},
-    'detection_accuracy_in_range': {'k': 30},
-    'weighted_detection_difference': {'k': 30},
-    'binary_pate': {'early': 20, 'delay': 20}
-}
-
-# Create the directory if it does not exist
-os.makedirs('../results/computed_metrics', exist_ok=True)
-
-# File paths
-results_path = '../results/computed_metrics/resultados.csv'
-times_path = '../results/computed_metrics/tiempos.csv'
-
-# Try to load existing results, or create new DataFrames
-try:
-    all_results_df = pd.read_csv(results_path)
-    all_times_df = pd.read_csv(times_path)
-    print("Cargados resultados previos encontrados.")
-except:
-    all_results_df = pd.DataFrame(columns=['modelo'] + [m[0] for m in metrics])
-    all_times_df = pd.DataFrame(columns=['modelo', 'metrica', 'tiempo'])
-    print("No se encontraron resultados previos, comenzando desde cero.")
-
-# Function to save progress
-def guardar_progreso():
-    all_results_df.to_csv(results_path, index=False)
-    all_times_df.to_csv(times_path, index=False)
-    print(f"\nProgreso guardado a las {time.strftime('%H:%M:%S')}")
-
-# Start time and time of the last save
-start_time = time.time()
-last_save_time = start_time
-
-# Main computation loop
-for modelo in nombre_modelos:
-    # Skip the model if it is already complete in the results
-    if modelo in all_results_df['modelo'].values:
-        print(f"\nModelo {modelo} ya calculado, saltando...")
-        continue
-
-    print(f"\nComenzando cálculo para modelo: {modelo}")
-
-    try:
-        # Load predictions
-        y_true, y_pred, _ = cargar_prediccion(modelo)
-
-        # Dictionary holding the current model's results
-        model_results = {'modelo': modelo}
-        model_times = []
-
-        for metric_name, metric_func in metrics:
-            # Skip this metric if it is already computed for this model
-            if not all_results_df.empty and modelo in all_results_df['modelo'].values:
-                existing_row = all_results_df[all_results_df['modelo'] == modelo].iloc[0]
-                if not pd.isna(existing_row[metric_name]):
-                    print(f"Métrica {metric_name} ya calculada para {modelo}, saltando...")
-                    continue
-
-            print(f"Calculando métrica: {metric_name}, modelo: {modelo}...")
-
-            try:
-                # Compute the metric and its execution time
-                start_metric_time = time.time()
-
-                if metric_name in metrics_params:
-                    params = metrics_params[metric_name]
-                    metric_value = metric_func(y_true, y_pred, **params)
-                else:
-                    metric_value = metric_func(y_true, y_pred)
-
-                computation_time = time.time() - start_metric_time
-
-                # Update results
-                model_results[metric_name] = metric_value
-                model_times.append({
-                    'modelo': modelo,
-                    'metrica': metric_name,
-                    'tiempo': computation_time
-                })
-
-                print(f"Valor: {metric_value:.4f}, tiempo: {computation_time:.4f}s")
-
-                # Save progress every hour
-                current_time = time.time()
-                if current_time - last_save_time > 3600:  # 3600 seconds = 1 hour
-                    # Append partial results
-                    if modelo not in all_results_df['modelo'].values:
-                        all_results_df = pd.concat([all_results_df, pd.DataFrame([model_results])], ignore_index=True)
-                    else:
-                        idx = all_results_df.index[all_results_df['modelo'] == modelo][0]
-                        all_results_df.loc[idx, metric_name] = metric_value
-
-                    all_times_df = pd.concat([all_times_df, pd.DataFrame(model_times)], ignore_index=True)
-                    model_times = []  # Reset the times so they are not duplicated
-
-                    guardar_progreso()
-                    last_save_time = current_time
-
-            except Exception as e:
-                print(f"Error calculando {metric_name} para {modelo}: {str(e)}")
-                model_results[metric_name] = np.nan
-                model_times.append({
-                    'modelo': modelo,
-                    'metrica': metric_name,
-                    'tiempo': np.nan
-                })
-
-        # Append the model's complete results to the main DataFrames
-        if modelo not in all_results_df['modelo'].values:
-            all_results_df = pd.concat([all_results_df, pd.DataFrame([model_results])], ignore_index=True)
-        else:
-            # Update the existing row
-            idx = all_results_df.index[all_results_df['modelo'] == modelo][0]
-            for metric_name in model_results:
-                if metric_name != 'modelo':
-                    all_results_df.loc[idx, metric_name] = model_results[metric_name]
-
-        all_times_df = pd.concat([all_times_df, pd.DataFrame(model_times)], ignore_index=True)
-
-        # Save after each model completes
-        guardar_progreso()
-        last_save_time = time.time()
-
-    except Exception as e:
-        print(f"Error procesando modelo {modelo}: {str(e)}")
-        # Append NaN rows for this model
-        model_results = {'modelo': modelo}
-        for m in metrics:
-            model_results[m[0]] = np.nan
-        all_results_df = pd.concat([all_results_df, pd.DataFrame([model_results])], ignore_index=True)
-        guardar_progreso()
-
-# Save the final results
-guardar_progreso()
-print("\nProceso completado. Resultados finales guardados.")
tsadmetrics-0.1.5/experiments/scripts/metrics_complexity_analysis.py

@@ -1,109 +0,0 @@
-import pandas as pd
-import numpy as np
-import time
-import os
-from tsadmetrics import *
-
-# List of models and metrics
-binary_metrics = [
-    ('point_wise_f_score', point_wise_f_score),
-    ('point_adjusted_f_score', point_adjusted_f_score),
-    ('delay_th_point_adjusted_f_score', delay_th_point_adjusted_f_score),
-    ('point_adjusted_at_k_f_score', point_adjusted_at_k_f_score),
-    ('latency_sparsity_aw_f_score', latency_sparsity_aw_f_score),
-    ('segment_wise_f_score', segment_wise_f_score),
-    ('composite_f_score', composite_f_score),
-    ('time_tolerant_f_score', time_tolerant_f_score),
-    ('range_based_f_score', range_based_f_score),
-    ('ts_aware_f_score', ts_aware_f_score),
-    ('enhanced_ts_aware_f_score', enhanced_ts_aware_f_score),
-    ('affiliation_based_f_score', affiliation_based_f_score),
-    ('nab_score', nab_score),
-    ('temporal_distance', temporal_distance),
-    ('average_detection_count', average_detection_count),
-    ('absolute_detection_distance', absolute_detection_distance),
-    ('total_detected_in_range', total_detected_in_range),
-    ('detection_accuracy_in_range', detection_accuracy_in_range),
-    ('weighted_detection_difference', weighted_detection_difference),
-    ('binary_pate', binary_pate),
-    ('mean_time_to_detect', mean_time_to_detect),
-]
-binary_metrics_params = {
-    'delay_th_point_adjusted_f_score': {'k': 10},
-    'point_adjusted_at_k_f_score': {'k': 0.7},
-    'latency_sparsity_aw_f_score': {'ni': 2},
-    'time_tolerant_f_score': {'t': 30},
-    'range_based_f_score': {'p_alpha': 0, 'r_alpha': 0},  # Default value
-    'ts_aware_f_score': {'theta': 0.5, 'alpha': 0.5, 'delta': 0, 'beta': 1},  # Default value
-    'enhanced_ts_aware_f_score': {'beta': 1, 'theta_p': 0.5, 'theta_r': 0.1},  # Default value
-    'total_detected_in_range': {'k': 30},
-    'detection_accuracy_in_range': {'k': 30},
-    'weighted_detection_difference': {'k': 3},
-    'binary_pate': {'early': 20, 'delay': 20}
-}
-
-continuous_metrics = [
-    ('precision_at_k', precision_at_k),
-    ('auc_roc_pw', auc_roc_pw),
-    ('auc_pr_pw', auc_pr_pw),
-    ('auc_pr_pa', auc_pr_pa),
-    ('auc_pr_sw', auc_pr_sw),
-    ('vus_roc', vus_roc),
-    ('vus_pr', vus_pr),
-    ('real_pate', real_pate)]
-
-continuous_metrics_params = {
-    'vus_roc': {'window': 4},
-    'vus_pr': {'window': 4},
-    'real_pate': {'early': 3, 'delay': 3},
-}
-SIZE = 1000
-# Load predictions
-y_true, y_pred = np.random.choice([0, 1], size=SIZE), np.random.choice([0, 1], size=SIZE)
-
-Binary_mode = 1
-
-if Binary_mode == 0:
-    for metric_name, metric_func in binary_metrics:
-
-        # Compute the metric and its execution time
-        start_time = time.time()
-        if metric_name in binary_metrics_params:
-            params = binary_metrics_params[metric_name]
-            metric_value = metric_func(y_true, y_pred, **params)
-        else:
-            metric_value = metric_func(y_true, y_pred)
-        computation_time = time.time() - start_time
-
-        print(f"Métrica: {metric_name} - Valor: {metric_value:.4f} - Tiempo: {computation_time:.4f}s")
-
-else:
-    y_true = np.random.choice([0, 1], size=SIZE)
-    y_pred = np.random.rand(SIZE)  # Continuous predictions
-
-    # Compute the metric and its execution time
-    for metric_name, metric_func in continuous_metrics:
-
-        start_time = time.time()
-        if metric_name in continuous_metrics_params:
-            params = continuous_metrics_params[metric_name]
-            metric_value = metric_func(y_true, y_pred, **params)
-        else:
-            metric_value = metric_func(y_true, y_pred)
-        computation_time = time.time() - start_time
-
-        print(f"Métrica: {metric_name} - Valor: {metric_value:.4f} - Tiempo: {computation_time:.4f}s")
tsadmetrics-0.1.5/experiments/scripts/metro_experiment.py

@@ -1,133 +0,0 @@
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-import tsadmetrics as tm
-import time
-from sklearn.metrics import f1_score
-
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import MinMaxScaler
-
-
-df_analog = pd.read_csv('../preprocessed_data/MetroPT3_analogic.csv')
-df_analog = pd.DataFrame(df_analog).set_index('timestamp')
-
-# Separate the features (X) and the target variable (y)
-X = df_analog.drop(columns='anomaly')  # Features
-y = df_analog['anomaly']  # Target variable
-
-# Normalize the features to [0, 1]
-scaler = MinMaxScaler(feature_range=(0, 1))
-X_normalized = scaler.fit_transform(X)
-
-# Convert the normalized result back into a DataFrame
-X_normalized = pd.DataFrame(X_normalized, columns=X.columns, index=X.index)
-
-# Split the normalized dataset into train and test sets
-train_df_analog, test_df_analog = train_test_split(
-    X_normalized.join(y),  # Join the normalized features with the target variable
-    test_size=0.4,
-    random_state=42
-)
-
-X_train_analog = train_df_analog.drop(columns='anomaly')
-y_train_analog = train_df_analog['anomaly']
-X_test_analog = test_df_analog.drop(columns='anomaly')
-y_test_analog = test_df_analog['anomaly']
-
-
-# Distance-based models
-from pyod.models.lof import LOF
-from pyod.models.cblof import CBLOF
-from pyod.models.knn import KNN
-from pyod.models.abod import ABOD
-
-modelos_distancia = [
-    LOF(n_neighbors=35, contamination=np.sum(y_train_analog)/len(y_train_analog), n_jobs=-1),
-    #COF(contamination=np.sum(y_train_analog)/len(y_train_analog), method='memory'),
-    CBLOF(contamination=np.sum(y_train_analog)/len(y_train_analog), n_jobs=-1),
-    KNN(n_neighbors=35, contamination=np.sum(y_train_analog)/len(y_train_analog), n_jobs=-1),
-    ABOD(contamination=np.sum(y_train_analog)/len(y_train_analog))
-]
-
-# Isolation-tree-based models
-from pyod.models.iforest import IForest
-modelos_arboles = [
-    IForest(contamination=np.sum(y_train_analog)/len(y_train_analog), n_jobs=-1, random_state=42)
-]
-
-# Reconstruction-based models
-from pyod.models.ae1svm import AE1SVM
-from pyod.models.alad import ALAD
-from pyod.models.auto_encoder import AutoEncoder
-
-modelos_reconstruccion = [
-    AE1SVM(contamination=np.sum(y_train_analog)/len(y_train_analog)),
-    ALAD(contamination=np.sum(y_train_analog)/len(y_train_analog)),
-    AutoEncoder(contamination=np.sum(y_train_analog)/len(y_train_analog))
-]
-
-
-# Run the models
-distancia_results = pd.DataFrame(columns=['nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento'])
-for modelo in modelos_distancia:
-    nombre_modelo = modelo.__class__.__name__
-
-    inicio = time.time()
-    try:
-        modelo.fit(X_train_analog)
-        t = time.time() - inicio
-        y_pred = modelo.predict(X_test_analog)
-        f1 = f1_score(y_test_analog, y_pred)
-        sw_f1 = tm.segment_wise_f_score(y_test_analog, y_pred)
-        print(f'Modelo: {nombre_modelo} - F1: {f1} - Segment-wise F1: {sw_f1} - Tiempo: {t}')
-    except Exception as e:
-        print(f'Error en el modelo {nombre_modelo}: {e}')
-    # Append the results to the DataFrame
-    distancia_results.loc[len(distancia_results)] = [nombre_modelo, f1, sw_f1, t]
-
-distancia_results.to_csv('../results/distancia_results.csv')
-
-arbol_results = pd.DataFrame(columns=['nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento'])
-for modelo in modelos_arboles:
-    nombre_modelo = modelo.__class__.__name__
-
-    inicio = time.time()
-    try:
-        modelo.fit(X_train_analog)
-        t = time.time() - inicio
-        y_pred = modelo.predict(X_test_analog)
-        f1 = f1_score(y_test_analog, y_pred)
-        sw_f1 = tm.segment_wise_f_score(y_test_analog, y_pred)
-        print(f'Modelo: {nombre_modelo} - F1: {f1} - Segment-wise F1: {sw_f1} - Tiempo: {t}')
-    except Exception as e:
-        print(f'Error en el modelo {nombre_modelo}: {e}')
-    # Append the results to the DataFrame
-    arbol_results.loc[len(arbol_results)] = [nombre_modelo, f1, sw_f1, t]
-
-arbol_results.to_csv('../results/arbol_results.csv')
-
-reconstruccion_results = pd.DataFrame(columns=['nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento'])
-for modelo in modelos_reconstruccion:
-    nombre_modelo = modelo.__class__.__name__
-
-    inicio = time.time()
-    try:
-        modelo.fit(X_train_analog[y_train_analog == 0])
-        t = time.time() - inicio
-        y_pred = modelo.predict(X_test_analog)
-        f1 = f1_score(y_test_analog, y_pred)
-        sw_f1 = tm.segment_wise_f_score(y_test_analog, y_pred)
-        print(f'Modelo: {nombre_modelo} - F1: {f1} - Segment-wise F1: {sw_f1} - Tiempo: {t}')
-    except Exception as e:
-        print(f'Error en el modelo {nombre_modelo}: {e}')
-    # Append the results to the DataFrame
-    reconstruccion_results.loc[len(reconstruccion_results)] = [nombre_modelo, f1, sw_f1, t]
-
-reconstruccion_results.to_csv('../results/reconstruccion_results.csv')
tsadmetrics-0.1.5/experiments/scripts/opt_metro_experiment.py

@@ -1,343 +0,0 @@
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-import tsadmetrics as tm
-import time
-from sklearn.metrics import f1_score, recall_score, precision_score
-import optuna
-from optuna.samplers import TPESampler
-from functools import partial
-import warnings
-warnings.filterwarnings('ignore')
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import MinMaxScaler
-import os
-import json
-import torch
-import torch.nn as nn
-from torch.utils.data import DataLoader, TensorDataset
-from sklearn.metrics import classification_report
-
-def simplify_dataset(
-    data: pd.DataFrame,
-    window_size: int = 2,
-    time_col: str = None,
-    anomaly_col: str = 'anomaly',
-    agg_func: str = 'mean'
-) -> pd.DataFrame:
-    """
-    Shrinks a dataset by aggregating over temporal windows.
-    """
-    simplified_data = data.rolling(window_size, step=window_size).mean()
-    simplified_data = simplified_data.dropna()
-    simplified_data[anomaly_col] = (simplified_data[anomaly_col] > 0.1).astype(int)
-    return simplified_data.reset_index(drop=True)
-
-# Initial PyTorch configuration
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"Using device: {device}")
-
-def guardar_prediccion(modelo_nombre, y_true, y_pred_binario, y_pred_continuo=None, timestamps=None):
-    """
-    Saves the predictions sorted by timestamp.
-    """
-    os.makedirs('../results/predictions', exist_ok=True)
-
-    resultados = pd.DataFrame({
-        'timestamp': timestamps if timestamps is not None else np.arange(len(y_true)),
-        'ground_truth': np.array(y_true).flatten(),
-        'prediction_binary': np.array(y_pred_binario).flatten()
-    })
-
-    if y_pred_continuo is not None:
-        resultados['prediction_continuous'] = np.array(y_pred_continuo).flatten()
-
-    if 'timestamp' in resultados.columns:
-        resultados = resultados.sort_values('timestamp')
-
-    nombre_archivo = f'../results/predictions/{modelo_nombre}_pred.csv'
-    resultados.to_csv(nombre_archivo, index=False)
-    return nombre_archivo
-
-# -------------------------------
-# Load and prepare the data
-df_analog = pd.read_csv('../preprocessed_data/MetroPT3_analogic.csv')
-df_analog = pd.DataFrame(df_analog).set_index('timestamp')
-df_analog = df_analog.sort_index()
-
-# Size reduction
-print('Tamaño inicial del dataset:', df_analog.shape)
-print(f'Proporción de anomalías: {df_analog["anomaly"].mean():.2f}')
-df_analog = simplify_dataset(df_analog, window_size=10, time_col='timestamp')
-print('Tamaño del dataset:', df_analog.shape)
-print(f'Proporción de anomalías: {df_analog["anomaly"].mean():.2f}')
-
-# Separate and normalize the data
-X = df_analog.drop(columns='anomaly')
-y = df_analog['anomaly']
-scaler = MinMaxScaler(feature_range=(0, 1))
-X_normalized = scaler.fit_transform(X)
-X_normalized = pd.DataFrame(X_normalized, columns=X.columns, index=X.index)
-
-# -------------------------------
-# DATA SPLIT
-# 1. For non-LSTM models (shuffle=True)
-train_df_shuf, test_df_shuf = train_test_split(
-    X_normalized.join(y),
-    test_size=0.4,
-    random_state=42,
-    shuffle=True
-)
-
-# 2. For the LSTM (shuffle=False to preserve temporal order)
-train_df_noshuf, test_df_noshuf = train_test_split(
-    X_normalized.join(y),
-    test_size=0.4,
-    random_state=42,
-    shuffle=False
-)
-
-# Prepare data for non-LSTM models
-X_train_shuf = train_df_shuf.drop(columns='anomaly')
-y_train_shuf = train_df_shuf['anomaly']
-X_test_shuf = test_df_shuf.drop(columns='anomaly')
-y_test_shuf = test_df_shuf['anomaly']
-
-# Prepare data for the LSTM
-X_train_noshuf = train_df_noshuf.drop(columns='anomaly')
-y_train_noshuf = train_df_noshuf['anomaly']
-X_test_noshuf = test_df_noshuf.drop(columns='anomaly')
-y_test_noshuf = test_df_noshuf['anomaly']
-
-contamination = np.sum(y_train_shuf)/len(y_train_shuf)
-
-# -------------------------------
-# LSTM model definition
-class AnomalyLSTM(nn.Module):
-    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.2):
-        super(AnomalyLSTM, self).__init__()
-        self.lstm = nn.LSTM(
-            input_size=input_size,
-            hidden_size=hidden_size,
-            num_layers=num_layers,
-            batch_first=True,
-            dropout=dropout if num_layers > 1 else 0
-        )
-        self.fc = nn.Linear(hidden_size, 1)
-        self.sigmoid = nn.Sigmoid()
-
-    def forward(self, x):
-        lstm_out, _ = self.lstm(x)
-        last_time_step = lstm_out[:, -1, :]
-        output = self.fc(last_time_step)
-        return self.sigmoid(output)
-
-def train_lstm(X_train, y_train, X_test, y_test, timestamps_test, sequence_length=10, epochs=20, batch_size=16):
-    # Prepare sequential data
-    def create_sequences(data, targets, seq_length):
-        xs, ys = [], []
-        for i in range(len(data)-seq_length):
-            xs.append(data[i:(i+seq_length)])
-            ys.append(targets[i+seq_length])
-        return np.array(xs), np.array(ys)
-
-    X_train_seq, y_train_seq = create_sequences(X_train.values, y_train.values, sequence_length)
-    X_test_seq, y_test_seq = create_sequences(X_test.values, y_test.values, sequence_length)
-
-    # Convert to PyTorch tensors
-    train_data = TensorDataset(
-        torch.FloatTensor(X_train_seq),
-        torch.FloatTensor(y_train_seq).unsqueeze(1)
-    )
-    test_data = TensorDataset(
-        torch.FloatTensor(X_test_seq),
-        torch.FloatTensor(y_test_seq).unsqueeze(1)
-    )
-
-    # IMPORTANT: shuffle=False for the DataLoader
-    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)
-    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
-
-    # Initialize the model
-    model = AnomalyLSTM(input_size=X_train.shape[1]).to(device)
-    criterion = nn.BCELoss()
-    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
-
-    # Training
-    train_start = time.time()
-    for epoch in range(epochs):
-        model.train()
-        for batch_x, batch_y in train_loader:
-            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
-            optimizer.zero_grad()
-            outputs = model(batch_x)
-            loss = criterion(outputs, batch_y)
-            loss.backward()
-            optimizer.step()
-
-    # Evaluation
-    model.eval()
-    test_preds, test_true, test_scores = [], [], []
-    with torch.no_grad():
-        for batch_x, batch_y in test_loader:
-            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
-            outputs = model(batch_x)
-            predicted = (outputs > 0.5).float()
-            test_preds.extend(predicted.cpu().numpy())
-            test_scores.extend(outputs.cpu().numpy())
-            test_true.extend(batch_y.cpu().numpy())
-
-    train_time = time.time() - train_start
-
-    # Pad the predictions to the original size and sort by timestamp
-    full_preds = np.concatenate([np.zeros(sequence_length), np.array(test_preds).flatten()])
-    full_scores = np.concatenate([np.zeros(sequence_length), np.array(test_scores).flatten()])
-    full_preds = full_preds[:len(y_test)]
-    full_scores = full_scores[:len(y_test)]
-
-    # Build a DataFrame with timestamps for sorting
-    pred_df = pd.DataFrame({
-        'timestamp': timestamps_test[-len(full_preds):],
-        'y_true': y_test[-len(full_preds):],
-        'y_pred': full_preds,
-        'y_scores': full_scores
-    }).sort_values('timestamp')
-
-    # Compute metrics on the sorted data
-    f1 = f1_score(pred_df['y_true'], pred_df['y_pred'])
-    sw_f1 = tm.segment_wise_f_score(pred_df['y_true'], pred_df['y_pred'])
-
-    guardar_prediccion("LSTM", pred_df['y_true'], pred_df['y_pred'], pred_df['y_scores'], pred_df['timestamp'])
-
-    return model, f1, sw_f1, train_time, pred_df['y_pred']
-
-# [The remaining functions (objective, optimize_model, evaluate_models) are unchanged...]
-
-# -------------------------------
-# Models
-from pyod.models.lof import LOF
-from pyod.models.cblof import CBLOF
-from pyod.models.knn import KNN
-from pyod.models.iforest import IForest
-from pyod.models.ae1svm import AE1SVM
-from pyod.models.auto_encoder import AutoEncoder
-
-modelos_distancia = [LOF, CBLOF, KNN]
-modelos_arboles = [IForest]
-modelos_machine_learning = []
-modelos_reconstruccion = [AE1SVM, AutoEncoder]
-
-# -------------------------------
-# Modified evaluate_models function
-# -------------------------------
-# evaluate_models modified so that all predictions are sorted
-def evaluate_models(model_classes, best_params_dict, results_filename, include_lstm=False):
-    results_df = pd.DataFrame(columns=[
-        'nombre_modelo', 'f1_score', 'segment_wise_f_score', 'tiempo_entrenamiento', 'best_params'
-    ])
-
-    # Evaluate non-LSTM models (using the shuffled data)
-    for model_class in model_classes:
-        nombre_modelo = model_class.__name__
-        params = best_params_dict.get(nombre_modelo, {})
-        params['contamination'] = contamination
-
-        if model_class.__name__ in ['LOF', 'CBLOF', 'KNN', 'IForest']:
-            params['n_jobs'] = -1
-
-        inicio = time.time()
-        try:
-            model = model_class(**params)
-
-            if nombre_modelo in ['AutoEncoder', 'AE1SVM']:
-                model.fit(X_train_shuf[y_train_shuf == 0])
-            else:
-                model.fit(X_train_shuf)
-
-            t = time.time() - inicio
-            y_pred = model.predict(X_test_shuf)
-            y_scores = model.decision_function(X_test_shuf) if hasattr(model, 'decision_function') else None
-
-            # Build a temporary DataFrame with timestamps for sorting
-            pred_df = pd.DataFrame({
-                'timestamp': X_test_shuf.index,
-                'y_true': y_test_shuf,
-                'y_pred': y_pred,
-                'y_scores': y_scores if y_scores is not None else np.nan
-            }).sort_values('timestamp')
-
-            # Compute metrics on the sorted data
-            f1 = f1_score(pred_df['y_true'], pred_df['y_pred'])
-            sw_f1 = tm.segment_wise_f_score(pred_df['y_true'], pred_df['y_pred'])
-
-            # Save the sorted predictions
-            guardar_prediccion(
-                nombre_modelo,
-                pred_df['y_true'],
-                pred_df['y_pred'],
-                pred_df['y_scores'] if 'y_scores' in pred_df.columns else None,
-                pred_df['timestamp']
-            )
-
-            print(f'Modelo: {nombre_modelo} - F1: {f1:.4f} - Segment-wise F1: {sw_f1:.4f} - Tiempo: {t:.2f}s')
-
-            results_df.loc[len(results_df)] = [
-                nombre_modelo, f1, sw_f1, t, json.dumps(params, ensure_ascii=False)
-            ]
-        except Exception as e:
-            print(f'Error en el modelo {nombre_modelo}: {e}')
-
-    # Evaluate the LSTM (using the unshuffled data)
-    if include_lstm:
-        inicio_lstm = time.time()
-        print("\nEntrenando modelo LSTM (sin shuffle)...")
-
-        lstm_model, lstm_f1, lstm_sw_f1, lstm_time, lstm_preds = train_lstm(
-            X_train_noshuf, y_train_noshuf,
-            X_test_noshuf, y_test_noshuf,
-            timestamps_test=X_test_noshuf.index
-        )
-
-        print(f'Modelo: LSTM - F1: {lstm_f1:.4f} - Segment-wise F1: {lstm_sw_f1:.4f} - Tiempo: {lstm_time:.2f}s')
-
-        results_df.loc[len(results_df)] = [
-            "LSTM", lstm_f1, lstm_sw_f1, lstm_time,
-            json.dumps({
-                "sequence_length": 10,
-                "epochs": 20,
-                "batch_size": 16,
-                "hidden_size": 64,
-                "num_layers": 2,
-                "dropout": 0.2
-            }, ensure_ascii=False)
-        ]
-
-    # Save the results
-    os.makedirs('../results', exist_ok=True)
-    results_df.to_csv(f'../results/{results_filename}', index=False)
-    print(f'Resultados guardados en {results_filename}')
-    return results_df
-
-# -------------------------------
-# Parameters and execution
-best_params = {
-    'LOF': {"n_neighbors": 62, "metric": "minkowski", "contamination": contamination, "n_jobs": -1},
-    'CBLOF': {"n_clusters": 8, "alpha": 0.87571, "beta": 6, "contamination": contamination, "n_jobs": -1},
-    'KNN': {"n_neighbors": 5, "method": "mean", "contamination": contamination, "n_jobs": -1},
-    'IForest': {'n_jobs': -1, "contamination": contamination},
-    'AutoEncoder': {},
-    'AE1SVM': {}
-}
-
-print("\nEvaluando modelos basados en distancia...")
-distancia_results = evaluate_models(modelos_distancia, best_params, 'distancia_results.csv')
-
-print("\nEvaluando modelos basados en árboles...")
-arbol_results = evaluate_models(modelos_arboles, best_params, 'arbol_results.csv')
-
-print("\nEvaluando modelos de reconstrucción...")
-reconstruccion_results = evaluate_models(modelos_reconstruccion, best_params, 'reconstruccion_results.csv')
-
-print("\nEvaluando LSTM...")
-ml_results = evaluate_models(modelos_machine_learning, best_params, 'ml_results.csv', include_lstm=True)