tsadmetrics 0.1.17__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docs_api → docs/add_docs/api_doc}/conf.py +3 -26
- {docs_manual → docs/add_docs/full_doc}/conf.py +2 -25
- docs/add_docs/manual_doc/conf.py +67 -0
- docs/conf.py +1 -1
- examples/example_direct_data.py +28 -0
- examples/example_direct_single_data.py +25 -0
- examples/example_file_reference.py +24 -0
- examples/example_global_config_file.py +13 -0
- examples/example_metric_config_file.py +19 -0
- examples/example_simple_metric.py +8 -0
- examples/specific_examples/AbsoluteDetectionDistance_example.py +24 -0
- examples/specific_examples/AffiliationbasedFScore_example.py +24 -0
- examples/specific_examples/AverageDetectionCount_example.py +24 -0
- examples/specific_examples/CompositeFScore_example.py +24 -0
- examples/specific_examples/DelayThresholdedPointadjustedFScore_example.py +24 -0
- examples/specific_examples/DetectionAccuracyInRange_example.py +24 -0
- examples/specific_examples/EnhancedTimeseriesAwareFScore_example.py +24 -0
- examples/specific_examples/LatencySparsityawareFScore_example.py +24 -0
- examples/specific_examples/MeanTimeToDetect_example.py +24 -0
- examples/specific_examples/NabScore_example.py +24 -0
- examples/specific_examples/PateFScore_example.py +24 -0
- examples/specific_examples/Pate_example.py +24 -0
- examples/specific_examples/PointadjustedAtKFScore_example.py +24 -0
- examples/specific_examples/PointadjustedAucPr_example.py +24 -0
- examples/specific_examples/PointadjustedAucRoc_example.py +24 -0
- examples/specific_examples/PointadjustedFScore_example.py +24 -0
- examples/specific_examples/RangebasedFScore_example.py +24 -0
- examples/specific_examples/SegmentwiseFScore_example.py +24 -0
- examples/specific_examples/TemporalDistance_example.py +24 -0
- examples/specific_examples/TimeTolerantFScore_example.py +24 -0
- examples/specific_examples/TimeseriesAwareFScore_example.py +24 -0
- examples/specific_examples/TotalDetectedInRange_example.py +24 -0
- examples/specific_examples/VusPr_example.py +24 -0
- examples/specific_examples/VusRoc_example.py +24 -0
- examples/specific_examples/WeightedDetectionDifference_example.py +24 -0
- tsadmetrics/__init__.py +0 -21
- tsadmetrics/base/Metric.py +188 -0
- tsadmetrics/evaluation/Report.py +25 -0
- tsadmetrics/evaluation/Runner.py +253 -0
- tsadmetrics/metrics/Registry.py +141 -0
- tsadmetrics/metrics/__init__.py +2 -0
- tsadmetrics/metrics/spm/PointwiseAucPr.py +62 -0
- tsadmetrics/metrics/spm/PointwiseAucRoc.py +63 -0
- tsadmetrics/metrics/spm/PointwiseFScore.py +86 -0
- tsadmetrics/metrics/spm/PrecisionAtK.py +81 -0
- tsadmetrics/metrics/spm/__init__.py +9 -0
- tsadmetrics/metrics/tem/dpm/DelayThresholdedPointadjustedFScore.py +83 -0
- tsadmetrics/metrics/tem/dpm/LatencySparsityawareFScore.py +76 -0
- tsadmetrics/metrics/tem/dpm/MeanTimeToDetect.py +47 -0
- tsadmetrics/metrics/tem/dpm/NabScore.py +60 -0
- tsadmetrics/metrics/tem/dpm/__init__.py +11 -0
- tsadmetrics/metrics/tem/ptdm/AverageDetectionCount.py +53 -0
- tsadmetrics/metrics/tem/ptdm/DetectionAccuracyInRange.py +66 -0
- tsadmetrics/metrics/tem/ptdm/PointadjustedAtKFScore.py +80 -0
- tsadmetrics/metrics/tem/ptdm/TimeseriesAwareFScore.py +248 -0
- tsadmetrics/metrics/tem/ptdm/TotalDetectedInRange.py +65 -0
- tsadmetrics/metrics/tem/ptdm/WeightedDetectionDifference.py +97 -0
- tsadmetrics/metrics/tem/ptdm/__init__.py +12 -0
- tsadmetrics/metrics/tem/tmem/AbsoluteDetectionDistance.py +48 -0
- tsadmetrics/metrics/tem/tmem/EnhancedTimeseriesAwareFScore.py +252 -0
- tsadmetrics/metrics/tem/tmem/TemporalDistance.py +68 -0
- tsadmetrics/metrics/tem/tmem/__init__.py +9 -0
- tsadmetrics/metrics/tem/tpdm/CompositeFScore.py +104 -0
- tsadmetrics/metrics/tem/tpdm/PointadjustedAucPr.py +123 -0
- tsadmetrics/metrics/tem/tpdm/PointadjustedAucRoc.py +119 -0
- tsadmetrics/metrics/tem/tpdm/PointadjustedFScore.py +96 -0
- tsadmetrics/metrics/tem/tpdm/RangebasedFScore.py +236 -0
- tsadmetrics/metrics/tem/tpdm/SegmentwiseFScore.py +73 -0
- tsadmetrics/metrics/tem/tpdm/__init__.py +12 -0
- tsadmetrics/metrics/tem/tstm/AffiliationbasedFScore.py +68 -0
- tsadmetrics/metrics/tem/tstm/Pate.py +62 -0
- tsadmetrics/metrics/tem/tstm/PateFScore.py +61 -0
- tsadmetrics/metrics/tem/tstm/TimeTolerantFScore.py +85 -0
- tsadmetrics/metrics/tem/tstm/VusPr.py +51 -0
- tsadmetrics/metrics/tem/tstm/VusRoc.py +55 -0
- tsadmetrics/metrics/tem/tstm/__init__.py +15 -0
- tsadmetrics/{_tsadeval/affiliation/_integral_interval.py → utils/functions_affiliation.py} +377 -9
- tsadmetrics/utils/functions_auc.py +393 -0
- tsadmetrics/utils/functions_conversion.py +63 -0
- tsadmetrics/utils/functions_counting_metrics.py +26 -0
- tsadmetrics/{_tsadeval/latency_sparsity_aware.py → utils/functions_latency_sparsity_aware.py} +1 -1
- tsadmetrics/{_tsadeval/nabscore.py → utils/functions_nabscore.py} +15 -1
- tsadmetrics-1.0.1.dist-info/METADATA +83 -0
- tsadmetrics-1.0.1.dist-info/RECORD +91 -0
- tsadmetrics-1.0.1.dist-info/top_level.txt +3 -0
- entorno/bin/activate_this.py +0 -32
- entorno/bin/rst2html.py +0 -23
- entorno/bin/rst2html4.py +0 -26
- entorno/bin/rst2html5.py +0 -33
- entorno/bin/rst2latex.py +0 -26
- entorno/bin/rst2man.py +0 -27
- entorno/bin/rst2odt.py +0 -28
- entorno/bin/rst2odt_prepstyles.py +0 -20
- entorno/bin/rst2pseudoxml.py +0 -23
- entorno/bin/rst2s5.py +0 -24
- entorno/bin/rst2xetex.py +0 -27
- entorno/bin/rst2xml.py +0 -23
- entorno/bin/rstpep2html.py +0 -25
- tests/test_binary.py +0 -946
- tests/test_non_binary.py +0 -450
- tests/test_utils.py +0 -49
- tsadmetrics/_tsadeval/affiliation/_affiliation_zone.py +0 -86
- tsadmetrics/_tsadeval/affiliation/_single_ground_truth_event.py +0 -68
- tsadmetrics/_tsadeval/affiliation/generics.py +0 -135
- tsadmetrics/_tsadeval/affiliation/metrics.py +0 -114
- tsadmetrics/_tsadeval/auc_roc_pr_plot.py +0 -295
- tsadmetrics/_tsadeval/discontinuity_graph.py +0 -109
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/File_IO.py +0 -175
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Range.py +0 -50
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Time_Plot.py +0 -184
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/__init__.py +0 -0
- tsadmetrics/_tsadeval/eTaPR_pkg/__init__.py +0 -0
- tsadmetrics/_tsadeval/eTaPR_pkg/etapr.py +0 -386
- tsadmetrics/_tsadeval/eTaPR_pkg/tapr.py +0 -362
- tsadmetrics/_tsadeval/metrics.py +0 -698
- tsadmetrics/_tsadeval/prts/__init__.py +0 -0
- tsadmetrics/_tsadeval/prts/base/__init__.py +0 -0
- tsadmetrics/_tsadeval/prts/base/time_series_metrics.py +0 -165
- tsadmetrics/_tsadeval/prts/basic_metrics_ts.py +0 -121
- tsadmetrics/_tsadeval/prts/time_series_metrics/__init__.py +0 -0
- tsadmetrics/_tsadeval/prts/time_series_metrics/fscore.py +0 -61
- tsadmetrics/_tsadeval/prts/time_series_metrics/precision.py +0 -86
- tsadmetrics/_tsadeval/prts/time_series_metrics/precision_recall.py +0 -21
- tsadmetrics/_tsadeval/prts/time_series_metrics/recall.py +0 -85
- tsadmetrics/_tsadeval/tests.py +0 -376
- tsadmetrics/_tsadeval/threshold_plt.py +0 -30
- tsadmetrics/_tsadeval/time_tolerant.py +0 -33
- tsadmetrics/binary_metrics.py +0 -1652
- tsadmetrics/metric_utils.py +0 -98
- tsadmetrics/non_binary_metrics.py +0 -372
- tsadmetrics/scripts/__init__.py +0 -0
- tsadmetrics/scripts/compute_metrics.py +0 -42
- tsadmetrics/utils.py +0 -124
- tsadmetrics/validation.py +0 -35
- tsadmetrics-0.1.17.dist-info/METADATA +0 -54
- tsadmetrics-0.1.17.dist-info/RECORD +0 -66
- tsadmetrics-0.1.17.dist-info/entry_points.txt +0 -2
- tsadmetrics-0.1.17.dist-info/top_level.txt +0 -6
- {tests → tsadmetrics/base}/__init__.py +0 -0
- /tsadmetrics/{_tsadeval → evaluation}/__init__.py +0 -0
- /tsadmetrics/{_tsadeval/affiliation → metrics/tem}/__init__.py +0 -0
- /tsadmetrics/{_tsadeval/vus_utils.py → utils/functions_vus.py} +0 -0
- {tsadmetrics-0.1.17.dist-info → tsadmetrics-1.0.1.dist-info}/WHEEL +0 -0
--- /dev/null
+++ b/tsadmetrics/metrics/tem/tstm/TimeTolerantFScore.py
@@ -0,0 +1,85 @@
+from ....base.Metric import Metric
+import numpy as np
+
+class TimeTolerantFScore(Metric):
+    """
+    Calculate time tolerant F-score for anomaly detection in time series.
+    This metric is based on the standard F-score, but applies a temporal adjustment
+    to the predictions before computing it. Specifically, a predicted anomalous point is considered
+    a true positive if it lies within a temporal window of size :math:`{\\tau}` around any ground-truth anomalous point.
+    This allows for small temporal deviations in the predictions to be tolerated. The adjusted predictions are then used
+    to _compute the standard point-wise F-Score.
+
+    Implementation of https://link.springer.com/article/10.1007/s10618-023-00988-8
+
+    For more information, see the original paper:
+    https://arxiv.org/abs/2008.05788
+
+    Parameters:
+        t (int):
+            The time tolerance parameter.
+        beta (float):
+            The beta value, which determines the weight of precision in the combined score.
+            Default is 1, which gives equal weight to precision and recall.
+    """
+    name = "ttf"
+    binary_prediction = True
+    param_schema = {
+        "t": {
+            "default": 5,
+            "type": int
+        },
+        "beta": {
+            "default": 1.0,
+            "type": float
+        }
+    }
+
+    def __init__(self, **kwargs):
+        super().__init__(name="ttf", **kwargs)
+
+    def _compute(self, y_true, y_pred):
+        """
+        Calculate the time tolerant F-score (optimized version).
+        """
+        t = self.params['t']
+        beta = self.params['beta']
+
+        # Precompute masks for efficiency
+        true_anomalies = y_true == 1
+        predictions = y_pred == 1
+
+        # Create P′1 for recall: for each true anomaly, check if any prediction within ±t
+        p_prime1 = np.zeros_like(y_true, dtype=bool)
+
+        for i in np.where(true_anomalies)[0]:
+            start = max(0, i - t)
+            end = min(len(y_pred), i + t + 1)
+            if np.any(predictions[start:end]):
+                p_prime1[i] = True
+
+        # Create P′2 for precision: for each prediction, check if any true anomaly within ±t
+        p_prime2 = np.zeros_like(y_pred, dtype=bool)
+
+        for j in np.where(predictions)[0]:
+            start = max(0, j - t)
+            end = min(len(y_true), j + t + 1)
+            if np.any(true_anomalies[start:end]):
+                p_prime2[j] = True
+
+        # Calculate recall using P′1
+        tp_recall = np.sum(true_anomalies & p_prime1)
+        fn_recall = np.sum(true_anomalies & ~p_prime1)
+        recall = tp_recall / (tp_recall + fn_recall) if (tp_recall + fn_recall) > 0 else 0.0
+
+        # Calculate precision using P′2
+        tp_precision = np.sum(predictions & p_prime2)
+        fp_precision = np.sum(predictions & ~p_prime2)
+        precision = tp_precision / (tp_precision + fp_precision) if (tp_precision + fp_precision) > 0 else 0.0
+
+        # Calculate F-score
+        if precision == 0 and recall == 0:
+            return 0.0
+
+        f_score = ((1 + beta**2) * precision * recall) / (beta**2 * precision + recall)
+        return f_score
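For illustration, the ±t adjustment above can be traced on a toy series with a standalone NumPy sketch; it mirrors the `_compute` logic shown in the hunk rather than going through the `Metric` base class:

    import numpy as np

    # Toy series: three true anomalous points, two predicted points, tolerance t = 1.
    y_true = np.array([0, 1, 1, 0, 0, 0, 0, 1])
    y_pred = np.array([0, 0, 1, 0, 1, 0, 0, 0])
    t, beta = 1, 1.0

    true_anomalies = y_true == 1
    predictions = y_pred == 1

    # Recall side: a true anomaly counts as detected if any prediction falls within +/- t.
    detected = np.array([predictions[max(0, i - t):i + t + 1].any()
                         for i in np.where(true_anomalies)[0]])
    recall = detected.mean()                      # 2 of 3 -> 0.667

    # Precision side: a prediction counts as correct if any true anomaly falls within +/- t.
    correct = np.array([true_anomalies[max(0, j - t):j + t + 1].any()
                        for j in np.where(predictions)[0]])
    precision = correct.mean()                    # 1 of 2 -> 0.5

    f_score = (1 + beta**2) * precision * recall / (beta**2 * precision + recall)
    print(round(precision, 3), round(recall, 3), round(f_score, 3))  # 0.5 0.667 0.571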
--- /dev/null
+++ b/tsadmetrics/metrics/tem/tstm/VusPr.py
@@ -0,0 +1,51 @@
+from ....base.Metric import Metric
+from ....utils.functions_vus import generate_curve
+import numpy as np
+class VusPr(Metric):
+    """
+    Calculate the VUS-PR (Volume Under the PR Surface) score for anomaly detection in time series.
+
+    This metric is an extension of the classical AUC-PR, incorporating a temporal tolerance parameter `window`
+    that smooths the binary ground-truth labels. It allows for some flexibility in the detection of
+    anomalies that are temporally close to the true events. The final metric integrates the PR-AUC
+    over several levels of temporal tolerance (from 0 to `window`), yielding a volume under the PR surface.
+
+    Implementation of https://link.springer.com/article/10.1007/s10618-023-00988-8
+
+    For more information, see the original paper:
+    https://dl.acm.org/doi/10.14778/3551793.3551830
+
+    Parameters:
+        window (int):
+            Maximum temporal tolerance used to smooth the evaluation.
+            Default is 4.
+    """
+    name = "vus_pr"
+    binary_prediction = False
+    param_schema = {
+        "window": {
+            "default": 4,
+            "type": int
+        }
+    }
+
+    def __init__(self, **kwargs):
+        super().__init__(name="vus_pr", **kwargs)
+
+    def _compute(self, y_true, y_anomaly_scores):
+        """
+        Calculate the VUS-PR score.
+
+        Parameters:
+            y_true (np.array):
+                Ground-truth binary labels (0 = normal, 1 = anomaly).
+            y_anomaly_scores (np.array):
+                Anomaly scores for each time point.
+
+        Returns:
+            float: VUS-PR score.
+        """
+        window = self.params["window"]
+        _, _, _, _, _, _, _, pr = generate_curve(y_true, y_anomaly_scores, slidingWindow=window)
+
+        return pr
--- /dev/null
+++ b/tsadmetrics/metrics/tem/tstm/VusRoc.py
@@ -0,0 +1,55 @@
+from ....base.Metric import Metric
+from ....utils.functions_vus import generate_curve
+import numpy as np
+
+class VusRoc(Metric):
+    """
+    Calculate the VUS-ROC (Volume Under the ROC Surface) score for anomaly detection in time series.
+
+    This metric extends the classical AUC-ROC by introducing a temporal tolerance parameter `l`, which
+    smooths the binary ground-truth labels. The idea is to allow a flexible evaluation that tolerates
+    small misalignments in the detection of anomalies. The final score is computed by integrating
+    the ROC-AUC over different values of the tolerance parameter, from 0 to `window`, thus producing
+    a volume under the ROC surface.
+
+    Implementation of https://link.springer.com/article/10.1007/s10618-023-00988-8
+
+    For more information, see the original paper:
+    https://dl.acm.org/doi/10.14778/3551793.3551830
+
+    Parameters:
+        window (int):
+            Maximum temporal tolerance `l` used to smooth the evaluation.
+            Default is 4.
+    """
+    name = "vus_roc"
+    binary_prediction = False
+    param_schema = {
+        "window": {
+            "default": 4,
+            "type": int
+        }
+    }
+
+    def __init__(self, **kwargs):
+        super().__init__(name="vus_roc", **kwargs)
+
+    def _compute(self, y_true, y_anomaly_scores):
+        """
+        Calculate the VUS-ROC score.
+
+        Parameters:
+            y_true (np.array):
+                Ground-truth binary labels (0 = normal, 1 = anomaly).
+            y_anomaly_scores (np.array):
+                Anomaly scores for each time point.
+
+        Returns:
+            float: VUS-ROC score.
+        """
+
+        _, _, _, _, _, _, roc, _ = generate_curve(
+            y_true, y_anomaly_scores, self.params["window"]
+        )
+
+        return roc
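Both VUS hunks delegate to `generate_curve` from `tsadmetrics.utils.functions_vus`: VusPr keeps the last element of the returned tuple and VusRoc the second-to-last. A hedged sketch of calling it directly on toy data, assuming nothing beyond the call signatures used above (the score construction is made-up example data):

    import numpy as np
    from tsadmetrics.utils.functions_vus import generate_curve

    rng = np.random.default_rng(0)
    y_true = np.zeros(300, dtype=int)
    y_true[120:140] = 1                      # one labelled anomalous segment
    scores = rng.random(300)
    scores[118:142] += 1.0                   # toy detector: elevated scores near it

    # As unpacked in the two classes above: the ROC volume is the 7th value
    # returned by generate_curve and the PR volume is the 8th (last).
    *_, vus_roc, vus_pr = generate_curve(y_true, scores, slidingWindow=4)
    print(vus_roc, vus_pr)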
--- /dev/null
+++ b/tsadmetrics/metrics/tem/tstm/__init__.py
@@ -0,0 +1,15 @@
+from .AffiliationbasedFScore import AffiliationbasedFScore
+from .TimeTolerantFScore import TimeTolerantFScore
+from .VusPr import VusPr
+from .VusRoc import VusRoc
+from .PateFScore import PateFScore
+from .Pate import Pate
+
+__all__ = [
+    "AffiliationbasedFScore",
+    "TimeTolerantFScore",
+    "VusPr",
+    "VusRoc",
+    "PateFScore",
+    "Pate"
+]
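With these re-exports, the tolerance- and surface-based metrics are importable straight from the sub-package. A small sketch; the keyword-argument constructors are an assumption based on `__init__(self, **kwargs)` and the `param_schema` defaults shown above:

    from tsadmetrics.metrics.tem.tstm import TimeTolerantFScore, VusPr, VusRoc

    # Assumed instantiation style: parameters passed as keyword arguments and
    # validated against param_schema by the Metric base class.
    ttf = TimeTolerantFScore(t=3, beta=1.0)   # overrides the declared default t=5
    vus = VusPr(window=4)                     # same value as the declared default

    print(TimeTolerantFScore.name, TimeTolerantFScore.binary_prediction)  # ttf True
    print(VusRoc.param_schema["window"]["default"])                       # 4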
--- a/tsadmetrics/_tsadeval/affiliation/_integral_interval.py
+++ b/tsadmetrics/utils/functions_affiliation.py
@@ -1,15 +1,244 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import math
-
-
-
-
-
-
-
-
-"""
+import glob
+import os
+import gzip
+from itertools import groupby
+from operator import itemgetter
+
+#Generics
+def convert_vector_to_events(vector = [0, 1, 1, 0, 0, 1, 0]):
+    """
+    Convert a binary vector (indicating 1 for the anomalous instances)
+    to a list of events. The events are considered as durations,
+    i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1).
+
+    :param vector: a list of elements belonging to {0, 1}
+    :return: a list of couples, each couple representing the start and stop of
+    each event
+    """
+    positive_indexes = [idx for idx, val in enumerate(vector) if val > 0]
+    events = []
+    for k, g in groupby(enumerate(positive_indexes), lambda ix : ix[0] - ix[1]):
+        cur_cut = list(map(itemgetter(1), g))
+        events.append((cur_cut[0], cur_cut[-1]))
+
+    # Consistent conversion in case of range anomalies (for indexes):
+    # A positive index i is considered as the interval [i, i+1),
+    # so the last index should be moved by 1
+    events = [(x, y+1) for (x,y) in events]
+
+    return(events)
+
+
+def infer_Trange(events_pred, events_gt):
+    """
+    Given the list of events events_pred and events_gt, get the
+    smallest possible Trange corresponding to the start and stop indexes
+    of the whole series.
+    Trange will not influence the measure of distances, but will impact the
+    measures of probabilities.
+
+    :param events_pred: a list of couples corresponding to predicted events
+    :param events_gt: a list of couples corresponding to ground truth events
+    :return: a couple corresponding to the smallest range containing the events
+    """
+    if len(events_gt) == 0:
+        raise ValueError('The gt events should contain at least one event')
+    if len(events_pred) == 0:
+        # empty prediction, base Trange only on events_gt (which is non empty)
+        return(infer_Trange(events_gt, events_gt))
+
+    min_pred = min([x[0] for x in events_pred])
+    min_gt = min([x[0] for x in events_gt])
+    max_pred = max([x[1] for x in events_pred])
+    max_gt = max([x[1] for x in events_gt])
+    Trange = (min(min_pred, min_gt), max(max_pred, max_gt))
+    return(Trange)
+
+def has_point_anomalies(events):
+    """
+    Checking whether events contain point anomalies, i.e.
+    events starting and stopping at the same time.
+
+    :param events: a list of couples corresponding to predicted events
+    :return: True is the events have any point anomalies, False otherwise
+    """
+    if len(events) == 0:
+        return(False)
+    return(min([x[1] - x[0] for x in events]) == 0)
+
+def _sum_wo_nan(vec):
+    """
+    Sum of elements, ignoring math.isnan ones
+
+    :param vec: vector of floating numbers
+    :return: sum of the elements, ignoring math.isnan ones
+    """
+    vec_wo_nan = [e for e in vec if not math.isnan(e)]
+    return(sum(vec_wo_nan))
+
+def _len_wo_nan(vec):
+    """
+    Count of elements, ignoring math.isnan ones
+
+    :param vec: vector of floating numbers
+    :return: count of the elements, ignoring math.isnan ones
+    """
+    vec_wo_nan = [e for e in vec if not math.isnan(e)]
+    return(len(vec_wo_nan))
+
+def read_gz_data(filename = 'data/machinetemp_groundtruth.gz'):
+    """
+    Load a file compressed with gz, such that each line of the
+    file is either 0 (representing a normal instance) or 1 (representing)
+    an anomalous instance.
+    :param filename: file path to the gz compressed file
+    :return: list of integers with either 0 or 1
+    """
+    with gzip.open(filename, 'rb') as f:
+        content = f.read().splitlines()
+    content = [int(x) for x in content]
+    return(content)
+
+def read_all_as_events():
+    """
+    Load the files contained in the folder `data/` and convert
+    to events. The length of the series is kept.
+    The convention for the file name is: `dataset_algorithm.gz`
+    :return: two dictionaries:
+        - the first containing the list of events for each dataset and algorithm,
+        - the second containing the range of the series for each dataset
+    """
+    filepaths = glob.glob('data/*.gz')
+    datasets = dict()
+    Tranges = dict()
+    for filepath in filepaths:
+        vector = read_gz_data(filepath)
+        events = convert_vector_to_events(vector)
+        # ad hoc cut for those files
+        cut_filepath = (os.path.split(filepath)[1]).split('_')
+        data_name = cut_filepath[0]
+        algo_name = (cut_filepath[1]).split('.')[0]
+        if not data_name in datasets:
+            datasets[data_name] = dict()
+            Tranges[data_name] = (0, len(vector))
+        datasets[data_name][algo_name] = events
+    return(datasets, Tranges)
+
+
+
+#Affiliation zone
+def t_start(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)):
+    """
+    Helper for `E_gt_func`
+
+    :param j: index from 0 to len(Js) (included) on which to get the start
+    :param Js: ground truth events, as a list of couples
+    :param Trange: range of the series where Js is included
+    :return: generalized start such that the middle of t_start and t_stop
+    always gives the affiliation zone
+    """
+    b = max(Trange)
+    n = len(Js)
+    if j == n:
+        return(2*b - t_stop(n-1, Js, Trange))
+    else:
+        return(Js[j][0])
+
+
+def t_stop(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)):
+    """
+    Helper for `E_gt_func`
+
+    :param j: index from 0 to len(Js) (included) on which to get the stop
+    :param Js: ground truth events, as a list of couples
+    :param Trange: range of the series where Js is included
+    :return: generalized stop such that the middle of t_start and t_stop
+    always gives the affiliation zone
+    """
+    if j == -1:
+        a = min(Trange)
+        return(2*a - t_start(0, Js, Trange))
+    else:
+        return(Js[j][1])
+
+
+def E_gt_func(j, Js, Trange):
+    """
+    Get the affiliation zone of element j of the ground truth
+
+    :param j: index from 0 to len(Js) (excluded) on which to get the zone
+    :param Js: ground truth events, as a list of couples
+    :param Trange: range of the series where Js is included, can
+    be (-math.inf, math.inf) for distance measures
+    :return: affiliation zone of element j of the ground truth represented
+    as a couple
+    """
+    range_left = (t_stop(j-1, Js, Trange) + t_start(j, Js, Trange))/2
+    range_right = (t_stop(j, Js, Trange) + t_start(j+1, Js, Trange))/2
+    return((range_left, range_right))
+
+def get_all_E_gt_func(Js, Trange):
+    """
+    Get the affiliation partition from the ground truth point of view
+
+    :param Js: ground truth events, as a list of couples
+    :param Trange: range of the series where Js is included, can
+    be (-math.inf, math.inf) for distance measures
+    :return: affiliation partition of the events
+    """
+    # E_gt is the limit of affiliation/attraction for each ground truth event
+    E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))]
+    return(E_gt)
+
+
+def interval_intersection(I = (1, 3), J = (2, 4)):
+    """
+    Intersection between two intervals I and J
+    I and J should be either empty or represent a positive interval (no point)
+
+    :param I: an interval represented by start and stop
+    :param J: a second interval of the same form
+    :return: an interval representing the start and stop of the intersection (or None if empty)
+    """
+    if I is None:
+        return(None)
+    if J is None:
+        return(None)
+
+    I_inter_J = (max(I[0], J[0]), min(I[1], J[1]))
+    if I_inter_J[0] >= I_inter_J[1]:
+        return(None)
+    else:
+        return(I_inter_J)
+
+
+def affiliation_partition(Is = [(1,1.5),(2,5),(5,6),(8,9)], E_gt = [(1,2.5),(2.5,4.5),(4.5,10)]):
+    """
+    Cut the events into the affiliation zones
+    The presentation given here is from the ground truth point of view,
+    but it is also used in the reversed direction in the main function.
+
+    :param Is: events as a list of couples
+    :param E_gt: range of the affiliation zones
+    :return: a list of list of intervals (each interval represented by either
+    a couple or None for empty interval). The outer list is indexed by each
+    affiliation zone of `E_gt`. The inner list is indexed by the events of `Is`.
+    """
+    out = [None] * len(E_gt)
+    for j in range(len(E_gt)):
+        E_gt_j = E_gt[j]
+        discarded_idx_before = [I[1] < E_gt_j[0] for I in Is] # end point of predicted I is before the begin of E
+        discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E
+        kept_index = [not(a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)]
+        Is_j = [x for x, y in zip(Is, kept_index)]
+        out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j]
+    return(out)
+
+# Single ground truth event
+
 
 def interval_length(J = (1,2)):
     """
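To make the interval conventions concrete, a short worked example of the helpers added in this hunk (each anomalous index i becomes the half-open interval [i, i+1)):

    from tsadmetrics.utils.functions_affiliation import (
        convert_vector_to_events, infer_Trange, get_all_E_gt_func, affiliation_partition)

    y_pred = [0, 1, 1, 0, 0, 1, 0]
    y_true = [0, 0, 1, 1, 0, 0, 0]

    events_pred = convert_vector_to_events(y_pred)   # [(1, 3), (5, 6)]
    events_gt = convert_vector_to_events(y_true)     # [(2, 4)]
    Trange = infer_Trange(events_pred, events_gt)    # (1, 6)

    # With a single ground-truth event, its affiliation zone spans the whole Trange,
    # so both predicted events land in the same (and only) partition cell.
    E_gt = get_all_E_gt_func(events_gt, Trange)      # [(1.0, 6.0)]
    print(affiliation_partition(events_pred, E_gt))  # [[(1, 3), (5, 6)]]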
@@ -462,3 +691,142 @@ def integral_interval_probaCDF_recall(I, J, E):
         d_right = f(cut_into_three[2])
         # It's an integral so summable
         return(d_left + d_middle + d_right)
+
+
+def affiliation_precision_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)):
+    """
+    Compute the individual precision probability from Is to a single ground truth J
+
+    :param Is: list of predicted events within the affiliation zone of J
+    :param J: couple representating the start and stop of a ground truth interval
+    :param E: couple representing the start and stop of the zone of affiliation of J
+    :return: individual precision probability in [0, 1], or math.nan if undefined
+    """
+    if all([I is None for I in Is]): # no prediction in the current area
+        return(math.nan) # undefined
+    return(sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is))
+
+def affiliation_recall_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)):
+    """
+    Compute the individual recall probability from a single ground truth J to Is
+
+    :param Is: list of predicted events within the affiliation zone of J
+    :param J: couple representating the start and stop of a ground truth interval
+    :param E: couple representing the start and stop of the zone of affiliation of J
+    :return: individual recall probability in [0, 1]
+    """
+    Is = [I for I in Is if I is not None] # filter possible None in Is
+    if len(Is) == 0: # there is no prediction in the current area
+        return(0)
+    E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions
+    Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is
+    return(sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J))
+def test_events(events):
+    """
+    Verify the validity of the input events
+    :param events: list of events, each represented by a couple (start, stop)
+    :return: None. Raise an error for incorrect formed or non ordered events
+    """
+    if type(events) is not list:
+        raise TypeError('Input `events` should be a list of couples')
+    if not all([type(x) is tuple for x in events]):
+        raise TypeError('Input `events` should be a list of tuples')
+    if not all([len(x) == 2 for x in events]):
+        raise ValueError('Input `events` should be a list of couples (start, stop)')
+    if not all([x[0] <= x[1] for x in events]):
+        raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop')
+    if not all([events[i][1] < events[i+1][0] for i in range(len(events) - 1)]):
+        raise ValueError('Couples of input `events` should be disjoint and ordered')
+
+def pr_from_events(events_pred, events_gt, Trange):
+    """
+    Compute the affiliation metrics including the precision/recall in [0,1],
+    along with the individual precision/recall distances and probabilities
+
+    :param events_pred: list of predicted events, each represented by a couple
+    indicating the start and the stop of the event
+    :param events_gt: list of ground truth events, each represented by a couple
+    indicating the start and the stop of the event
+    :param Trange: range of the series where events_pred and events_gt are included,
+    represented as a couple (start, stop)
+    :return: dictionary with precision, recall, and the individual metrics
+    """
+    # testing the inputs
+    # test_events(events_pred)
+    # test_events(events_gt)
+
+    # other tests
+    minimal_Trange = infer_Trange(events_pred, events_gt)
+    if not Trange[0] <= minimal_Trange[0]:
+        raise ValueError('`Trange` should include all the events')
+    if not minimal_Trange[1] <= Trange[1]:
+        raise ValueError('`Trange` should include all the events')
+
+    if len(events_gt) == 0:
+        raise ValueError('Input `events_gt` should have at least one event')
+
+    if has_point_anomalies(events_pred) or has_point_anomalies(events_gt):
+        raise ValueError('Cannot manage point anomalies currently')
+
+    if Trange is None:
+        # Set as default, but Trange should be indicated if probabilities are used
+        raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function')
+
+    E_gt = get_all_E_gt_func(events_gt, Trange)
+    aff_partition = affiliation_partition(events_pred, E_gt)
+
+    # # Computing precision distance
+    # d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
+
+    # # Computing recall distance
+    # d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
+
+    # Computing precision
+    p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
+
+    # Computing recall
+    p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
+
+    if _len_wo_nan(p_precision) > 0:
+        p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision)
+    else:
+        p_precision_average = p_precision[0] # math.nan
+    p_recall_average = sum(p_recall) / len(p_recall)
+
+    dict_out = dict({'precision': p_precision_average,
+                     'recall': p_recall_average,
+                     'individual_precision_probabilities': p_precision,
+                     'individual_recall_probabilities': p_recall})
+    return(dict_out)
+
+def produce_all_results():
+    """
+    Produce the affiliation precision/recall for all files
+    contained in the `data` repository
+    :return: a dictionary indexed by data names, each containing a dictionary
+    indexed by algorithm names, each containing the results of the affiliation
+    metrics (precision, recall, individual probabilities and distances)
+    """
+    datasets, Tranges = read_all_as_events() # read all the events in folder `data`
+    results = dict()
+    for data_name in datasets.keys():
+        results_data = dict()
+        for algo_name in datasets[data_name].keys():
+            if algo_name != 'groundtruth':
+                results_data[algo_name] = pr_from_events(datasets[data_name][algo_name],
+                                                         datasets[data_name]['groundtruth'],
+                                                         Tranges[data_name])
+        results[data_name] = results_data
+    return(results)
+
+
+def reformat_segments(segments):
+    segments = include_end_of_segments(segments)
+    segments = tuplify_segments(segments)
+    return segments
+
+def include_end_of_segments(segments):
+    return [[start, end + 1] for start, end in segments]
+
+def tuplify_segments(segments):
+    return [tuple(segment) for segment in segments]