tsadmetrics 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tsadmetrics/__init__.py +1 -1
- tsadmetrics/_tsadeval/__init__.py +0 -0
- tsadmetrics/_tsadeval/auc_roc_pr_plot.py +295 -0
- tsadmetrics/_tsadeval/discontinuity_graph.py +109 -0
- tsadmetrics/_tsadeval/latency_sparsity_aware.py +294 -0
- tsadmetrics/_tsadeval/metrics.py +698 -0
- tsadmetrics/_tsadeval/nabscore.py +311 -0
- tsadmetrics/_tsadeval/tests.py +376 -0
- tsadmetrics/_tsadeval/threshold_plt.py +30 -0
- tsadmetrics/_tsadeval/time_tolerant.py +33 -0
- tsadmetrics/_tsadeval/vus_utils.py +263 -0
- {tsadmetrics-0.1.2.dist-info → tsadmetrics-0.1.3.dist-info}/METADATA +1 -1
- tsadmetrics-0.1.3.dist-info/RECORD +20 -0
- tsadmetrics-0.1.2.dist-info/RECORD +0 -10
- {tsadmetrics-0.1.2.dist-info → tsadmetrics-0.1.3.dist-info}/WHEEL +0 -0
- {tsadmetrics-0.1.2.dist-info → tsadmetrics-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,698 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from sklearn.metrics import roc_auc_score, average_precision_score
|
4
|
+
|
5
|
+
from .nabscore import Sweeper
|
6
|
+
from .affiliation.metrics import pr_from_events as affiliation_pr
|
7
|
+
from .prts.basic_metrics_ts import ts_recall, ts_precision, ts_fscore
|
8
|
+
from .time_tolerant import time_tolerant_recall_,time_tolerant_precision_
|
9
|
+
from .latency_sparsity_aware import calc_twseq
|
10
|
+
from .eTaPR_pkg import etapr, tapr
|
11
|
+
from .eTaPR_pkg.DataManage import File_IO, Range as rng
|
12
|
+
from .vus_utils import generate_curve
|
13
|
+
|
14
|
+
|
15
|
+
# NOTE:
|
16
|
+
# Binary anomaly time series (either labels or predictions) are represented in 3 different ways.
|
17
|
+
# This is done to suit the different metrics.
|
18
|
+
#
|
19
|
+
# Example:
|
20
|
+
# A time series of length 10 (t=0 to t=9) with anomalies at times t=2, t=6 and t=7 is represented like this:
|
21
|
+
# Segmentwise: [[2,2], [6,7]]
|
22
|
+
# Pointwise: [2,6,7]
|
23
|
+
# Full_series: [0,0,1,0,0,0,1,1,0,0]
|
24
|
+
#
|
25
|
+
# The class Binary_anomalies is used to access these various formats.
|
26
|
+
|
27
|
+
|
28
|
+
def pointwise_to_segmentwise(pointwise):
    """Reformat anomaly time series from pointwise to segmentwise.

    Runs of consecutive indices are merged into inclusive ``[start, end]``
    pairs, e.g. ``[2, 6, 7]`` becomes ``[[2, 2], [6, 7]]``.

    Args:
        pointwise: Iterable of anomalous time indices, expected in ascending
            order.

    Returns:
        ``np.ndarray`` built from the list of ``[start, end]`` pairs; an empty
        array when ``pointwise`` is empty.
    """
    segmentwise = []
    prev = None
    for point in pointwise:
        # Open a new segment for the very first point or after a gap.
        # Checking the list itself avoids relying on a magic sentinel value.
        if not segmentwise or point > prev + 1:
            segmentwise.append([point, point])
        else:
            # Set the end to the current point instead of incrementing it, so
            # a repeated index cannot stretch the segment past the data.
            segmentwise[-1][-1] = point
        prev = point
    return np.array(segmentwise)
|
40
|
+
|
41
|
+
|
42
|
+
def segmentwise_to_pointwise(segmentwise):
    """Reformat anomaly time series from segmentwise to pointwise.

    Each inclusive ``[start, end]`` pair is expanded into every index it
    covers, in the order the segments are given.
    """
    indices = []
    for first, last in segmentwise:
        # ``last`` is inclusive, hence the +1.
        indices.extend(range(first, last + 1))
    return np.array(indices)
|
51
|
+
|
52
|
+
|
53
|
+
def segmentwise_to_full_series(segmentwise, length):
    """Reformat anomaly time series from segmentwise to full_series.

    Goes through the pointwise representation: segments are expanded to
    indices, which are then turned into a series of ``length`` entries.
    """
    return pointwise_to_full_series(segmentwise_to_pointwise(segmentwise), length)
|
58
|
+
|
59
|
+
def pointwise_to_full_series(pointwise, length):
    """Reformat anomaly time series from pointwise to full_series.

    Returns an array of ``length`` zeros with the entries listed in
    ``pointwise`` set to 1. The last index must be smaller than ``length``
    (checked with an assertion).
    """
    series = np.zeros(length)
    if len(pointwise) == 0:
        # Nothing to mark; the index assignment is skipped entirely.
        return np.array(series)
    assert pointwise[-1] < length
    series[pointwise] = 1
    return np.array(series)
|
66
|
+
|
67
|
+
def full_series_to_pointwise(full_series):
    """Reformat anomaly time series from full_series to pointwise.

    Returns the positions whose value equals 1, in ascending order.
    """
    flagged = [index for index, value in enumerate(full_series) if value == 1]
    return np.array(flagged)
|
74
|
+
|
75
|
+
class Binary_anomalies:
    """One binary anomaly series, stored in all three representations.

    After construction the instance exposes ``anomalies_segmentwise``,
    ``anomalies_ptwise`` and ``anomalies_full_series``.
    """

    def __init__(self, length, anomalies):
        self._length = length
        self._set_anomalies(anomalies)

    def _set_anomalies(self, anomalies):
        """Detect the format of ``anomalies`` and derive the other two.

        Raises:
            ValueError: If the array is neither 1-D nor 2-D in a recognised way.
        """
        anomalies = np.array(anomalies)

        if self._is_pointwise(anomalies):
            ptwise = anomalies
            segmentwise = pointwise_to_segmentwise(anomalies)
            full_series = pointwise_to_full_series(ptwise, self._length)
        elif self._is_full_series(anomalies):
            ptwise = full_series_to_pointwise(anomalies)
            segmentwise = pointwise_to_segmentwise(ptwise)
            full_series = anomalies
            # Diagnostic only: report (but do not fail on) a mismatch between
            # the number of distinct indices and the sum of the series.
            n_points = len(np.unique(ptwise))
            n_flagged = np.sum(full_series)
            if n_points != n_flagged:
                print(anomalies)
                print(f'2- ERROR EN EL TAMAÑO DE LOS PUNTOS: {n_points} != {n_flagged}')
        elif self._is_segmentwise(anomalies):
            segmentwise = anomalies
            ptwise = segmentwise_to_pointwise(anomalies)
            full_series = pointwise_to_full_series(ptwise, self._length)
            n_points = len(np.unique(ptwise))
            n_flagged = np.sum(full_series)
            if n_points != n_flagged:
                print(f'3- ERROR EN EL TAMAÑO DE LOS PUNTOS: {n_points} != {n_flagged}')
        else:
            raise ValueError(f"Illegal shape of anomalies:\n{anomalies}")

        # Consistency checks; they only make sense when anomalies exist.
        if len(ptwise) > 0:
            assert all(ptwise == np.sort(ptwise))
            assert ptwise[0] >= 0
            assert len(ptwise) == len(np.unique(ptwise))
            assert len(ptwise) == sum(full_series)

            assert all(segmentwise[:, 0] == np.sort(segmentwise[:, 0]))
            assert all(segmentwise[:, 1] >= segmentwise[:, 0])

        self.anomalies_segmentwise = segmentwise
        self.anomalies_ptwise = ptwise
        self.anomalies_full_series = full_series

    def _is_pointwise(self, anomalies):
        """Heuristic: 1-D and either shorter than the series, or exactly as
        long but with a sum larger than the length."""
        if len(anomalies.shape) != 1:
            return False
        if len(anomalies) < self._length:
            return True
        return len(anomalies) == self._length and np.sum(anomalies) > self._length

    def _is_full_series(self, anomalies):
        """1-D array with exactly one entry per time step."""
        one_dimensional = len(anomalies.shape) == 1
        return one_dimensional and len(anomalies) == self._length

    def _is_segmentwise(self, anomalies):
        """Any 2-D array is treated as a list of segments."""
        return len(anomalies.shape) == 2

    def get_length(self):
        return self._length
|
138
|
+
|
139
|
+
|
140
|
+
class Binary_detection:
    """A binary detection, i.e. a pair of time series of equal length.

    gt: the binary labels
    prediction: the binary predictions corresponding to the labels
    """

    def __init__(self, length, gt_anomalies, predicted_anomalies):
        self._length = length
        self._gt = Binary_anomalies(length, gt_anomalies)
        self._prediction = Binary_anomalies(length, predicted_anomalies)

    def get_length(self):
        """Length of the time series."""
        return self._length

    # --- ground-truth accessors -------------------------------------------

    def get_gt_anomalies_ptwise(self):
        """Labels in pointwise form."""
        labels = self._gt
        return labels.anomalies_ptwise

    def get_gt_anomalies_segmentwise(self):
        """Labels in segmentwise form."""
        labels = self._gt
        return labels.anomalies_segmentwise

    # --- prediction accessors ---------------------------------------------

    def get_predicted_anomalies_ptwise(self):
        """Predictions in pointwise form."""
        predictions = self._prediction
        return predictions.anomalies_ptwise

    def get_predicted_anomalies_segmentwise(self):
        """Predictions in segmentwise form."""
        predictions = self._prediction
        return predictions.anomalies_segmentwise

    def get_predicted_anomalies_full_series(self):
        """Predictions in full-series form."""
        predictions = self._prediction
        return predictions.anomalies_full_series

    def get_gt_anomalies_full_series(self):
        """Labels in full-series form."""
        labels = self._gt
        return labels.anomalies_full_series
|
170
|
+
|
171
|
+
|
172
|
+
class Nonbinary_detection:
    """A nonbinary detection, i.e. a pair of time series.

    gt: the binary labels
    anomaly score: the time series defining the degree of anomaly at each time point
    """

    def __init__(self, gt_anomalies, anomaly_score):
        # The series length is taken from the anomaly score.
        n_steps = len(anomaly_score)
        self._length = n_steps
        self._gt = Binary_anomalies(n_steps, gt_anomalies)
        self._anomaly_score = anomaly_score

    def get_gt_anomalies_ptwise(self):
        """Labels in pointwise form."""
        labels = self._gt
        return labels.anomalies_ptwise

    def get_gt_anomalies_segmentwise(self):
        """Labels in segmentwise form."""
        labels = self._gt
        return labels.anomalies_segmentwise

    def get_gt_anomalies_full_series(self):
        """Labels in full-series form."""
        labels = self._gt
        return labels.anomalies_full_series

    def get_anomaly_score(self):
        """The anomaly score exactly as passed to the constructor."""
        return self._anomaly_score
|
193
|
+
|
194
|
+
|
195
|
+
def f1_from_pr(p, r, beta=1):
    """Combine precision and recall into an F-beta score.

    Args:
        p: Precision.
        r: Recall.
        beta: Weight of recall relative to precision; ``beta=1`` gives F1.

    Returns:
        ``(1 + beta**2) * r * p / (beta**2 * p + r)``, or 0 when that
        denominator is zero.
    """
    if r == 0 and p == 0:
        return 0
    denominator = beta**2 * p + r
    if denominator == 0:
        # Reachable with e.g. beta == 0 and r == 0: the numerator is 0 as
        # well, so report 0 instead of dividing by zero.
        return 0
    return ((1 + beta**2) * r * p) / denominator
|
199
|
+
|
200
|
+
|
201
|
+
def f1_score(*args, tp, fp, fn, beta=1):
    """F-beta score from confusion counts.

    Positional arguments are accepted but ignored; the counts must be passed
    by keyword.
    """
    rec = recall(tp=tp, fn=fn)
    prec = precision(tp=tp, fp=fp)
    return f1_from_pr(prec, rec, beta=beta)
|
205
|
+
|
206
|
+
|
207
|
+
def recall(*args, tp, fn):
    """Return ``tp / (tp + fn)``, or 0 when there are no positives at all.

    Positional arguments are accepted but ignored.
    """
    if tp + fn == 0:
        return 0
    return tp / (tp + fn)
|
209
|
+
|
210
|
+
|
211
|
+
def precision(*args, tp, fp):
    """Return ``tp / (tp + fp)``, or 0 when nothing was predicted positive.

    Positional arguments are accepted but ignored.
    """
    if tp + fp == 0:
        return 0
    return tp / (tp + fp)
|
213
|
+
|
214
|
+
|
215
|
+
class Pointwise_metrics(Binary_detection):
    """Pointwise F1: every time step counts as one sample."""

    def __init__(self, *args):
        super().__init__(*args)
        self.name = "\\pwf[1]"
        self.set_confusion()

    def set_confusion(self):
        """Store ``tp``, ``fp`` and ``fn`` computed from the full series."""
        labels = self.get_gt_anomalies_full_series()
        predictions = self.get_predicted_anomalies_full_series()

        not_labels = 1 - labels
        not_predictions = 1 - predictions

        self.tp = np.sum(predictions * labels)
        self.fp = np.sum(predictions * not_labels)
        self.fn = np.sum(not_predictions * labels)

    def get_score(self):
        """F1 score of the stored confusion counts."""
        return f1_score(tp=self.tp, fn=self.fn, fp=self.fp)
|
231
|
+
|
232
|
+
|
233
|
+
class DelayThresholdedPointAdjust(Pointwise_metrics):
    """Pointwise F1 after point adjustment with a detection-delay limit.

    A labelled segment is marked as fully predicted when a prediction falls
    within its first ``k + 1`` points; otherwise every prediction inside the
    segment is removed.
    """

    def __init__(self, *args, k=2):
        super().__init__(*args)
        self.name = f"\\dtpaf[1]{{{k}}}"
        self.k = k
        self.adjust()
        # Recompute the confusion counts on the adjusted predictions.
        self.set_confusion()

    def adjust(self):
        """Replace the stored predictions by their point-adjusted version."""
        adjusted_prediction = np.zeros(self._length)
        # An empty pointwise array is float64, which NumPy rejects as an
        # index; casting to int keeps the no-prediction case from raising.
        predicted_anomalies = np.asarray(self.get_predicted_anomalies_ptwise(), dtype=int)
        adjusted_prediction[predicted_anomalies] = 1

        for start, end in self.get_gt_anomalies_segmentwise():
            # Only the first k + 1 points of the segment (capped at its end)
            # may trigger the adjustment.
            last_allowed = min(start + self.k + 1, end + 1)
            detected_in_time = any(
                adjusted_prediction[i] == 1 for i in range(start, last_allowed)
            )
            adjusted_prediction[start:end + 1] = 1 if detected_in_time else 0

        self._prediction._set_anomalies(np.where(adjusted_prediction == 1)[0])
|
256
|
+
|
257
|
+
|
258
|
+
class PointAdjust(DelayThresholdedPointAdjust):
    """Point-adjusted F1 without an effective delay limit."""

    def __init__(self, *args):
        # The first positional argument is the series length; using it as k
        # means the delay threshold can never make a difference.
        series_length = args[0]
        super().__init__(*args, k=series_length)
        self.name = "\\paf[1]"
|
262
|
+
|
263
|
+
|
264
|
+
class PointAdjustKPercent(Pointwise_metrics):
    """Pointwise F1 after point adjustment requiring a fraction ``k`` of hits.

    A labelled segment is marked as fully predicted once the share of its
    points that are predicted reaches ``k``.
    """

    def __init__(self, *args, k=0.2):
        super().__init__(*args)
        self.name = f"\\pakf[1]{{{int(k*100)}}}"
        self.k = k
        self.adjust()
        # Recompute the confusion counts on the adjusted predictions.
        self.set_confusion()

    def adjust(self):
        """Replace the stored predictions by their k-percent adjusted version."""
        adjusted_prediction = self.get_predicted_anomalies_ptwise().tolist()
        # Set mirror of the list: O(1) membership tests instead of scanning
        # the (growing) list for every point of every segment.
        known_points = set(adjusted_prediction)

        for start, end in self.get_gt_anomalies_segmentwise():
            segment_length = end + 1 - start
            correct_points = 0
            for i in range(start, end + 1):
                if i in known_points:
                    correct_points += 1
                    # The ratio is only checked right after a hit, so a
                    # segment without any hit is never adjusted.
                    if correct_points / segment_length >= self.k:
                        adjusted_prediction.extend(range(start, end + 1))
                        known_points.update(range(start, end + 1))
                        break

        # np.unique already returns its result sorted.
        self._prediction._set_anomalies(np.unique(adjusted_prediction))
|
285
|
+
|
286
|
+
|
287
|
+
class LatencySparsityAware(Binary_detection):
    """F1 as computed by ``calc_twseq`` with window parameter ``tw``."""

    def __init__(self, *args, tw=2):
        self.name = f"\\lsf[1]{{{tw}}}"
        super().__init__(*args)
        self.tw = tw
        # Called for its side effect: it stores tp, tn, fp and fn.
        self.get_score()

    def get_score(self):
        """Run ``calc_twseq``, store the confusion counts and return its F1."""
        outcome = calc_twseq(
            self.get_predicted_anomalies_full_series(),
            self.get_gt_anomalies_full_series(),
            normal=0,
            threshold=0.5,
            tw=self.tw,
        )
        f1, _precision, _recall, _fpr, self.tp, self.tn, self.fp, self.fn = outcome
        return f1
|
303
|
+
|
304
|
+
|
305
|
+
class Segmentwise_metrics(Pointwise_metrics):
    """Segmentwise F1: every anomalous segment counts as one sample."""

    def __init__(self, *args):
        super().__init__(*args)
        self.name = "\\segf[1]"
        self.set_confusion()

    def set_confusion(self):
        """Store ``tp``, ``fn`` and ``fp`` counted per segment.

        A labelled segment is a true positive when at least one of its points
        is predicted, otherwise a false negative. A predicted segment that
        touches no labelled point is a false positive.
        """
        predicted_series = self.get_predicted_anomalies_full_series()
        label_series = self.get_gt_anomalies_full_series()

        hits = 0
        misses = 0
        for segment in self.get_gt_anomalies_segmentwise():
            covered = any(
                predicted_series[i] == 1 for i in range(segment[0], segment[1] + 1)
            )
            if covered:
                hits += 1
            else:
                misses += 1

        false_alarms = 0
        for segment in self.get_predicted_anomalies_segmentwise():
            justified = any(
                label_series[i] == 1 for i in range(segment[0], segment[1] + 1)
            )
            if not justified:
                false_alarms += 1

        self.fp = false_alarms
        self.fn = misses
        self.tp = hits

    def _overlap(self, anomaly1, anomaly2):
        """True when the two inclusive ``[start, end]`` segments share a point."""
        return anomaly1[1] >= anomaly2[0] and anomaly2[1] >= anomaly1[0]
|
341
|
+
|
342
|
+
|
343
|
+
class Redefined_PR_metric(Binary_detection):
    """Base class for metrics that define their own precision and recall.

    Subclasses implement ``recall()`` and ``precision()``; ``get_score()``
    combines them into an F1 score.
    """

    def __init__(self, *args):
        super().__init__(*args)

    def get_score(self):
        """Compute recall, then precision, store both and return their F1."""
        self.r = self.recall()
        self.p = self.precision()
        score = f1_from_pr(self.p, self.r)
        return score

    def recall(self):
        """To be provided by subclasses."""
        raise NotImplementedError

    def precision(self):
        """To be provided by subclasses."""
        raise NotImplementedError
|
357
|
+
|
358
|
+
|
359
|
+
class Composite_f(Redefined_PR_metric):
    """F1 from segmentwise recall and pointwise precision."""

    def __init__(self, *args):
        self.name = "\\cf[1]"
        super().__init__(*args)

        # Two helper metrics built from the same arguments supply the counts.
        self.pointwise_metrics = Pointwise_metrics(*args)
        self.segmentwise_metrics = Segmentwise_metrics(*args)

    def recall(self):
        """Recall from the segmentwise confusion counts."""
        counts = self.segmentwise_metrics
        return recall(tp=counts.tp, fn=counts.fn)

    def precision(self):
        """Precision from the pointwise confusion counts."""
        counts = self.pointwise_metrics
        return precision(tp=counts.tp, fp=counts.fp)
|
372
|
+
|
373
|
+
|
374
|
+
class Affiliation(Redefined_PR_metric):
    """F-score from the precision and recall returned by ``affiliation_pr``."""

    def __init__(self, *args):
        self.name = "\\af[1]"
        super().__init__(*args)

    def get_score(self, beta=1):
        """Store ``r`` and ``p`` from ``affiliation_pr`` and return their F-beta."""
        predicted_events = self._reformat_segments(self.get_predicted_anomalies_segmentwise())
        label_events = self._reformat_segments(self.get_gt_anomalies_segmentwise())
        series_range = (0, self.get_length())

        pr_output = affiliation_pr(predicted_events, label_events, series_range)
        self.r = pr_output["recall"]
        self.p = pr_output["precision"]
        return f1_from_pr(self.p, self.r, beta=beta)

    def _reformat_segments(self, segments):
        """Shift segment ends by one and convert each segment to a tuple."""
        return self._tuplify_segments(self._include_end_of_segments(segments))

    def _include_end_of_segments(self, segments):
        """Turn inclusive ``[start, end]`` into ``[start, end + 1]``."""
        shifted = []
        for start, end in segments:
            shifted.append([start, end + 1])
        return shifted

    def _tuplify_segments(self, segments):
        """Return the segments as a list of tuples."""
        return list(map(tuple, segments))
|
399
|
+
|
400
|
+
|
401
|
+
class Range_PR(Redefined_PR_metric):
    """F1 from ``ts_recall`` / ``ts_precision`` (range-based precision and recall).

    Args:
        cardinality: Passed through to ``ts_recall`` / ``ts_precision``.
        alpha: Passed through to ``ts_recall`` / ``ts_precision``.
        bias: Passed through to ``ts_recall`` / ``ts_precision``.
    """

    def __init__(self, *args, cardinality= "alpha", alpha=0.2, bias="flat"):
        super().__init__(*args)
        self.cardinality = cardinality
        self.alpha = alpha
        self.bias = bias
        self.set_name()

    def set_name(self):
        self.name = f"\\rf[1]{{{self.bias}}}{{{self.alpha}}}"

    def set_kwargs(self):
        """Build ``self.kwargs``, the keyword arguments shared by both calls."""
        length = self.get_length()
        # An empty pointwise array is float64, which NumPy rejects as an
        # index; casting to int keeps series without anomalies from raising.
        real = np.zeros(length)
        real[np.asarray(self.get_gt_anomalies_ptwise(), dtype=int)] = 1
        pred = np.zeros(length)
        pred[np.asarray(self.get_predicted_anomalies_ptwise(), dtype=int)] = 1

        self.kwargs = {"real": real, "pred": pred, "alpha": self.alpha, "cardinality": self.cardinality, "bias": self.bias}

    def recall(self):
        self.set_kwargs()
        return ts_recall(**self.kwargs)

    def precision(self):
        self.set_kwargs()
        return ts_precision(**self.kwargs)
|
427
|
+
|
428
|
+
|
429
|
+
|
430
|
+
|
431
|
+
class TaF(Redefined_PR_metric):
    """F1 from precision and recall computed with ``tapr.TaPR``.

    ``alpha`` weights the first (``*_d``) value against the second (``*_p``)
    value in both precision and recall.
    """

    def __init__(self, *args, theta=0.5, alpha=0.5, delta=0, past_range=False):
        super().__init__(*args)
        self.alpha = alpha
        self.theta = theta
        self.delta = delta
        self.past_range = past_range
        self.name = f"\\taf[1]{{{self.alpha}}}{{{self.delta}}}{{{self.theta}}}"

        self.prepare_scoring()

    def prepare_scoring(self):
        """Create the ``TaPR`` scorer and hand it both sets of segments as ranges."""
        self.TaPR = tapr.TaPR(theta=self.theta, delta=self.delta,past_range=self.past_range)
        label_ranges = [
            rng.Range(start, end, "") for start, end in self.get_gt_anomalies_segmentwise()
        ]
        predicted_ranges = [
            rng.Range(start, end, "") for start, end in self.get_predicted_anomalies_segmentwise()
        ]
        self.TaPR.set_anomalies(label_ranges)
        self.TaPR.set_predictions(predicted_ranges)

    def prepare_data(self):
        """Store both full series as plain lists (not called by ``prepare_scoring``)."""
        self.gt_anomalies = self.get_gt_anomalies_full_series().tolist()
        self.predicted_anomalies = self.get_predicted_anomalies_full_series().tolist()

    def write_data_files(self):
        """Write both series to text files, one value per line.

        A 0 in the series is written as 1 and anything else as -1.
        """
        self.gt_filename = "temp_gt.txt"
        with open(self.gt_filename, "w") as f:
            for value in self.get_gt_anomalies_full_series():
                f.write(f"{1 if value == 0 else -1}\n")
        self.pred_filename = "temp_pred.txt"
        with open(self.pred_filename, "w") as f:
            for value in self.get_predicted_anomalies_full_series():
                f.write(f"{1 if value == 0 else -1}\n")

    def read_data_files(self):
        """Load the files written by ``write_data_files`` through ``File_IO``."""
        self.gt_anomalies = File_IO.load_file(self.gt_filename, "stream")
        self.predicted_anomalies = File_IO.load_file(self.pred_filename, "stream")

    def recall(self):
        # TaR_d() is called before TaR_p_value is read, as in the original order.
        tard_value, _detected = self.TaPR.TaR_d()
        tarp_value = self.TaPR.TaR_p_value
        return self.alpha * tard_value + (1 - self.alpha) * tarp_value

    def precision(self):
        # TaP_d() is called before TaP_p_value is read, as in the original order.
        tapd_value, _correct = self.TaPR.TaP_d()
        tapp_value = self.TaPR.TaP_p_value
        return self.alpha * tapd_value + (1 - self.alpha) * tapp_value
|
486
|
+
|
487
|
+
|
488
|
+
class eTaF(Redefined_PR_metric):
    """F1 from the ``eTaP`` / ``eTaR`` values of ``etapr.evaluate_w_ranges``."""

    def __init__(self, *args, theta_p=0.5, theta_r=0.1):
        super().__init__(*args)
        self.theta_p = theta_p
        self.theta_r = theta_r

        self.name = f"\\etaf[1]{{{self.theta_p}}}{{{self.theta_r}}}"

        self.make_scores()

    def make_scores(self):
        """Convert both sets of segments to ranges and store the evaluation result."""
        self.gt_anomalies = [
            rng.Range(start, end, "") for start, end in self.get_gt_anomalies_segmentwise()
        ]
        self.predicted_anomalies = [
            rng.Range(start, end, "") for start, end in self.get_predicted_anomalies_segmentwise()
        ]
        self.result = etapr.evaluate_w_ranges(
            self.gt_anomalies, self.predicted_anomalies, theta_p=self.theta_p, theta_r=self.theta_r, delta=0
        )

    def prepare_data(self):
        """Round-trip both series through text files (not called by ``make_scores``)."""
        self.write_data_files()
        self.read_data_files()

    def write_data_files(self):
        """Write both series to text files, one value per line.

        A 0 in the series is written as 1 and anything else as -1.
        """
        self.gt_filename = "temp_gt.txt"
        with open(self.gt_filename, "w") as f:
            for value in self.get_gt_anomalies_full_series():
                f.write(f"{1 if value == 0 else -1}\n")
        self.pred_filename = "temp_pred.txt"
        with open(self.pred_filename, "w") as f:
            for value in self.get_predicted_anomalies_full_series():
                f.write(f"{1 if value == 0 else -1}\n")

    def read_data_files(self):
        """Load the files written by ``write_data_files`` through ``File_IO``."""
        self.gt_anomalies = File_IO.load_file(self.gt_filename, "stream")
        self.predicted_anomalies = File_IO.load_file(self.pred_filename, "stream")

    def recall(self):
        result = self.result
        return result["eTaR"]

    def precision(self):
        result = self.result
        return result["eTaP"]
|
538
|
+
|
539
|
+
|
540
|
+
class Time_Tolerant(Redefined_PR_metric):
    """F1 from the time-tolerant precision and recall with tolerance ``d``."""

    def __init__(self, *args, d=2):
        super().__init__(*args)
        self.d = d
        self.name = f"\\ttolf[1]{{{d}}}"

    def recall(self):
        arguments = self.get_kwargs()
        return time_tolerant_recall_(**arguments)

    def precision(self):
        arguments = self.get_kwargs()
        return time_tolerant_precision_(**arguments)

    def get_kwargs(self):
        """Keyword arguments for both helpers.

        ``A`` is the predicted and ``E`` the labelled full series, each
        padded with ``d`` entries on both sides by ``np.pad``.
        """
        padded_predictions = np.pad(self.get_predicted_anomalies_full_series(), self.d)
        padded_labels = np.pad(self.get_gt_anomalies_full_series(), self.d)
        return {"A": padded_predictions, "E": padded_labels, "d": self.d}
|
558
|
+
|
559
|
+
|
560
|
+
class Temporal_Distance(Binary_detection):
    """Symmetric sum of nearest-point distances between labels and predictions.

    ``distance=0`` adds the two directed distances; ``distance=1`` adds their
    squares.
    """

    def __init__(self, *args, distance=0):
        super().__init__(*args)
        self.distance = distance
        self.name = "\\tempdist"

    def get_score(self):
        """Return the combined distance.

        Raises:
            ValueError: If ``distance`` is neither 0 nor 1.
        """
        labels = np.array(self.get_gt_anomalies_ptwise())
        predictions = np.array(self.get_predicted_anomalies_ptwise())

        if self.distance == 0:
            return self._dist(labels, predictions) + self._dist(predictions, labels)
        if self.distance == 1:
            return self._dist(labels, predictions)**2 + self._dist(predictions, labels)**2
        raise ValueError(f"Distance {self.distance} not supported")

    def _dist(self, a, b):
        """Sum over the points of ``a`` of the distance to the nearest point of ``b``.

        When ``b`` is empty, every point of ``a`` contributes the series length.
        """
        total = 0
        for point in a:
            if len(b) > 0:
                total += np.min(np.abs(b - point))
            else:
                total += self._length
        return total
|
584
|
+
|
585
|
+
|
586
|
+
class NAB_score(Binary_detection):
    """NAB score computed with ``Sweeper``, scaled so that a prediction equal
    to the labels scores 100."""

    def __init__(self, *args):
        self.name = "\\nab"
        super().__init__(*args)

        cost_matrix = {"tpWeight": 1, "fpWeight": 0.11, "fnWeight": 1}
        self.sweeper = Sweeper(probationPercent=0, costMatrix=cost_matrix)

    def get_score(self):
        """Return the normalised score.

        Returns 0 without predictions, NaN without labels, and 0 when the
        computation raises any exception.
        """
        predictions = self.get_predicted_anomalies_ptwise()
        if len(predictions) == 0:
            return 0  # raw_score == null_score yields score = 0
        labels = self.get_gt_anomalies_ptwise()
        if len(labels) == 0:
            return np.nan  # perfect_score == null_score yields /0
        try:
            null_score, raw_score = self.calculate_scores(self.get_predicted_anomalies_ptwise())
            null_score, perfect_score = self.calculate_scores(prediction=self.get_gt_anomalies_ptwise())
            return (raw_score - null_score) / (perfect_score - null_score) * 100
        except Exception:
            # Best effort: any failure in the sweep is reported as a score of 0.
            return 0

    def calculate_scores(self, prediction):
        """Return ``(null score, score of prediction)`` from the sweeper."""
        series_length = self.get_length()
        anomaly_scores = pointwise_to_full_series(prediction, series_length)
        timestamps = np.arange(series_length)
        windowLimits = self.get_gt_anomalies_segmentwise()
        dataSetName = "dummyname"

        anomalyList = self.sweeper.calcSweepScore(timestamps, anomaly_scores, windowLimits, dataSetName)
        scoresByThreshold = self.sweeper.calcScoreByThreshold(anomalyList)
        all_normal, flagged = scoresByThreshold[0], scoresByThreshold[1]

        assert all_normal.threshold == 1.1  # all points regarded normal
        assert flagged.threshold == 1.0  # anomalous points regarded anomalous

        return all_normal.score, flagged.score
|
618
|
+
|
619
|
+
|
620
|
+
class Best_threshold_pw(Nonbinary_detection):
    """Best pointwise F1 over all thresholds taken from the anomaly score."""

    def __init__(self, *args):
        self.name = "\\bestpwf"
        super().__init__(*args)

    def get_score(self):
        """Try every value of the anomaly score as threshold; return the
        largest F1, ignoring NaNs."""
        candidates = [
            self.get_score_given_anomaly_score_and_threshold(threshold=value)
            for value in self.get_anomaly_score()
        ]
        return np.nanmax(candidates)

    def get_score_given_anomaly_score_and_threshold(self, threshold):
        """Pointwise F1 when scores ``>= threshold`` are predicted anomalous."""
        gt = self.get_gt_anomalies_full_series()
        pred = np.array(self.get_anomaly_score()) >= threshold

        true_pos = pred @ gt
        false_neg = (1 - pred) @ gt
        false_pos = (1 - gt) @ pred
        return f1_score(tp=true_pos, fn=false_neg, fp=false_pos)
|
635
|
+
|
636
|
+
|
637
|
+
class AUC_ROC(Nonbinary_detection):
    """Area under the ROC curve, via ``sklearn.metrics.roc_auc_score``."""

    def __init__(self, *args):
        self.name = "\\aucroc"
        super().__init__(*args)

    def get_score(self):
        """Score the anomaly score against the full-series labels."""
        labels = self.get_gt_anomalies_full_series()
        scores = self.get_anomaly_score()
        return roc_auc_score(labels, scores)
|
645
|
+
|
646
|
+
|
647
|
+
class AUC_PR_pw(Nonbinary_detection):
    """Pointwise average precision, via ``sklearn.metrics.average_precision_score``."""

    def __init__(self, *args):
        self.name = "\\aucpr"
        super().__init__(*args)

    def get_score(self):
        """Score the anomaly score against the full-series labels."""
        labels = self.get_gt_anomalies_full_series()
        scores = self.get_anomaly_score()
        return average_precision_score(labels, scores)
|
655
|
+
|
656
|
+
|
657
|
+
class VUS_ROC(Nonbinary_detection):
    """ROC value from ``generate_curve`` for windows up to ``max_window``."""

    def __init__(self, *args, max_window=4):
        super().__init__(*args)
        self.name = f"\\vusroc{{{max_window}}}"
        self.max_window = max_window

    def get_score(self):
        """Return the seventh of the eight values produced by ``generate_curve``."""
        labels = np.array(self.get_gt_anomalies_full_series())
        scores = np.array(self.get_anomaly_score())
        curve = generate_curve(labels, scores, self.max_window)
        _, _, _, _, _, _, vus_roc, _vus_pr = curve
        return vus_roc
|
668
|
+
|
669
|
+
|
670
|
+
class VUS_PR(Nonbinary_detection):
    """PR value from ``generate_curve`` for windows up to ``max_window``."""

    def __init__(self, *args, max_window=4):
        super().__init__(*args)
        self.name = f"\\vuspr{{{max_window}}}"
        self.max_window = max_window

    def get_score(self):
        """Return the last of the eight values produced by ``generate_curve``."""
        labels = np.array(self.get_gt_anomalies_full_series())
        scores = np.array(self.get_anomaly_score())
        curve = generate_curve(labels, scores, self.max_window)
        _, _, _, _, _, _, _vus_roc, vus_pr = curve
        return vus_pr
|
681
|
+
|
682
|
+
|
683
|
+
class PatK_pw(Nonbinary_detection):
    """Pointwise precision at k, with k the number of labelled points."""

    def __init__(self, *args):
        super().__init__(*args)
        self.name = f"\\patk[{len(self.get_gt_anomalies_ptwise())}]"

    def get_score(self):
        """Precision of the predictions obtained by thresholding at the
        k-th largest anomaly score.

        Ties at the threshold may yield more than k predictions; the divisor
        is the actual number of predictions.
        """
        gt = self.get_gt_anomalies_full_series()

        k = int(sum(gt))
        assert k > 0
        ranked_scores = np.sort(self.get_anomaly_score())
        threshold = ranked_scores[-k]

        pred = self.get_anomaly_score() >= threshold
        n_predicted = sum(pred)
        assert n_predicted >= k, (k, pred)

        return pred @ gt / n_predicted
|