tsadmetrics 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- entorno/bin/activate_this.py +32 -0
- entorno/bin/rst2html.py +23 -0
- entorno/bin/rst2html4.py +26 -0
- entorno/bin/rst2html5.py +33 -0
- entorno/bin/rst2latex.py +26 -0
- entorno/bin/rst2man.py +27 -0
- entorno/bin/rst2odt.py +28 -0
- entorno/bin/rst2odt_prepstyles.py +20 -0
- entorno/bin/rst2pseudoxml.py +23 -0
- entorno/bin/rst2s5.py +24 -0
- entorno/bin/rst2xetex.py +27 -0
- entorno/bin/rst2xml.py +23 -0
- entorno/bin/rstpep2html.py +25 -0
- tests/__init__.py +0 -0
- tests/test_binary.py +759 -0
- tests/test_non_binary.py +371 -0
- tsadmetrics/_tsadeval/affiliation/__init__.py +0 -0
- tsadmetrics/_tsadeval/affiliation/_affiliation_zone.py +86 -0
- tsadmetrics/_tsadeval/affiliation/_integral_interval.py +464 -0
- tsadmetrics/_tsadeval/affiliation/_single_ground_truth_event.py +68 -0
- tsadmetrics/_tsadeval/affiliation/generics.py +135 -0
- tsadmetrics/_tsadeval/affiliation/metrics.py +114 -0
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/File_IO.py +175 -0
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Range.py +50 -0
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Time_Plot.py +184 -0
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/__init__.py +0 -0
- tsadmetrics/_tsadeval/eTaPR_pkg/__init__.py +0 -0
- tsadmetrics/_tsadeval/eTaPR_pkg/etapr.py +386 -0
- tsadmetrics/_tsadeval/eTaPR_pkg/tapr.py +362 -0
- tsadmetrics/_tsadeval/prts/__init__.py +0 -0
- tsadmetrics/_tsadeval/prts/base/__init__.py +0 -0
- tsadmetrics/_tsadeval/prts/base/time_series_metrics.py +165 -0
- tsadmetrics/_tsadeval/prts/basic_metrics_ts.py +121 -0
- tsadmetrics/_tsadeval/prts/time_series_metrics/__init__.py +0 -0
- tsadmetrics/_tsadeval/prts/time_series_metrics/fscore.py +61 -0
- tsadmetrics/_tsadeval/prts/time_series_metrics/precision.py +86 -0
- tsadmetrics/_tsadeval/prts/time_series_metrics/precision_recall.py +21 -0
- tsadmetrics/_tsadeval/prts/time_series_metrics/recall.py +85 -0
- tsadmetrics/utils.py +10 -4
- {tsadmetrics-0.1.4.dist-info → tsadmetrics-0.1.6.dist-info}/METADATA +1 -1
- tsadmetrics-0.1.6.dist-info/RECORD +58 -0
- tsadmetrics-0.1.6.dist-info/top_level.txt +3 -0
- tsadmetrics-0.1.4.dist-info/RECORD +0 -20
- tsadmetrics-0.1.4.dist-info/top_level.txt +0 -1
- {tsadmetrics-0.1.4.dist-info → tsadmetrics-0.1.6.dist-info}/WHEEL +0 -0
tsadmetrics/_tsadeval/eTaPR_pkg/tapr.py
@@ -0,0 +1,362 @@
+import argparse
+import copy
+import math
+from typing import Callable, Tuple
+
+from .DataManage import File_IO, Range as rng, Time_Plot
+
+
+class TaPR:
+    """Time-series aware Precision and Recall (TaPR) over lists of Range objects."""
+
+    def __init__(self, theta: float, delta: int, past_range: bool = False):
+        self._past_range = past_range
+        self._predictions = []     # list of Ranges
+        self._anomalies = []       # list of Ranges
+        self._ambiguous_inst = []  # list of Ranges
+
+        self._set_predictions = False
+        self._set_anomalies = False
+
+        self._theta = theta
+        self._delta = delta
+
+    def set_anomalies(self, anomaly_list: list) -> None:
+        self._anomalies = copy.deepcopy(anomaly_list)
+        self._gen_ambiguous()
+        self._set_anomalies = True
+
+    def set_predictions(self, prediction_list: list) -> None:
+        self._predictions = copy.deepcopy(prediction_list)
+        self._set_predictions = True
+
+    def _gen_ambiguous(self):
+        # Build one "ambiguous" range right after each anomaly, in which late
+        # detections still earn a (decaying) reward.
+        for i in range(len(self._anomalies)):
+            start_id = self._anomalies[i].get_time()[1] + 1
+            end_id = start_id + self._delta
+
+            if self._past_range:
+                end_id = start_id + int(self._delta * (self._anomalies[i].get_time()[1] - self._anomalies[i].get_time()[0]))
+
+            # If the next anomaly starts inside the delta window, clip the window.
+            if i + 1 < len(self._anomalies) and end_id > self._anomalies[i + 1].get_time()[0]:
+                end_id = self._anomalies[i + 1].get_time()[0] - 1
+
+            if start_id > end_id:  # empty window
+                start_id = -2
+                end_id = -1
+
+            self._ambiguous_inst.append(rng.Range(start_id, end_id, str(i)))
+
+    def get_n_predictions(self):
+        return len(self._predictions)
+
+    def get_n_anomalies(self):
+        return len(self._anomalies)
+
+    def _ids_2_objects(self, id_list, range_list):
+        return [range_list[idx] for idx in id_list]
+
+    def TaR_d(self) -> Tuple[float, list]:
+        score, detected_id_list = self._TaR_d(self._anomalies, self._ambiguous_inst, self._predictions, self._theta)
+        return score, self._ids_2_objects(detected_id_list, self._anomalies)
+
+    def _TaR_d(self, anomalies: list, ambiguous_inst: list, predictions: list, threshold: float) -> Tuple[float, list]:
+        total_score = 0.0
+        detected_anomalies = []
+        total_score_p = 0.0
+        for anomaly_id in range(len(anomalies)):
+            anomaly = anomalies[anomaly_id]
+            ambiguous = ambiguous_inst[anomaly_id]
+
+            max_score = self._sum_of_func(anomaly.get_time()[0], anomaly.get_time()[1],
+                                          anomaly.get_time()[0], anomaly.get_time()[1], self._uniform_func)
+
+            score = 0.0
+            for prediction in predictions:
+                score += self._overlap_and_subsequent_score(anomaly, ambiguous, prediction)
+
+            total_score_p += min(1.0, score / max_score)
+            if min(1.0, score / max_score) >= threshold:
+                total_score += 1.0
+                detected_anomalies.append(anomaly_id)
+
+        if len(anomalies) == 0:
+            self.TaR_p_value = 0.0
+            return 0.0, []
+        else:
+            self.TaR_p_value = total_score_p / len(anomalies)
+            return total_score / len(anomalies), detected_anomalies
+
+    def TaR_p(self) -> float:
+        total_score = 0.0
+        for anomaly_id in range(len(self._anomalies)):
+            anomaly = self._anomalies[anomaly_id]
+            ambiguous = self._ambiguous_inst[anomaly_id]
+
+            max_score = self._sum_of_func(anomaly.get_time()[0], anomaly.get_time()[1],
+                                          anomaly.get_time()[0], anomaly.get_time()[1], self._uniform_func)
+
+            score = 0.0
+            for prediction in self._predictions:
+                score += self._overlap_and_subsequent_score(anomaly, ambiguous, prediction)
+
+            total_score += min(1.0, score / max_score)
+
+        if len(self._anomalies) == 0:
+            return 0.0
+        else:
+            return total_score / len(self._anomalies)
+
+    def TaP_d(self) -> Tuple[float, list]:
+        score, correct_id_list = self._TaP_d(self._anomalies, self._ambiguous_inst, self._predictions, self._theta)
+        return score, self._ids_2_objects(correct_id_list, self._predictions)
+
+    def _TaP_d(self, anomalies, ambiguous_inst, predictions, threshold):
+        # Compute TaP_d and TaP_p in one pass for performance.
+        correct_predictions = []
+        total_score = 0.0
+        total_score_p = 0.0
+        for prediction_id in range(len(predictions)):
+            max_score = predictions[prediction_id].get_time()[1] - predictions[prediction_id].get_time()[0] + 1
+
+            score = 0.0
+            for anomaly_id in range(len(anomalies)):
+                anomaly = anomalies[anomaly_id]
+                ambiguous = ambiguous_inst[anomaly_id]
+
+                score += self._overlap_and_subsequent_score(anomaly, ambiguous, predictions[prediction_id])
+            total_score_p += score / max_score
+            if (score / max_score) >= threshold:
+                total_score += 1.0
+                correct_predictions.append(prediction_id)
+
+        if len(predictions) == 0:
+            self.TaP_p_value = 0.0
+            return 0.0, []
+        else:
+            self.TaP_p_value = total_score_p / len(predictions)
+            return total_score / len(predictions), correct_predictions
+
+    def _detect(self, src_range: rng.Range, ranges: list, theta: float) -> bool:
+        rest_len = src_range.get_time()[1] - src_range.get_time()[0] + 1
+        for dst_range in ranges:
+            overlap = self._overlapped_len(src_range, dst_range)
+            if overlap > 0:
+                rest_len -= overlap
+        return float(rest_len) / (src_range.get_time()[1] - src_range.get_time()[0] + 1) <= (1.0 - theta)
+
+    def _overlapped_len(self, range1: rng.Range, range2: rng.Range) -> int:
+        detected_start = max(range1.get_time()[0], range2.get_time()[0])
+        detected_end = min(range1.get_time()[1], range2.get_time()[1])
+
+        if detected_end < detected_start:
+            return 0
+        else:
+            return detected_end - detected_start + 1
+
+    def _min_max_norm(self, value: int, org_min: int, org_max: int, new_min: int, new_max: int) -> float:
+        if org_min == org_max:
+            return new_min
+        else:
+            return float(new_min) + float(value - org_min) * (new_max - new_min) / (org_max - org_min)
+
+    def _decaying_func(self, val: float) -> float:
+        assert -6 <= val <= 6
+        return 1 / (1 + math.exp(val))
+
+    def _ascending_func(self, val: float) -> float:
+        assert -6 <= val <= 6
+        return 1 / (1 + math.exp(-val))
+
+    def _uniform_func(self, val: float) -> float:
+        return 1.0
+
+    def _sum_of_func(self, start_time: int, end_time: int, org_start: int, org_end: int,
+                     func: Callable[[float], float]) -> float:
+        val = 0.0
+        for timestamp in range(start_time, end_time + 1):
+            val += func(self._min_max_norm(timestamp, org_start, org_end, -6, 6))
+        return val
+
+    def _overlap_and_subsequent_score(self, anomaly: rng.Range, ambiguous: rng.Range, prediction: rng.Range) -> float:
+        score = 0.0
+
+        # Uniform reward inside the anomaly itself.
+        detected_start = max(anomaly.get_time()[0], prediction.get_time()[0])
+        detected_end = min(anomaly.get_time()[1], prediction.get_time()[1])
+
+        score += self._sum_of_func(detected_start, detected_end,
+                                   anomaly.get_time()[0], anomaly.get_time()[1], self._uniform_func)
+
+        # Decaying reward inside the ambiguous window that follows it.
+        if ambiguous.get_time()[0] < ambiguous.get_time()[1]:
+            detected_start = max(ambiguous.get_time()[0], prediction.get_time()[0])
+            detected_end = min(ambiguous.get_time()[1], prediction.get_time()[1])
+
+            score += self._sum_of_func(detected_start, detected_end,
+                                       ambiguous.get_time()[0], ambiguous.get_time()[1], self._decaying_func)
+
+        return score
+
+    def TaP_p(self) -> float:
+        total_score = 0.0
+        for prediction in self._predictions:
+            max_score = prediction.get_time()[1] - prediction.get_time()[0] + 1
+
+            score = 0.0
+            for anomaly_id in range(len(self._anomalies)):
+                anomaly = self._anomalies[anomaly_id]
+                ambiguous = self._ambiguous_inst[anomaly_id]
+
+                score += self._overlap_and_subsequent_score(anomaly, ambiguous, prediction)
+
+            total_score += score / max_score
+
+        if len(self._predictions) == 0:
+            return 0.0
+        else:
+            return total_score / len(self._predictions)
+
+
+def compute(anomalies: list, predictions: list, alpha: float, theta: float, delta: int) -> dict:
+    ev = TaPR(theta, delta)
+    ev.set_anomalies(anomalies)
+    ev.set_predictions(predictions)
+
+    tard_value, detected_list = ev.TaR_d()
+    tarp_value = ev.TaR_p()
+
+    tapd_value, correct_list = ev.TaP_d()
+    tapp_value = ev.TaP_p()
+
+    result = {}
+    tar_value = alpha * tard_value + (1 - alpha) * tarp_value
+    result['TaR'] = tar_value
+    result['TaRd'] = tard_value
+    result['TaRp'] = tarp_value
+
+    tap_value = alpha * tapd_value + (1 - alpha) * tapp_value
+    result['TaP'] = tap_value
+    result['TaPd'] = tapd_value
+    result['TaPp'] = tapp_value
+
+    result['Detected_Anomalies'] = [value.get_name() for value in detected_list]
+    result['Detected_Anomalies_Ranges'] = detected_list
+    result['Correct_Predictions_Ranges'] = correct_list
+
+    if tar_value + tap_value == 0:
+        result['f1'] = 0.0
+    else:
+        result['f1'] = (2 * tar_value * tap_value) / (tar_value + tap_value)
+
+    return result
+
+
+def compute_with_load(anomaly_file: str, prediction_file: str, file_type: str, alpha: float, theta: float, delta: int) -> dict:
+    anomalies = File_IO.load_file(anomaly_file, file_type)
+    predictions = File_IO.load_file(prediction_file, file_type)
+    return compute(anomalies, predictions, alpha, theta, delta)
+
+
+def print_result(anomalies: list, predictions: list, alpha: float, theta: float, delta: int, verbose: bool, graph: str) -> None:
+    org_predictions = copy.deepcopy(predictions)
+    result = compute(anomalies, predictions, alpha, theta, delta)
+
+    print("The parameters (alpha, theta, delta) are set as %g, %g, and %d." % (alpha, theta, delta))
+
+    print('\n[TaR]:', "%0.5f" % result['TaR'])
+    print("\t* Detection score:", "%0.5f" % result['TaRd'])
+    print("\t* Portion score:", "%0.5f" % result['TaRp'])
+    if verbose:
+        buf = '\t\tdetected anomalies: '
+        if len(result['Detected_Anomalies_Ranges']) == 0:
+            buf += "None "
+        else:
+            for value in result['Detected_Anomalies_Ranges']:
+                buf += value.get_name() + '(' + str(value.get_time()[0]) + ':' + str(value.get_time()[1]) + '), '
+        print(buf[:-2])
+
+    print('\n[TaP]:', "%0.5f" % result['TaP'])
+    print("\t* Detection score:", "%0.5f" % result['TaPd'])
+    print("\t* Portion score:", "%0.5f" % result['TaPp'])
+    if verbose:
+        buf = '\t\tcorrect predictions: '
+        if len(result['Correct_Predictions_Ranges']) == 0:
+            buf += "None "
+        else:
+            for value in result['Correct_Predictions_Ranges']:
+                buf += value.get_name() + '(' + str(value.get_time()[0]) + ':' + str(value.get_time()[1]) + '), '
+        print(buf[:-2])
+
+    assert graph in ('screen', 'file', 'none', 'all')
+    if graph in ('screen', 'file', 'all'):
+        Time_Plot.draw_graphs(anomalies, org_predictions, graph)
+
+
+if __name__ == '__main__':
+    argument_parser = argparse.ArgumentParser()
+    argument_parser.add_argument("--anomalies", help="anomaly file name (ground truth)", required=True)
+    argument_parser.add_argument("--predictions", help="prediction file name", required=True)
+    argument_parser.add_argument("--filetype", help="choose the file type between range and stream", required=True)
+    argument_parser.add_argument("--graph", help="show graph of results")
+
+    argument_parser.add_argument("--verbose", help="show detailed results", action='store_true')
+    argument_parser.add_argument("--theta", help="set parameter theta")
+    argument_parser.add_argument("--alpha", help="set parameter alpha")
+    argument_parser.add_argument("--delta", help="set parameter delta")
+    arguments = argument_parser.parse_args()
+
+    theta, alpha, delta, graph = 0.5, 0.8, 600, 'none'  # default values
+    if arguments.theta is not None:
+        theta = float(arguments.theta)
+    if arguments.alpha is not None:
+        alpha = float(arguments.alpha)
+    if arguments.delta is not None:
+        delta = int(arguments.delta)
+    if arguments.graph is not None:
+        graph = arguments.graph
+
+    assert 0.0 <= theta <= 1.0
+    assert 0.0 <= alpha <= 1.0
+    assert isinstance(delta, int)
+    assert graph in ('screen', 'file', 'none', 'all')
+
+    anomalies = File_IO.load_file(arguments.anomalies, arguments.filetype)
+    predictions = File_IO.load_file(arguments.predictions, arguments.filetype)
+
+    print_result(anomalies, predictions, alpha, theta, delta, arguments.verbose, graph)
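For orientation, a minimal usage sketch of `compute` above (not part of the released diff). It assumes, from the call `rng.Range(start_id, end_id, str(i))` in `_gen_ambiguous`, that `DataManage/Range.py` exposes a `Range(first, last, name)` class; the toy ranges and parameter values are illustrative only.

```python
# Sketch only: Range(first, last, name) is inferred from its use in tapr.py.
from tsadmetrics._tsadeval.eTaPR_pkg.DataManage.Range import Range
from tsadmetrics._tsadeval.eTaPR_pkg import tapr

anomalies = [Range(10, 19, "a0"), Range(40, 44, "a1")]    # ground-truth ranges
predictions = [Range(12, 16, "p0"), Range(60, 62, "p1")]  # detector output

# alpha blends detection and portion scores, theta is the detection
# threshold, and delta is the length of the ambiguous window.
result = tapr.compute(anomalies, predictions, alpha=0.5, theta=0.5, delta=5)
print(result["TaR"], result["TaP"], result["f1"])
```

As `compute` assembles them, `TaR` and `TaP` are each `alpha * detection_score + (1 - alpha) * portion_score`, and `f1` is their harmonic mean.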
tsadmetrics/_tsadeval/prts/base/time_series_metrics.py
@@ -0,0 +1,165 @@
+from typing import Any
+
+import numpy as np
+
+
+class BaseTimeSeriesMetrics:
+    """Base class for time series metrics."""
+
+    def score(self, real: np.ndarray, pred: np.ndarray) -> Any:
+        """Compute the metric.
+
+        Args:
+            real: one-dimensional array of ground-truth labels (1 or 0).
+            pred: one-dimensional array of predicted labels (1 or 0).
+
+        Returns:
+            The metric value; implemented by subclasses.
+        """
+        ...
+
+    def _udf_gamma(self):
+        """The user-defined gamma.
+
+        Returns:
+            float: the value of the user-defined gamma
+        """
+        return 1.0
+
+    def _gamma_select(self, gamma: str, overlap: int) -> float:
+        """Select the gamma value according to the parameters.
+
+        Args:
+            gamma: str
+                - 'one': the value 1
+                - 'reciprocal': the reciprocal of the overlap
+                - 'udf_gamma': user-defined gamma
+            overlap: int
+                number of overlaps between real and pred
+
+        Returns:
+            float: the selected gamma value
+        """
+        assert isinstance(overlap, int), "overlap must be an int"
+
+        if gamma == "one":
+            return 1.0
+        elif gamma == "reciprocal":
+            if overlap > 1:
+                return 1.0 / overlap
+            else:
+                return 1.0
+        elif gamma == "udf_gamma":
+            if overlap > 1:
+                return 1.0 / self._udf_gamma()
+            else:
+                return 1.0
+        else:
+            raise ValueError(f"Expected one of: one, reciprocal, udf_gamma. Got: {gamma}")
+
+    def _gamma_function(self, overlap_count):
+        overlap = overlap_count[0]
+        return self._gamma_select(self.cardinality, overlap)
+
+    def _compute_omega_reward(self, r1, r2, overlap_count):
+        # No overlap between the two ranges: no reward.
+        if r1[1] < r2[0] or r1[0] > r2[1]:
+            return 0
+        else:
+            overlap_count[0] += 1
+            overlap = np.zeros(r1.shape)
+            overlap[0] = max(r1[0], r2[0])
+            overlap[1] = min(r1[1], r2[1])
+            return self._omega_function(r1, overlap)
+
+    def _omega_function(self, rrange, overlap):
+        # Positional reward: the fraction of the positional-bias mass that
+        # falls inside the overlapped part of the range.
+        anomaly_length = rrange[1] - rrange[0] + 1
+        my_positional_bias = 0
+        max_positional_bias = 0
+        for i in range(1, anomaly_length + 1):
+            temp_bias = self._delta_function(i, anomaly_length)
+            max_positional_bias += temp_bias
+            j = rrange[0] + i - 1
+            if overlap[0] <= j <= overlap[1]:
+                my_positional_bias += temp_bias
+        if max_positional_bias > 0:
+            return my_positional_bias / max_positional_bias
+        else:
+            return 0
+
+    def _delta_function(self, t, anomaly_length):
+        return self._delta_select(self.bias, t, anomaly_length)
+
+    def _delta_select(self, delta, t, anomaly_length):
+        if delta == "flat":
+            return 1.0
+        elif delta == "front":
+            return float(anomaly_length - t + 1.0)
+        elif delta == "middle":
+            if t <= anomaly_length / 2.0:
+                return float(t)
+            else:
+                return float(anomaly_length - t + 1.0)
+        elif delta == "back":
+            return float(t)
+        elif delta == "udf_delta":
+            return self._udf_delta(t, anomaly_length)
+        else:
+            raise ValueError("Invalid positional bias value")
+
+    def _udf_delta(self, t, anomaly_length):
+        """User-defined delta (positional bias) function."""
+        return 1.0
+
+    def _shift(self, arr, num, fill_value=np.nan):
+        # Shift an array by num positions, filling the vacated slots.
+        arr = np.roll(arr, num)
+        if num < 0:
+            arr[num:] = fill_value
+        elif num > 0:
+            arr[:num] = fill_value
+        return arr
+
+    def _prepare_data(self, values_real, values_pred):
+        # Convert point-wise 0/1 labels into [start, end] index ranges.
+        assert len(values_real) == len(values_pred)
+        assert np.allclose(np.unique(values_real), np.array([0, 1])) or np.allclose(
+            np.unique(values_real), np.array([1])
+        )
+        assert np.allclose(np.unique(values_pred), np.array([0, 1])) or np.allclose(
+            np.unique(values_pred), np.array([1])
+        )
+
+        predicted_anomalies_ = np.argwhere(values_pred == 1).ravel()
+        predicted_anomalies_shift_forward = self._shift(predicted_anomalies_, 1, fill_value=predicted_anomalies_[0])
+        predicted_anomalies_shift_backward = self._shift(predicted_anomalies_, -1, fill_value=predicted_anomalies_[-1])
+        predicted_anomalies_start = np.argwhere(
+            (predicted_anomalies_shift_forward - predicted_anomalies_) != -1
+        ).ravel()
+        predicted_anomalies_finish = np.argwhere(
+            (predicted_anomalies_ - predicted_anomalies_shift_backward) != -1
+        ).ravel()
+        predicted_anomalies = np.hstack(
+            [
+                predicted_anomalies_[predicted_anomalies_start].reshape(-1, 1),
+                predicted_anomalies_[predicted_anomalies_finish].reshape(-1, 1),
+            ]
+        )
+
+        real_anomalies_ = np.argwhere(values_real == 1).ravel()
+        real_anomalies_shift_forward = self._shift(real_anomalies_, 1, fill_value=real_anomalies_[0])
+        real_anomalies_shift_backward = self._shift(real_anomalies_, -1, fill_value=real_anomalies_[-1])
+        real_anomalies_start = np.argwhere((real_anomalies_shift_forward - real_anomalies_) != -1).ravel()
+        real_anomalies_finish = np.argwhere((real_anomalies_ - real_anomalies_shift_backward) != -1).ravel()
+        real_anomalies = np.hstack(
+            [
+                real_anomalies_[real_anomalies_start].reshape(-1, 1),
+                real_anomalies_[real_anomalies_finish].reshape(-1, 1),
+            ]
+        )
+
+        return real_anomalies, predicted_anomalies
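A small sketch (not part of the diff) of what `_prepare_data` produces. The base class is instantiated directly here only to reach the helper, which turns point-wise 0/1 labels into two-column `[start, end]` index arrays:

```python
import numpy as np
from tsadmetrics._tsadeval.prts.base.time_series_metrics import BaseTimeSeriesMetrics

real = np.array([0, 1, 1, 1, 0, 0, 1, 1, 0])
pred = np.array([0, 0, 1, 1, 1, 0, 0, 1, 1])

base = BaseTimeSeriesMetrics()
real_ranges, pred_ranges = base._prepare_data(real, pred)
print(real_ranges)  # ranges [1, 3] and [6, 7]
print(pred_ranges)  # ranges [2, 4] and [7, 8]
```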
tsadmetrics/_tsadeval/prts/basic_metrics_ts.py
@@ -0,0 +1,121 @@
+from .time_series_metrics.fscore import TimeSeriesFScore
+from .time_series_metrics.precision import TimeSeriesPrecision
+from .time_series_metrics.recall import TimeSeriesRecall
+
+
+def ts_precision(real, pred, alpha=0.0, cardinality="one", bias="flat"):
+    """Compute the range-based precision.
+
+    The range-based precision is the average of "Precision_Ti", where "Precision_Ti" is
+    the precision score of each predicted anomaly range.
+    "Precision_Ti" for a single predicted anomaly range is calculated by the following formula:
+        Precision_Ti = α x ExistenceReward + (1 - α) x OverlapReward, where 0 ≤ α ≤ 1
+    α represents the relative importance of rewarding existence, whereas
+    (1 − α) represents the relative importance of rewarding size, position, and cardinality.
+
+    "ExistenceReward" is 1 if a real anomaly range overlaps with even a single point of
+    the predicted anomaly range, 0 otherwise.
+    Note: for precision there is usually no need for an existence reward, since precision by
+    definition emphasizes prediction quality, and existence by itself is too low a bar for
+    judging the quality of a prediction (i.e., α = 0).
+
+    "OverlapReward" is calculated by the following formula:
+        OverlapReward = CardinalityFactor x Sum of ω
+    "CardinalityFactor" is 1 if the predicted anomaly range overlaps with only one real anomaly range.
+    Otherwise it receives 0 ≤ γ() ≤ 1 defined by the application.
+    "CardinalityFactor" serves as a scaling factor for the rewards "ω", which are earned from overlap
+    size and position.
+    In determining "ω", we consider the size of the correctly predicted portion of a predicted anomaly
+    range and the relative position of the correctly predicted portion of a predicted anomaly range.
+
+    Args:
+        real: np.ndarray
+            One-dimensional array of correct answers with values of 1 or 0.
+        pred: np.ndarray
+            One-dimensional array of predicted answers with values of 1 or 0.
+        alpha: float, default=0.0
+            Relative importance of existence reward. 0 ≤ alpha ≤ 1.
+        cardinality: string, default="one"
+            Cardinality type. This should be "one", "reciprocal" or "udf_gamma".
+        bias: string, default="flat"
+            Positional bias. This should be "flat", "front", "middle", or "back".
+
+    Returns:
+        float: precision score
+    """
+    precision = TimeSeriesPrecision(alpha, cardinality, bias)
+    return precision.score(real, pred)
+
+
+def ts_recall(real, pred, alpha=0.0, cardinality="one", bias="flat"):
+    """Compute the range-based recall.
+
+    The range-based recall is the average of "Recall_Ti", where "Recall_Ti" is
+    the recall score of each real anomaly range.
+    "Recall_Ti" for a single real anomaly range is calculated by the following formula:
+        Recall_Ti = α x ExistenceReward + (1 - α) x OverlapReward, where 0 ≤ α ≤ 1
+    α represents the relative importance of rewarding existence, whereas
+    (1 − α) represents the relative importance of rewarding size, position, and cardinality.
+
+    "ExistenceReward" is 1 if a prediction captures even a single point of the real anomaly range, 0 otherwise.
+
+    "OverlapReward" is calculated by the following formula:
+        OverlapReward = CardinalityFactor x Sum of ω
+    "CardinalityFactor" is 1 if the real anomaly range overlaps with only one predicted anomaly range.
+    Otherwise it receives 0 ≤ γ() ≤ 1 defined by the application.
+    "CardinalityFactor" serves as a scaling factor for the rewards "ω", which are earned from overlap
+    size and position.
+    In determining "ω", we consider the size of the correctly predicted portion of the real anomaly range
+    and the relative position of the correctly predicted portion of the real anomaly range.
+
+    Args:
+        real: np.ndarray
+            One-dimensional array of correct answers with values of 1 or 0.
+        pred: np.ndarray
+            One-dimensional array of predicted answers with values of 1 or 0.
+        alpha: float, default=0.0
+            Relative importance of existence reward. 0 ≤ alpha ≤ 1.
+        cardinality: string, default="one"
+            Cardinality type. This should be "one", "reciprocal" or "udf_gamma".
+        bias: string, default="flat"
+            Positional bias. This should be "flat", "front", "middle", or "back".
+
+    Returns:
+        float: recall score
+    """
+    recall = TimeSeriesRecall(alpha, cardinality, bias)
+    return recall.score(real, pred)
+
+
+def ts_fscore(real, pred, beta=1.0, p_alpha=0.0, r_alpha=0.0, cardinality="one", p_bias="flat", r_bias="flat"):
+    """Compute the range-based F-score.
+
+    The F-beta score is the weighted harmonic mean of precision and recall,
+    reaching its optimal value at 1 and its worst value at 0.
+    The beta parameter determines the weight of recall in the combined score:
+    beta < 1 lends more weight to precision, while beta > 1 favors recall
+    (beta -> 0 considers only precision, beta -> +inf only recall).
+
+    Args:
+        real: np.ndarray
+            One-dimensional array of correct answers with values of 1 or 0.
+        pred: np.ndarray
+            One-dimensional array of predicted answers with values of 1 or 0.
+        beta: float, default=1.0
+            Weight of recall in the combined score.
+        p_alpha: float, default=0.0
+            Relative importance of existence reward for precision. 0 ≤ p_alpha ≤ 1.
+        r_alpha: float, default=0.0
+            Relative importance of existence reward for recall. 0 ≤ r_alpha ≤ 1.
+        cardinality: string, default="one"
+            Cardinality type. This should be "one", "reciprocal" or "udf_gamma".
+        p_bias: string, default="flat"
+            Positional bias for precision. This should be "flat", "front", "middle", or "back".
+        r_bias: string, default="flat"
+            Positional bias for recall. This should be "flat", "front", "middle", or "back".
+
+    Returns:
+        float: f-score
+    """
+    fscore = TimeSeriesFScore(beta, p_alpha, r_alpha, cardinality, p_bias, r_bias)
+    return fscore.score(real, pred)
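A quick usage sketch of the three wrappers above on toy label arrays (illustrative, not part of the diff); with the alpha parameters at 0, only the overlap reward contributes:

```python
import numpy as np
from tsadmetrics._tsadeval.prts.basic_metrics_ts import ts_precision, ts_recall, ts_fscore

real = np.array([0, 1, 1, 1, 0, 0, 1, 1, 0])  # two real anomaly ranges
pred = np.array([0, 0, 1, 1, 1, 0, 0, 1, 1])  # two predicted ranges

p = ts_precision(real, pred, alpha=0.0, cardinality="one", bias="flat")
r = ts_recall(real, pred, alpha=0.0, cardinality="one", bias="flat")
f = ts_fscore(real, pred, beta=1.0, p_alpha=0.0, r_alpha=0.0,
              cardinality="one", p_bias="flat", r_bias="flat")
print(f"precision={p:.3f} recall={r:.3f} f1={f:.3f}")
```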