tsadmetrics 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. entorno/bin/activate_this.py +32 -0
  2. entorno/bin/rst2html.py +23 -0
  3. entorno/bin/rst2html4.py +26 -0
  4. entorno/bin/rst2html5.py +33 -0
  5. entorno/bin/rst2latex.py +26 -0
  6. entorno/bin/rst2man.py +27 -0
  7. entorno/bin/rst2odt.py +28 -0
  8. entorno/bin/rst2odt_prepstyles.py +20 -0
  9. entorno/bin/rst2pseudoxml.py +23 -0
  10. entorno/bin/rst2s5.py +24 -0
  11. entorno/bin/rst2xetex.py +27 -0
  12. entorno/bin/rst2xml.py +23 -0
  13. entorno/bin/rstpep2html.py +25 -0
  14. experiments/scripts/compute_metrics.py +187 -0
  15. experiments/scripts/metrics_complexity_analysis.py +109 -0
  16. experiments/scripts/metro_experiment.py +133 -0
  17. experiments/scripts/opt_metro_experiment.py +343 -0
  18. tests/__init__.py +0 -0
  19. tests/test_binary.py +759 -0
  20. tests/test_non_binary.py +371 -0
  21. tsadmetrics/_tsadeval/affiliation/__init__.py +0 -0
  22. tsadmetrics/_tsadeval/affiliation/_affiliation_zone.py +86 -0
  23. tsadmetrics/_tsadeval/affiliation/_integral_interval.py +464 -0
  24. tsadmetrics/_tsadeval/affiliation/_single_ground_truth_event.py +68 -0
  25. tsadmetrics/_tsadeval/affiliation/generics.py +135 -0
  26. tsadmetrics/_tsadeval/affiliation/metrics.py +114 -0
  27. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/File_IO.py +175 -0
  28. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Range.py +50 -0
  29. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Time_Plot.py +184 -0
  30. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/__init__.py +0 -0
  31. tsadmetrics/_tsadeval/eTaPR_pkg/__init__.py +0 -0
  32. tsadmetrics/_tsadeval/eTaPR_pkg/etapr.py +386 -0
  33. tsadmetrics/_tsadeval/eTaPR_pkg/tapr.py +362 -0
  34. tsadmetrics/_tsadeval/prts/__init__.py +0 -0
  35. tsadmetrics/_tsadeval/prts/base/__init__.py +0 -0
  36. tsadmetrics/_tsadeval/prts/base/time_series_metrics.py +165 -0
  37. tsadmetrics/_tsadeval/prts/basic_metrics_ts.py +121 -0
  38. tsadmetrics/_tsadeval/prts/time_series_metrics/__init__.py +0 -0
  39. tsadmetrics/_tsadeval/prts/time_series_metrics/fscore.py +61 -0
  40. tsadmetrics/_tsadeval/prts/time_series_metrics/precision.py +86 -0
  41. tsadmetrics/_tsadeval/prts/time_series_metrics/precision_recall.py +21 -0
  42. tsadmetrics/_tsadeval/prts/time_series_metrics/recall.py +85 -0
  43. {tsadmetrics-0.1.4.dist-info → tsadmetrics-0.1.5.dist-info}/METADATA +1 -1
  44. tsadmetrics-0.1.5.dist-info/RECORD +62 -0
  45. tsadmetrics-0.1.5.dist-info/top_level.txt +4 -0
  46. tsadmetrics-0.1.4.dist-info/RECORD +0 -20
  47. tsadmetrics-0.1.4.dist-info/top_level.txt +0 -1
  48. {tsadmetrics-0.1.4.dist-info → tsadmetrics-0.1.5.dist-info}/WHEEL +0 -0
tsadmetrics/_tsadeval/eTaPR_pkg/tapr.py
@@ -0,0 +1,362 @@
+ import argparse
+ from typing import Callable, Tuple
+ import math
+ import copy
+ from .DataManage import File_IO, Range as rng  # , Time_Plot (imported lazily in print_result)
+
+
+ class TaPR:
+     def __init__(self, theta: float, delta: int, past_range: bool = False):
+         self._past_range = past_range
+         self._predictions = []  # list of Ranges
+         self._anomalies = []  # list of Ranges
+         self._ambiguous_inst = []  # list of Ranges
+
+         self._set_predictions = False
+         self._set_anomalies = False
+
+         # self._rho = theta
+         # self._pi = theta
+         self._theta = theta
+         self._delta = delta
+
+     def set_anomalies(self, anomaly_list: list) -> None:
+         self._anomalies = copy.deepcopy(anomaly_list)
+         self._gen_ambiguous()
+         self._set_anomalies = True
+
+     def set_predictions(self, prediction_list: list) -> None:
+         self._predictions = copy.deepcopy(prediction_list)
+         self._set_predictions = True
+
+     def _gen_ambiguous(self):
+         for i in range(len(self._anomalies)):
+             start_id = self._anomalies[i].get_time()[1] + 1
+             end_id = start_id + self._delta
+
+             if self._past_range:
+                 end_id = start_id + int(self._delta * (self._anomalies[i].get_time()[1] - self._anomalies[i].get_time()[0]))
+
+             # if the next anomaly starts within the delta window, clip end_id to it
+             if i + 1 < len(self._anomalies) and end_id > self._anomalies[i + 1].get_time()[0]:
+                 end_id = self._anomalies[i + 1].get_time()[0] - 1
+
+             # empty ambiguous region
+             if start_id > end_id:
+                 start_id = -2
+                 end_id = -1
+
+             self._ambiguous_inst.append(rng.Range(start_id, end_id, str(i)))
+
+     def get_n_predictions(self):
+         return len(self._predictions)
+
+     def get_n_anomalies(self):
+         return len(self._anomalies)
+
+     def _ids_2_objects(self, id_list, range_list):
+         return [range_list[idx] for idx in id_list]
+
+     def TaR_d(self) -> Tuple[float, list]:
+         score, detected_id_list = self._TaR_d(self._anomalies, self._ambiguous_inst, self._predictions, self._theta)
+         return score, self._ids_2_objects(detected_id_list, self._anomalies)
+
+     def _TaR_d(self, anomalies: list, ambiguous_inst: list, predictions: list, threshold: float) -> Tuple[float, list]:
+         total_score = 0.0
+         detected_anomalies = []
+         total_score_p = 0.0
+         for anomaly_id in range(len(anomalies)):
+             anomaly = anomalies[anomaly_id]
+             ambiguous = ambiguous_inst[anomaly_id]
+
+             max_score = self._sum_of_func(anomaly.get_time()[0], anomaly.get_time()[1],
+                                           anomaly.get_time()[0], anomaly.get_time()[1], self._uniform_func)
+
+             score = 0.0
+             for prediction in predictions:
+                 score += self._overlap_and_subsequent_score(anomaly, ambiguous, prediction)
+
+             total_score_p += min(1.0, score / max_score)
+             if min(1.0, score / max_score) >= threshold:
+                 total_score += 1.0
+                 detected_anomalies.append(anomaly_id)
+
+         if len(anomalies) == 0:
+             self.TaR_p_value = 0.0
+             return 0.0, []
+         else:
+             self.TaR_p_value = total_score_p / len(anomalies)
+             return total_score / len(anomalies), detected_anomalies
+
+     def TaR_p(self) -> float:
+         total_score = 0.0
+         for anomaly_id in range(len(self._anomalies)):
+             anomaly = self._anomalies[anomaly_id]
+             ambiguous = self._ambiguous_inst[anomaly_id]
+
+             max_score = self._sum_of_func(anomaly.get_time()[0], anomaly.get_time()[1],
+                                           anomaly.get_time()[0], anomaly.get_time()[1], self._uniform_func)
+
+             score = 0.0
+             for prediction in self._predictions:
+                 score += self._overlap_and_subsequent_score(anomaly, ambiguous, prediction)
+
+             total_score += min(1.0, score / max_score)
+
+         if len(self._anomalies) == 0:
+             return 0.0
+         else:
+             return total_score / len(self._anomalies)
+
+     def TaP_d(self) -> Tuple[float, list]:
+         score, correct_id_list = self._TaP_d(self._anomalies, self._ambiguous_inst, self._predictions, self._theta)
+         return score, self._ids_2_objects(correct_id_list, self._predictions)
+
+     def _TaP_d(self, anomalies, ambiguous_inst, predictions, threshold):
+         # Computes TaP_d and, as a side effect, TaP_p in a single pass for performance.
+         correct_predictions = []
+         total_score = 0.0
+         total_score_p = 0.0
+         for prediction_id in range(len(predictions)):
+             max_score = predictions[prediction_id].get_time()[1] - predictions[prediction_id].get_time()[0] + 1
+
+             score = 0.0
+             for anomaly_id in range(len(anomalies)):
+                 anomaly = anomalies[anomaly_id]
+                 ambiguous = ambiguous_inst[anomaly_id]
+
+                 score += self._overlap_and_subsequent_score(anomaly, ambiguous, predictions[prediction_id])
+
+             total_score_p += score / max_score
+             if (score / max_score) >= threshold:
+                 total_score += 1.0
+                 correct_predictions.append(prediction_id)
+
+         if len(predictions) == 0:
+             self.TaP_p_value = 0.0
+             return 0.0, []
+         else:
+             self.TaP_p_value = total_score_p / len(predictions)
+             return total_score / len(predictions), correct_predictions
+
+     def _detect(self, src_range: rng.Range, ranges: list, theta: float) -> bool:
+         rest_len = src_range.get_time()[1] - src_range.get_time()[0] + 1
+         for dst_range in ranges:
+             overlap_len = self._overlapped_len(src_range, dst_range)
+             if overlap_len > 0:
+                 rest_len -= overlap_len
+         return float(rest_len) / (src_range.get_time()[1] - src_range.get_time()[0] + 1) <= (1.0 - theta)
+
+     def _overlapped_len(self, range1: rng.Range, range2: rng.Range) -> int:
+         detected_start = max(range1.get_time()[0], range2.get_time()[0])
+         detected_end = min(range1.get_time()[1], range2.get_time()[1])
+
+         if detected_end < detected_start:
+             return 0
+         else:
+             return detected_end - detected_start + 1
+
+     def _min_max_norm(self, value: int, org_min: int, org_max: int, new_min: int, new_max: int) -> float:
+         if org_min == org_max:
+             return new_min
+         else:
+             return float(new_min) + float(value - org_min) * (new_max - new_min) / (org_max - org_min)
+
+     def _decaying_func(self, val: float) -> float:
+         assert -6 <= val <= 6
+         return 1 / (1 + math.exp(val))
+
+     def _ascending_func(self, val: float) -> float:
+         assert -6 <= val <= 6
+         return 1 / (1 + math.exp(val * -1))
+
+     def _uniform_func(self, val: float) -> float:
+         return 1.0
+
+     def _sum_of_func(self, start_time: int, end_time: int, org_start: int, org_end: int,
+                      func: Callable[[float], float]) -> float:
+         val = 0.0
+         for timestamp in range(start_time, end_time + 1):
+             val += func(self._min_max_norm(timestamp, org_start, org_end, -6, 6))
+         return val
+
+     def _overlap_and_subsequent_score(self, anomaly: rng.Range, ambiguous: rng.Range, prediction: rng.Range) -> float:
+         score = 0.0
+
+         detected_start = max(anomaly.get_time()[0], prediction.get_time()[0])
+         detected_end = min(anomaly.get_time()[1], prediction.get_time()[1])
+
+         score += self._sum_of_func(detected_start, detected_end,
+                                    anomaly.get_time()[0], anomaly.get_time()[1], self._uniform_func)
+
+         if ambiguous.get_time()[0] < ambiguous.get_time()[1]:
+             detected_start = max(ambiguous.get_time()[0], prediction.get_time()[0])
+             detected_end = min(ambiguous.get_time()[1], prediction.get_time()[1])
+
+             score += self._sum_of_func(detected_start, detected_end,
+                                        ambiguous.get_time()[0], ambiguous.get_time()[1], self._decaying_func)
+
+         return score
+
+     def TaP_p(self) -> float:
+         total_score = 0.0
+         for prediction in self._predictions:
+             max_score = prediction.get_time()[1] - prediction.get_time()[0] + 1
+
+             score = 0.0
+             for anomaly_id in range(len(self._anomalies)):
+                 anomaly = self._anomalies[anomaly_id]
+                 ambiguous = self._ambiguous_inst[anomaly_id]
+
+                 score += self._overlap_and_subsequent_score(anomaly, ambiguous, prediction)
+
+             total_score += score / max_score
+
+         if len(self._predictions) == 0:
+             return 0.0
+         else:
+             return total_score / len(self._predictions)
+
+
+ def compute(anomalies: list, predictions: list, alpha: float, theta: float, delta: int) -> dict:
+     ev = TaPR(theta, delta)
+
+     ev.set_anomalies(anomalies)
+     ev.set_predictions(predictions)
+
+     tard_value, detected_list = ev.TaR_d()
+     tarp_value = ev.TaR_p()
+
+     tapd_value, correct_list = ev.TaP_d()
+     tapp_value = ev.TaP_p()
+
+     result = {}
+     tar_value = alpha * tard_value + (1 - alpha) * tarp_value
+     result['TaR'] = tar_value
+     result['TaRd'] = tard_value
+     result['TaRp'] = tarp_value
+
+     tap_value = alpha * tapd_value + (1 - alpha) * tapp_value
+     result['TaP'] = tap_value
+     result['TaPd'] = tapd_value
+     result['TaPp'] = tapp_value
+
+     detected_anomalies = []
+     for value in detected_list:
+         detected_anomalies.append(value.get_name())
+
+     result['Detected_Anomalies'] = detected_anomalies
+     result['Detected_Anomalies_Ranges'] = detected_list
+     result['Correct_Predictions_Ranges'] = correct_list
+
+     if tar_value + tap_value == 0:
+         result['f1'] = 0.0
+     else:
+         result['f1'] = (2 * tar_value * tap_value) / (tar_value + tap_value)
+
+     return result
+
+
+ def compute_with_load(anomaly_file: str, prediction_file: str, file_type: str, alpha: float, theta: float, delta: int) -> dict:
+     anomalies = File_IO.load_file(anomaly_file, file_type)
+     predictions = File_IO.load_file(prediction_file, file_type)
+     return compute(anomalies, predictions, alpha, theta, delta)
+
+
+ def print_result(anomalies: list, predictions: list, alpha: float, theta: float, delta: int, verbose: bool, graph: str) -> None:
+     org_predictions = copy.deepcopy(predictions)
+     result = compute(anomalies, predictions, alpha, theta, delta)
+
+     print("The parameters (alpha, theta, delta) are set as %g, %g, and %d." % (alpha, theta, delta))
+
+     print('\n[TaR]:', "%0.5f" % result['TaR'])
+     print("\t* Detection score:", "%0.5f" % result['TaRd'])
+     print("\t* Portion score:", "%0.5f" % result['TaRp'])
+     if verbose:
+         buf = '\t\tdetected anomalies: '
+         if len(result['Detected_Anomalies_Ranges']) == 0:
+             buf += "None  "  # two trailing characters; stripped by buf[:-2] below
+         else:
+             for value in result['Detected_Anomalies_Ranges']:
+                 buf += value.get_name() + '(' + str(value.get_time()[0]) + ':' + str(value.get_time()[1]) + '), '
+         print(buf[:-2])
+
+     print('\n[TaP]:', "%0.5f" % result['TaP'])
+     print("\t* Detection score:", "%0.5f" % result['TaPd'])
+     print("\t* Portion score:", "%0.5f" % result['TaPp'])
+     if verbose:
+         buf = '\t\tcorrect predictions: '
+         if len(result['Correct_Predictions_Ranges']) == 0:
+             buf += "None  "  # two trailing characters; stripped by buf[:-2] below
+         else:
+             for value in result['Correct_Predictions_Ranges']:
+                 buf += value.get_name() + '(' + str(value.get_time()[0]) + ':' + str(value.get_time()[1]) + '), '
+         print(buf[:-2])
+
+     assert graph in ('screen', 'file', 'none', 'all')
+     if graph in ('screen', 'file', 'all'):
+         # Lazy import: Time_Plot (and its plotting dependencies) are only
+         # needed when a graph is requested; see the commented import above.
+         from .DataManage import Time_Plot
+         Time_Plot.draw_graphs(anomalies, org_predictions, graph)
+
+
+ if __name__ == '__main__':
+     argument_parser = argparse.ArgumentParser()
+     argument_parser.add_argument("--anomalies", help="anomaly file name (ground truth)", required=True)
+     argument_parser.add_argument("--predictions", help="prediction file name", required=True)
+     argument_parser.add_argument("--filetype", help="choose the file type between range and stream", required=True)
+     argument_parser.add_argument("--graph", help="show graph of results")
+
+     argument_parser.add_argument("--verbose", help="show detailed results", action='store_true')
+     argument_parser.add_argument("--theta", help="set parameter theta")
+     argument_parser.add_argument("--alpha", help="set parameter alpha")
+     argument_parser.add_argument("--delta", help="set parameter delta")
+     arguments = argument_parser.parse_args()
+
+     theta, alpha, delta, graph = 0.5, 0.8, 600, 'none'  # default values
+     if arguments.theta is not None:
+         theta = float(arguments.theta)
+     if arguments.alpha is not None:
+         alpha = float(arguments.alpha)
+     if arguments.delta is not None:
+         delta = int(arguments.delta)
+     if arguments.graph is not None:
+         graph = arguments.graph
+
+     assert 0.0 <= theta <= 1.0
+     assert 0.0 <= alpha <= 1.0
+     assert isinstance(delta, int)
+     assert graph in ('screen', 'file', 'none', 'all')
+
+     anomalies = File_IO.load_file(arguments.anomalies, arguments.filetype)
+     predictions = File_IO.load_file(arguments.predictions, arguments.filetype)
+
+     print_result(anomalies, predictions, alpha, theta, delta, arguments.verbose, graph)
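Note: a minimal, hypothetical usage sketch of the compute() entry point above, for orientation only. The Range(first, last, name) constructor and its get_time()/get_name() accessors are inferred from how rng.Range is used in _gen_ambiguous; the import paths mirror the file list at the top of this diff, and all values are illustrative.

# Hypothetical usage sketch (not part of the diff).
from tsadmetrics._tsadeval.eTaPR_pkg.DataManage.Range import Range
from tsadmetrics._tsadeval.eTaPR_pkg.tapr import compute

anomalies = [Range(10, 19, '0'), Range(40, 44, '1')]    # ground-truth ranges
predictions = [Range(12, 18, '0'), Range(60, 62, '1')]  # detector output

result = compute(anomalies, predictions, alpha=0.8, theta=0.5, delta=5)
print(result['TaR'], result['TaP'], result['f1'])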
tsadmetrics/_tsadeval/prts/base/time_series_metrics.py
@@ -0,0 +1,165 @@
+ from typing import Any
+
+ import numpy as np
+
+
+ class BaseTimeSeriesMetrics:
+     """Base class for time series metrics."""
+
+     def score(self, real: np.ndarray, pred: np.ndarray) -> Any:
+         """Compute the metric. Implemented by subclasses.
+
+         Args:
+             real: one-dimensional array of ground-truth binary labels.
+             pred: one-dimensional array of predicted binary labels.
+
+         Returns:
+             The metric value.
+         """
+         ...
+
+     def _udf_gamma(self):
+         """The function of the user-defined gamma.
+
+         Returns:
+             float: the value of the user-defined gamma
+         """
+         return 1.0
+
+     def _gamma_select(self, gamma: str, overlap: int) -> float:
+         """Select the gamma value according to the parameters.
+
+         Args:
+             gamma: str
+                 - 'one': the value 1
+                 - 'reciprocal': a reciprocal of the overlap
+                 - 'udf_gamma': user-defined gamma
+             overlap: int
+                 overlap between real and pred
+
+         Returns:
+             float: the selected gamma value
+         """
+         assert isinstance(overlap, int), "overlap must be an int"
+
+         if gamma == "one":
+             return 1.0
+         elif gamma == "reciprocal":
+             if overlap > 1:
+                 return 1.0 / overlap
+             else:
+                 return 1.0
+         elif gamma == "udf_gamma":
+             if overlap > 1:
+                 return 1.0 / self._udf_gamma()
+             else:
+                 return 1.0
+         else:
+             raise ValueError(f"Invalid gamma value: {gamma!r}. Expected 'one', 'reciprocal', or 'udf_gamma'.")
+
+     def _gamma_function(self, overlap_count):
+         overlap = overlap_count[0]
+         return self._gamma_select(self.cardinality, overlap)
+
+     def _compute_omega_reward(self, r1, r2, overlap_count):
+         if r1[1] < r2[0] or r1[0] > r2[1]:
+             return 0
+         else:
+             overlap_count[0] += 1
+             overlap = np.zeros(r1.shape)
+             overlap[0] = max(r1[0], r2[0])
+             overlap[1] = min(r1[1], r2[1])
+             return self._omega_function(r1, overlap)
+
+     def _omega_function(self, rrange, overlap):
+         anomaly_length = rrange[1] - rrange[0] + 1
+         my_positional_bias = 0
+         max_positional_bias = 0
+         temp_bias = 0
+         for i in range(1, anomaly_length + 1):
+             temp_bias = self._delta_function(i, anomaly_length)
+             max_positional_bias += temp_bias
+             j = rrange[0] + i - 1
+             if overlap[0] <= j <= overlap[1]:
+                 my_positional_bias += temp_bias
+         if max_positional_bias > 0:
+             return my_positional_bias / max_positional_bias
+         else:
+             return 0
+
+     def _delta_function(self, t, anomaly_length):
+         return self._delta_select(self.bias, t, anomaly_length)
+
+     def _delta_select(self, delta, t, anomaly_length):
+         if delta == "flat":
+             return 1.0
+         elif delta == "front":
+             return float(anomaly_length - t + 1.0)
+         elif delta == "middle":
+             if t <= anomaly_length / 2.0:
+                 return float(t)
+             else:
+                 return float(anomaly_length - t + 1.0)
+         elif delta == "back":
+             return float(t)
+         elif delta == "udf_delta":
+             return self._udf_delta(t, anomaly_length)
+         else:
+             raise ValueError("Invalid positional bias value")
+
+     def _udf_delta(self, t, anomaly_length):
+         """User-defined delta function. The default implementation ignores
+         its arguments and returns a constant positional bias."""
+         return 1.0
+
+     def _shift(self, arr, num, fill_value=np.nan):
+         arr = np.roll(arr, num)
+         if num < 0:
+             arr[num:] = fill_value
+         elif num > 0:
+             arr[:num] = fill_value
+         return arr
+
+     def _prepare_data(self, values_real, values_pred):
+         # Convert point-wise 0/1 label arrays into arrays of inclusive
+         # [start, end] index pairs, one row per contiguous anomaly range.
+         assert len(values_real) == len(values_pred)
+         assert np.allclose(np.unique(values_real), np.array([0, 1])) or np.allclose(
+             np.unique(values_real), np.array([1])
+         )
+         assert np.allclose(np.unique(values_pred), np.array([0, 1])) or np.allclose(
+             np.unique(values_pred), np.array([1])
+         )
+
+         predicted_anomalies_ = np.argwhere(values_pred == 1).ravel()
+         predicted_anomalies_shift_forward = self._shift(predicted_anomalies_, 1, fill_value=predicted_anomalies_[0])
+         predicted_anomalies_shift_backward = self._shift(predicted_anomalies_, -1, fill_value=predicted_anomalies_[-1])
+         predicted_anomalies_start = np.argwhere(
+             (predicted_anomalies_shift_forward - predicted_anomalies_) != -1
+         ).ravel()
+         predicted_anomalies_finish = np.argwhere(
+             (predicted_anomalies_ - predicted_anomalies_shift_backward) != -1
+         ).ravel()
+         predicted_anomalies = np.hstack(
+             [
+                 predicted_anomalies_[predicted_anomalies_start].reshape(-1, 1),
+                 predicted_anomalies_[predicted_anomalies_finish].reshape(-1, 1),
+             ]
+         )
+
+         real_anomalies_ = np.argwhere(values_real == 1).ravel()
+         real_anomalies_shift_forward = self._shift(real_anomalies_, 1, fill_value=real_anomalies_[0])
+         real_anomalies_shift_backward = self._shift(real_anomalies_, -1, fill_value=real_anomalies_[-1])
+         real_anomalies_start = np.argwhere((real_anomalies_shift_forward - real_anomalies_) != -1).ravel()
+         real_anomalies_finish = np.argwhere((real_anomalies_ - real_anomalies_shift_backward) != -1).ravel()
+         real_anomalies = np.hstack(
+             [
+                 real_anomalies_[real_anomalies_start].reshape(-1, 1),
+                 real_anomalies_[real_anomalies_finish].reshape(-1, 1),
+             ]
+         )
+
+         return real_anomalies, predicted_anomalies
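Note: a small sketch of what _prepare_data produces may help: it turns point-wise 0/1 labels into inclusive [start, end] index pairs. Calling the helper directly on the base class is an assumption for illustration; in the library the concrete subclasses invoke it from score().

# Illustrative only: exercise the range-extraction helper directly.
import numpy as np
from tsadmetrics._tsadeval.prts.base.time_series_metrics import BaseTimeSeriesMetrics

m = BaseTimeSeriesMetrics()
real = np.array([0, 1, 1, 0, 0, 1, 0])
pred = np.array([0, 1, 0, 0, 1, 1, 0])
real_ranges, pred_ranges = m._prepare_data(real, pred)
print(real_ranges)  # [[1 2] [5 5]] -- inclusive [start, end] pairs
print(pred_ranges)  # [[1 1] [4 5]]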
tsadmetrics/_tsadeval/prts/basic_metrics_ts.py
@@ -0,0 +1,121 @@
+ from .time_series_metrics.fscore import TimeSeriesFScore
+ from .time_series_metrics.precision import TimeSeriesPrecision
+ from .time_series_metrics.recall import TimeSeriesRecall
+
+
+ def ts_precision(real, pred, alpha=0.0, cardinality="one", bias="flat"):
+     """Compute the range-based precision.
+
+     The range-based precision is the average of "Precision_Ti", where "Precision_Ti" is
+     the precision score of each predicted anomaly range, calculated as follows:
+         Precision_Ti = α x ExistenceReward + (1 - α) x OverlapReward, where 0 ≤ α ≤ 1.
+     α represents the relative importance of rewarding existence, whereas
+     (1 − α) represents the relative importance of rewarding size, position, and cardinality.
+
+     "ExistenceReward" is 1 if a real anomaly range overlaps with even a single point of
+     the predicted anomaly range, and 0 otherwise.
+     Note: for precision there is typically no need for an existence reward, since precision
+     by definition emphasizes prediction quality, and existence by itself is too low a bar
+     for judging the quality of a prediction (i.e., α = 0).
+
+     "OverlapReward" is calculated as follows:
+         OverlapReward = CardinalityFactor x Sum of ω
+     "CardinalityFactor" is 1 if the predicted anomaly range overlaps with only one real
+     anomaly range; otherwise it receives a value 0 ≤ γ() ≤ 1 defined by the application.
+     "CardinalityFactor" serves as a scaling factor for the rewards ω, which are earned
+     from overlap size and position.
+     In determining ω, we consider the size and the relative position of the correctly
+     predicted portion of a predicted anomaly range.
+
+     Args:
+         real: np.ndarray
+             One-dimensional array of correct answers with values of 1 or 0.
+         pred: np.ndarray
+             One-dimensional array of predicted answers with values of 1 or 0.
+         alpha: float, default=0.0
+             Relative importance of existence reward. 0 ≤ alpha ≤ 1.
+         cardinality: string, default="one"
+             Cardinality type. This should be "one", "reciprocal" or "udf_gamma".
+         bias: string, default="flat"
+             Positional bias. This should be "flat", "front", "middle", or "back".
+
+     Returns:
+         float: the precision score
+     """
+     precision = TimeSeriesPrecision(alpha, cardinality, bias)
+     return precision.score(real, pred)
+
+
+ def ts_recall(real, pred, alpha=0.0, cardinality="one", bias="flat"):
+     """Compute the range-based recall.
+
+     The range-based recall is the average of "Recall_Ti", where "Recall_Ti" is
+     the recall score of each real anomaly range, calculated as follows:
+         Recall_Ti = α x ExistenceReward + (1 - α) x OverlapReward, where 0 ≤ α ≤ 1.
+     α represents the relative importance of rewarding existence, whereas
+     (1 − α) represents the relative importance of rewarding size, position, and cardinality.
+
+     "ExistenceReward" is 1 if a prediction captures even a single point of the real
+     anomaly range, and 0 otherwise.
+
+     "OverlapReward" is calculated as follows:
+         OverlapReward = CardinalityFactor x Sum of ω
+     "CardinalityFactor" is 1 if the real anomaly range overlaps with only one predicted
+     anomaly range; otherwise it receives a value 0 ≤ γ() ≤ 1 defined by the application.
+     "CardinalityFactor" serves as a scaling factor for the rewards ω, which are earned
+     from overlap size and position.
+     In determining ω, we consider the size and the relative position of the correctly
+     predicted portion of the real anomaly range.
+
+     Args:
+         real: np.ndarray
+             One-dimensional array of correct answers with values of 1 or 0.
+         pred: np.ndarray
+             One-dimensional array of predicted answers with values of 1 or 0.
+         alpha: float, default=0.0
+             Relative importance of existence reward. 0 ≤ alpha ≤ 1.
+         cardinality: string, default="one"
+             Cardinality type. This should be "one", "reciprocal" or "udf_gamma".
+         bias: string, default="flat"
+             Positional bias. This should be "flat", "front", "middle", or "back".
+
+     Returns:
+         float: the recall score
+     """
+     recall = TimeSeriesRecall(alpha, cardinality, bias)
+     return recall.score(real, pred)
+
+
+ def ts_fscore(real, pred, beta=1.0, p_alpha=0.0, r_alpha=0.0, cardinality="one", p_bias="flat", r_bias="flat"):
+     """Compute the range-based F-score.
+
+     The F-beta score is the weighted harmonic mean of precision and recall,
+     reaching its optimal value at 1 and its worst value at 0.
+     The beta parameter determines the weight of recall in the combined score:
+     beta < 1 lends more weight to precision, while beta > 1 favors recall
+     (beta -> 0 considers only precision, beta -> +inf only recall).
+
+     Args:
+         real: np.ndarray
+             One-dimensional array of correct answers with values of 1 or 0.
+         pred: np.ndarray
+             One-dimensional array of predicted answers with values of 1 or 0.
+         beta: float, default=1.0
+             Weight of recall in the combined score.
+         p_alpha: float, default=0.0
+             Relative importance of existence reward for precision. 0 ≤ p_alpha ≤ 1.
+         r_alpha: float, default=0.0
+             Relative importance of existence reward for recall. 0 ≤ r_alpha ≤ 1.
+         cardinality: string, default="one"
+             Cardinality type. This should be "one", "reciprocal" or "udf_gamma".
+         p_bias: string, default="flat"
+             Positional bias for precision. This should be "flat", "front", "middle", or "back".
+         r_bias: string, default="flat"
+             Positional bias for recall. This should be "flat", "front", "middle", or "back".
+
+     Returns:
+         float: the F-score
+     """
+     fscore = TimeSeriesFScore(beta, p_alpha, r_alpha, cardinality, p_bias, r_bias)
+     return fscore.score(real, pred)
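Note: a brief usage sketch of the three wrappers above. The label arrays are made up, and the resulting scores depend on the TimeSeriesPrecision/TimeSeriesRecall/TimeSeriesFScore implementations added elsewhere in this diff; the calls match the signatures shown above.

import numpy as np
from tsadmetrics._tsadeval.prts.basic_metrics_ts import ts_precision, ts_recall, ts_fscore

real = np.array([0, 0, 1, 1, 1, 0, 0, 1, 1, 0])  # ground-truth labels
pred = np.array([0, 0, 0, 1, 1, 1, 0, 0, 1, 0])  # predicted labels

print(ts_precision(real, pred, alpha=0.0, cardinality="one", bias="flat"))
print(ts_recall(real, pred, alpha=0.5, cardinality="one", bias="front"))
print(ts_fscore(real, pred, beta=1.0, p_alpha=0.0, r_alpha=0.5))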