tsadmetrics 0.1.16__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/api_doc/conf.py +67 -0
- docs/{conf.py → full_doc/conf.py} +1 -1
- docs/manual_doc/conf.py +67 -0
- examples/example_direct_data.py +28 -0
- examples/example_direct_single_data.py +25 -0
- examples/example_file_reference.py +24 -0
- examples/example_global_config_file.py +13 -0
- examples/example_metric_config_file.py +19 -0
- examples/example_simple_metric.py +8 -0
- examples/specific_examples/AbsoluteDetectionDistance_example.py +24 -0
- examples/specific_examples/AffiliationbasedFScore_example.py +24 -0
- examples/specific_examples/AverageDetectionCount_example.py +24 -0
- examples/specific_examples/CompositeFScore_example.py +24 -0
- examples/specific_examples/DelayThresholdedPointadjustedFScore_example.py +24 -0
- examples/specific_examples/DetectionAccuracyInRange_example.py +24 -0
- examples/specific_examples/EnhancedTimeseriesAwareFScore_example.py +24 -0
- examples/specific_examples/LatencySparsityawareFScore_example.py +24 -0
- examples/specific_examples/MeanTimeToDetect_example.py +24 -0
- examples/specific_examples/NabScore_example.py +24 -0
- examples/specific_examples/PateFScore_example.py +24 -0
- examples/specific_examples/Pate_example.py +24 -0
- examples/specific_examples/PointadjustedAtKFScore_example.py +24 -0
- examples/specific_examples/PointadjustedAucPr_example.py +24 -0
- examples/specific_examples/PointadjustedAucRoc_example.py +24 -0
- examples/specific_examples/PointadjustedFScore_example.py +24 -0
- examples/specific_examples/RangebasedFScore_example.py +24 -0
- examples/specific_examples/SegmentwiseFScore_example.py +24 -0
- examples/specific_examples/TemporalDistance_example.py +24 -0
- examples/specific_examples/TimeTolerantFScore_example.py +24 -0
- examples/specific_examples/TimeseriesAwareFScore_example.py +24 -0
- examples/specific_examples/TotalDetectedInRange_example.py +24 -0
- examples/specific_examples/VusPr_example.py +24 -0
- examples/specific_examples/VusRoc_example.py +24 -0
- examples/specific_examples/WeightedDetectionDifference_example.py +24 -0
- tests/test_dpm.py +212 -0
- tests/test_ptdm.py +366 -0
- tests/test_registry.py +58 -0
- tests/test_runner.py +185 -0
- tests/test_spm.py +213 -0
- tests/test_tmem.py +198 -0
- tests/test_tpdm.py +369 -0
- tests/test_tstm.py +338 -0
- tsadmetrics/__init__.py +0 -21
- tsadmetrics/base/Metric.py +188 -0
- tsadmetrics/evaluation/Report.py +25 -0
- tsadmetrics/evaluation/Runner.py +253 -0
- tsadmetrics/metrics/Registry.py +141 -0
- tsadmetrics/metrics/__init__.py +2 -0
- tsadmetrics/metrics/spm/PointwiseAucPr.py +62 -0
- tsadmetrics/metrics/spm/PointwiseAucRoc.py +63 -0
- tsadmetrics/metrics/spm/PointwiseFScore.py +86 -0
- tsadmetrics/metrics/spm/PrecisionAtK.py +81 -0
- tsadmetrics/metrics/spm/__init__.py +9 -0
- tsadmetrics/metrics/tem/dpm/DelayThresholdedPointadjustedFScore.py +83 -0
- tsadmetrics/metrics/tem/dpm/LatencySparsityawareFScore.py +76 -0
- tsadmetrics/metrics/tem/dpm/MeanTimeToDetect.py +47 -0
- tsadmetrics/metrics/tem/dpm/NabScore.py +60 -0
- tsadmetrics/metrics/tem/dpm/__init__.py +11 -0
- tsadmetrics/metrics/tem/ptdm/AverageDetectionCount.py +53 -0
- tsadmetrics/metrics/tem/ptdm/DetectionAccuracyInRange.py +66 -0
- tsadmetrics/metrics/tem/ptdm/PointadjustedAtKFScore.py +80 -0
- tsadmetrics/metrics/tem/ptdm/TimeseriesAwareFScore.py +248 -0
- tsadmetrics/metrics/tem/ptdm/TotalDetectedInRange.py +65 -0
- tsadmetrics/metrics/tem/ptdm/WeightedDetectionDifference.py +97 -0
- tsadmetrics/metrics/tem/ptdm/__init__.py +12 -0
- tsadmetrics/metrics/tem/tmem/AbsoluteDetectionDistance.py +48 -0
- tsadmetrics/metrics/tem/tmem/EnhancedTimeseriesAwareFScore.py +252 -0
- tsadmetrics/metrics/tem/tmem/TemporalDistance.py +68 -0
- tsadmetrics/metrics/tem/tmem/__init__.py +9 -0
- tsadmetrics/metrics/tem/tpdm/CompositeFScore.py +104 -0
- tsadmetrics/metrics/tem/tpdm/PointadjustedAucPr.py +123 -0
- tsadmetrics/metrics/tem/tpdm/PointadjustedAucRoc.py +119 -0
- tsadmetrics/metrics/tem/tpdm/PointadjustedFScore.py +96 -0
- tsadmetrics/metrics/tem/tpdm/RangebasedFScore.py +236 -0
- tsadmetrics/metrics/tem/tpdm/SegmentwiseFScore.py +73 -0
- tsadmetrics/metrics/tem/tpdm/__init__.py +12 -0
- tsadmetrics/metrics/tem/tstm/AffiliationbasedFScore.py +68 -0
- tsadmetrics/metrics/tem/tstm/Pate.py +62 -0
- tsadmetrics/metrics/tem/tstm/PateFScore.py +61 -0
- tsadmetrics/metrics/tem/tstm/TimeTolerantFScore.py +85 -0
- tsadmetrics/metrics/tem/tstm/VusPr.py +51 -0
- tsadmetrics/metrics/tem/tstm/VusRoc.py +55 -0
- tsadmetrics/metrics/tem/tstm/__init__.py +15 -0
- tsadmetrics/{_tsadeval/affiliation/_integral_interval.py → utils/functions_affiliation.py} +377 -9
- tsadmetrics/utils/functions_auc.py +393 -0
- tsadmetrics/utils/functions_conversion.py +63 -0
- tsadmetrics/utils/functions_counting_metrics.py +26 -0
- tsadmetrics/{_tsadeval/latency_sparsity_aware.py → utils/functions_latency_sparsity_aware.py} +1 -1
- tsadmetrics/{_tsadeval/nabscore.py → utils/functions_nabscore.py} +15 -1
- tsadmetrics-1.0.0.dist-info/METADATA +69 -0
- tsadmetrics-1.0.0.dist-info/RECORD +99 -0
- {tsadmetrics-0.1.16.dist-info → tsadmetrics-1.0.0.dist-info}/top_level.txt +1 -1
- entorno/bin/activate_this.py +0 -32
- entorno/bin/rst2html.py +0 -23
- entorno/bin/rst2html4.py +0 -26
- entorno/bin/rst2html5.py +0 -33
- entorno/bin/rst2latex.py +0 -26
- entorno/bin/rst2man.py +0 -27
- entorno/bin/rst2odt.py +0 -28
- entorno/bin/rst2odt_prepstyles.py +0 -20
- entorno/bin/rst2pseudoxml.py +0 -23
- entorno/bin/rst2s5.py +0 -24
- entorno/bin/rst2xetex.py +0 -27
- entorno/bin/rst2xml.py +0 -23
- entorno/bin/rstpep2html.py +0 -25
- tests/test_binary.py +0 -946
- tests/test_non_binary.py +0 -420
- tests/test_utils.py +0 -49
- tsadmetrics/_tsadeval/affiliation/_affiliation_zone.py +0 -86
- tsadmetrics/_tsadeval/affiliation/_single_ground_truth_event.py +0 -68
- tsadmetrics/_tsadeval/affiliation/generics.py +0 -135
- tsadmetrics/_tsadeval/affiliation/metrics.py +0 -114
- tsadmetrics/_tsadeval/auc_roc_pr_plot.py +0 -295
- tsadmetrics/_tsadeval/discontinuity_graph.py +0 -109
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/File_IO.py +0 -175
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Range.py +0 -50
- tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Time_Plot.py +0 -184
- tsadmetrics/_tsadeval/eTaPR_pkg/__init__.py +0 -0
- tsadmetrics/_tsadeval/eTaPR_pkg/etapr.py +0 -386
- tsadmetrics/_tsadeval/eTaPR_pkg/tapr.py +0 -362
- tsadmetrics/_tsadeval/metrics.py +0 -698
- tsadmetrics/_tsadeval/prts/__init__.py +0 -0
- tsadmetrics/_tsadeval/prts/base/__init__.py +0 -0
- tsadmetrics/_tsadeval/prts/base/time_series_metrics.py +0 -165
- tsadmetrics/_tsadeval/prts/basic_metrics_ts.py +0 -121
- tsadmetrics/_tsadeval/prts/time_series_metrics/__init__.py +0 -0
- tsadmetrics/_tsadeval/prts/time_series_metrics/fscore.py +0 -61
- tsadmetrics/_tsadeval/prts/time_series_metrics/precision.py +0 -86
- tsadmetrics/_tsadeval/prts/time_series_metrics/precision_recall.py +0 -21
- tsadmetrics/_tsadeval/prts/time_series_metrics/recall.py +0 -85
- tsadmetrics/_tsadeval/tests.py +0 -376
- tsadmetrics/_tsadeval/threshold_plt.py +0 -30
- tsadmetrics/_tsadeval/time_tolerant.py +0 -33
- tsadmetrics/binary_metrics.py +0 -1652
- tsadmetrics/metric_utils.py +0 -98
- tsadmetrics/non_binary_metrics.py +0 -398
- tsadmetrics/scripts/__init__.py +0 -0
- tsadmetrics/scripts/compute_metrics.py +0 -42
- tsadmetrics/utils.py +0 -122
- tsadmetrics/validation.py +0 -35
- tsadmetrics-0.1.16.dist-info/METADATA +0 -23
- tsadmetrics-0.1.16.dist-info/RECORD +0 -64
- tsadmetrics-0.1.16.dist-info/entry_points.txt +0 -2
- /tsadmetrics/{_tsadeval → base}/__init__.py +0 -0
- /tsadmetrics/{_tsadeval/affiliation → evaluation}/__init__.py +0 -0
- /tsadmetrics/{_tsadeval/eTaPR_pkg/DataManage → metrics/tem}/__init__.py +0 -0
- /tsadmetrics/{_tsadeval/vus_utils.py → utils/functions_vus.py} +0 -0
- {tsadmetrics-0.1.16.dist-info → tsadmetrics-1.0.0.dist-info}/WHEEL +0 -0
tsadmetrics/utils/functions_auc.py
ADDED
@@ -0,0 +1,393 @@
+import numpy as np
+import warnings
+
+
+def average_binary_score(binary_metric, y_true, y_score):
+    """Average a binary metric for multilabel classification.
+
+    Parameters
+    ----------
+    binary_metric : callable
+        Binary metric applied per class, with signature
+        ``binary_metric(y_true, y_score)``.
+
+    y_true : array, shape = [n_samples] or [n_samples, n_classes]
+        True binary labels in binary label indicators.
+
+    y_score : array, shape = [n_samples] or [n_samples, n_classes]
+        Target scores, can either be probability estimates of the positive
+        class, confidence values, or binary decisions.
+
+    Returns
+    -------
+    score : array of shape [n_classes]
+        Score of the binary metric for each class.
+    """
+    not_average_axis = 1
+
+    if y_true.ndim == 1:
+        y_true = y_true.reshape((-1, 1))
+
+    if y_score.ndim == 1:
+        y_score = y_score.reshape((-1, 1))
+
+    n_classes = y_score.shape[not_average_axis]
+    score = np.zeros((n_classes,))
+    for c in range(n_classes):
+        y_true_c = y_true.take([c], axis=not_average_axis).ravel()
+        y_score_c = y_score.take([c], axis=not_average_axis).ravel()
+        score[c] = binary_metric(y_true_c, y_score_c)
+
+    # One score per class; no averaging is applied here.
+    return score
+
+
+def auc(x, y):
+    """Compute Area Under the Curve (AUC) using the trapezoidal rule.
+
+    This is a general function, given points on a curve. For computing the
+    area under the ROC curve, see :func:`roc_curve`. For an alternative way
+    to summarize a precision-recall curve, see :func:`average_precision_score`.
+
+    Parameters
+    ----------
+    x : array-like of shape (n,)
+        X coordinates. These must be either monotonic increasing or monotonic
+        decreasing.
+    y : array-like of shape (n,)
+        Y coordinates.
+
+    Returns
+    -------
+    auc : float
+        Area Under the Curve.
+    """
+    if len(x) < 2:
+        raise ValueError(
+            "At least 2 points are needed to compute area under curve, but x.shape = %s"
+            % x.shape
+        )
+
+    direction = 1
+    dx = np.diff(x)
+    if np.any(dx < 0):
+        if np.all(dx <= 0):
+            direction = -1
+        else:
+            raise ValueError("x is neither increasing nor decreasing : {}.".format(x))
+
+    area = direction * np.trapz(y, x)
+    if isinstance(area, np.memmap):
+        # Reductions such as .sum used internally in trapezoid do not return a
+        # scalar by default for numpy.memmap instances, contrary to
+        # regular numpy.ndarray instances.
+        area = area.dtype.type(area)
+    return area
+
+
+def stable_cumsum(arr, axis=None):
+    """Compute the cumulative sum in high precision (float64).
+
+    Parameters
+    ----------
+    arr : array-like
+        To be cumulatively summed as flat.
+    axis : int, default=None
+        Axis along which the cumulative sum is computed.
+        The default (None) is to compute the cumsum over the flattened array.
+
+    Returns
+    -------
+    out : ndarray
+        Array with the cumulative sums along the chosen axis.
+    """
+    return np.cumsum(arr, axis=axis, dtype=np.float64)
+
+
+def binary_clf_curve(y_true, y_score):
+    """Calculate true and false positives per binary classification threshold.
+
+    Parameters
+    ----------
+    y_true : ndarray of shape (n_samples,)
+        True targets of binary classification; the positive class is labeled 1.
+
+    y_score : ndarray of shape (n_samples,)
+        Estimated probabilities or output of a decision function.
+
+    Returns
+    -------
+    fps : ndarray of shape (n_thresholds,)
+        A count of false positives, at index i being the number of negative
+        samples assigned a score >= thresholds[i]. The total number of
+        negative samples is equal to fps[-1] (thus true negatives are given by
+        fps[-1] - fps).
+
+    tps : ndarray of shape (n_thresholds,)
+        An increasing count of true positives, at index i being the number
+        of positive samples assigned a score >= thresholds[i]. The total
+        number of positive samples is equal to tps[-1] (thus false negatives
+        are given by tps[-1] - tps).
+
+    thresholds : ndarray of shape (n_thresholds,)
+        Decreasing score values.
+    """
+    # make y_true a boolean vector
+    y_true = y_true == 1
+
+    # sort scores and corresponding truth values
+    desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1]
+    y_score = y_score[desc_score_indices]
+    y_true = y_true[desc_score_indices]
+    weight = 1.0
+
+    # y_score typically has many tied values. Here we extract
+    # the indices associated with the distinct values. We also
+    # concatenate a value for the end of the curve.
+    distinct_value_indices = np.where(np.diff(y_score))[0]
+    threshold_idxs = np.r_[distinct_value_indices, len(y_true) - 1]
+
+    # accumulate the true positives with decreasing threshold
+    tps = stable_cumsum(y_true * weight)[threshold_idxs]
+    fps = 1 + threshold_idxs - tps
+    return fps, tps, y_score[threshold_idxs]
+
+
+def roc_curve(y_true, y_score):
+    """Compute Receiver operating characteristic (ROC).
+
+    Note: this implementation is restricted to the binary classification task.
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,)
+        True binary labels; the positive class is assumed to be labeled 1.
+
+    y_score : array-like of shape (n_samples,)
+        Target scores, can either be probability estimates of the positive
+        class, confidence values, or non-thresholded measure of decisions
+        (as returned by "decision_function" on some classifiers).
+
+    Returns
+    -------
+    fpr : ndarray of shape (>2,)
+        Increasing false positive rates such that element i is the false
+        positive rate of predictions with score >= `thresholds[i]`.
+
+    tpr : ndarray of shape (>2,)
+        Increasing true positive rates such that element `i` is the true
+        positive rate of predictions with score >= `thresholds[i]`.
+
+    thresholds : ndarray of shape (n_thresholds,)
+        Decreasing thresholds on the decision function used to compute
+        fpr and tpr. `thresholds[0]` represents no instances being predicted
+        and is arbitrarily set to `np.inf`.
+    """
+    fps, tps, thresholds = binary_clf_curve(y_true, y_score)
+
+    # Drop suboptimal, collinear points that do not change the curve shape
+    if len(fps) > 2:
+        optimal_idxs = np.where(
+            np.r_[True, np.logical_or(np.diff(fps, 2), np.diff(tps, 2)), True]
+        )[0]
+        fps = fps[optimal_idxs]
+        tps = tps[optimal_idxs]
+        thresholds = thresholds[optimal_idxs]
+
+    # Add an extra threshold position
+    # to make sure that the curve starts at (0, 0)
+    tps = np.r_[0, tps]
+    fps = np.r_[0, fps]
+    # prepend an infinite threshold corresponding to no positive predictions
+    thresholds = np.r_[np.inf, thresholds]
+
+    if fps[-1] <= 0:
+        warnings.warn(
+            "No negative samples in y_true, false positive value should be meaningless."
+        )
+        fpr = np.repeat(np.nan, fps.shape)
+    else:
+        fpr = fps / fps[-1]
+
+    if tps[-1] <= 0:
+        warnings.warn(
+            "No positive samples in y_true, true positive value should be meaningless."
+        )
+        tpr = np.repeat(np.nan, tps.shape)
+    else:
+        tpr = tps / tps[-1]
+
+    return fpr, tpr, thresholds
+
+
+def precision_recall_curve(y_true, probas_pred):
+    """Compute precision-recall pairs for different probability thresholds.
+
+    Note: this implementation is restricted to the binary classification task.
+
+    The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
+    true positives and ``fp`` the number of false positives. The precision is
+    intuitively the ability of the classifier not to label as positive a sample
+    that is negative.
+
+    The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
+    true positives and ``fn`` the number of false negatives. The recall is
+    intuitively the ability of the classifier to find all the positive samples.
+
+    The last precision and recall values are 1. and 0. respectively and do not
+    have a corresponding threshold. This ensures that the graph starts on the
+    y axis.
+
+    The first precision and recall values are precision=class balance and
+    recall=1.0, which corresponds to a classifier that always predicts the
+    positive class.
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,)
+        True binary labels; the positive class is assumed to be labeled 1.
+
+    probas_pred : array-like of shape (n_samples,)
+        Target scores, can either be probability estimates of the positive
+        class, or non-thresholded measure of decisions (as returned by
+        `decision_function` on some classifiers).
+
+    Returns
+    -------
+    precision : ndarray of shape (n_thresholds + 1,)
+        Precision values such that element i is the precision of
+        predictions with score >= thresholds[i] and the last element is 1.
+
+    recall : ndarray of shape (n_thresholds + 1,)
+        Decreasing recall values such that element i is the recall of
+        predictions with score >= thresholds[i] and the last element is 0.
+
+    thresholds : ndarray of shape (n_thresholds,)
+        Increasing thresholds on the decision function used to compute
+        precision and recall where `n_thresholds = len(np.unique(probas_pred))`.
+    """
+    fps, tps, thresholds = binary_clf_curve(y_true, probas_pred)
+
+    ps = tps + fps
+    # Initialize the result array with zeros to make sure that precision[ps == 0]
+    # does not contain uninitialized values.
+    precision = np.zeros_like(tps)
+    np.divide(tps, ps, out=precision, where=(ps != 0))
+
+    # When no positive label is in y_true, recall is set to 1 for all thresholds
+    # tps[-1] == 0 <=> y_true == all negative labels
+    if tps[-1] == 0:
+        warnings.warn(
+            "No positive class found in y_true, "
+            "recall is set to one for all thresholds."
+        )
+        recall = np.ones_like(tps)
+    else:
+        recall = tps / tps[-1]
+
+    # reverse the outputs so recall is decreasing
+    sl = slice(None, None, -1)
+    return np.hstack((precision[sl], 1)), np.hstack((recall[sl], 0)), thresholds[sl]
+
+
+def average_precision_score(y_true, y_score):
+    """Compute average precision (AP) from prediction scores.
+
+    AP summarizes a precision-recall curve as the weighted mean of precisions
+    achieved at each threshold, with the increase in recall from the previous
+    threshold used as the weight:
+
+    .. math::
+        \\text{AP} = \\sum_n (R_n - R_{n-1}) P_n
+
+    where :math:`P_n` and :math:`R_n` are the precision and recall at the nth
+    threshold [1]_. This implementation is not interpolated and is different
+    from computing the area under the precision-recall curve with the
+    trapezoidal rule, which uses linear interpolation and can be too
+    optimistic.
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,) or (n_samples, n_classes)
+        True binary labels or binary label indicators.
+
+    y_score : array-like of shape (n_samples,) or (n_samples, n_classes)
+        Target scores, can either be probability estimates of the positive
+        class, confidence values, or non-thresholded measure of decisions
+        (as returned by :term:`decision_function` on some classifiers).
+
+    Returns
+    -------
+    average_precision : ndarray of shape (n_classes,)
+        Average precision score per class.
+    """
+
+    def _binary_uninterpolated_average_precision(y_true, y_score):
+        precision, recall, _ = precision_recall_curve(y_true, y_score)
+        # Return the step function integral.
+        # The following works because the last entry of precision is
+        # guaranteed to be 1, as returned by precision_recall_curve.
+        return -np.sum(np.diff(recall) * np.array(precision)[:-1])
+
+    return average_binary_score(
+        _binary_uninterpolated_average_precision, y_true, y_score
+    )
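For orientation, here is a minimal usage sketch of the functions added above. The signatures are taken directly from this hunk and the import path from the file list (`tsadmetrics/utils/functions_auc.py`); the data is illustrative only:

```python
import numpy as np
from tsadmetrics.utils.functions_auc import (
    auc, roc_curve, precision_recall_curve, average_precision_score,
)

y_true = np.array([0, 0, 1, 1, 0, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7])

# ROC AUC via the trapezoidal rule over the (fpr, tpr) curve
fpr, tpr, _ = roc_curve(y_true, y_score)
print(auc(fpr, tpr))

# Uninterpolated average precision; 1-D input yields a length-1 array
prec, rec, thr = precision_recall_curve(y_true, y_score)
print(average_precision_score(y_true, y_score))
```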
tsadmetrics/utils/functions_conversion.py
ADDED
@@ -0,0 +1,63 @@
+import numpy as np
+
+
+def pointwise_to_segmentwise(pointwise):
+    """Reformat anomaly time series from pointwise to segmentwise"""
+    segmentwise = []
+
+    prev = -10
+    for point in pointwise:
+        if point > prev + 1:
+            segmentwise.append([point, point])
+        else:
+            segmentwise[-1][-1] += 1
+        prev = point
+    return np.array(segmentwise)
+
+
+def segmentwise_to_pointwise(segmentwise):
+    """Reformat anomaly time series from segmentwise to pointwise"""
+    pointwise = []
+
+    for start, end in segmentwise:
+        for point in range(start, end + 1):
+            pointwise.append(point)
+
+    return np.array(pointwise)
+
+
+def segmentwise_to_full_series(segmentwise, length):
+    """Reformat anomaly time series from segmentwise to full_series"""
+    pw = segmentwise_to_pointwise(segmentwise)
+
+    return pointwise_to_full_series(pw, length)
+
+
+def pointwise_to_full_series(pointwise, length):
+    """Reformat anomaly time series from pointwise to full_series"""
+    anomalies_full_series = np.zeros(length)
+    if len(pointwise) > 0:
+        assert pointwise[-1] < length
+        anomalies_full_series[pointwise] = 1
+    return np.array(anomalies_full_series)
+
+
+def full_series_to_pointwise(full_series):
+    """Reformat anomaly time series from full_series to pointwise"""
+    anomalies_pointwise = []
+    for i in range(len(full_series)):
+        if full_series[i] == 1:
+            anomalies_pointwise.append(i)
+    return np.array(anomalies_pointwise)
+
+
+def full_series_to_segmentwise(full_series):
+    """Reformat anomaly time series from full_series to segmentwise"""
+    anomalies_segmentwise = []
+    i = 0
+    while i < len(full_series):
+        if full_series[i] == 1:
+            start = i
+            while i < len(full_series) and full_series[i] == 1:
+                i += 1
+            end = i - 1
+            anomalies_segmentwise.append([start, end])
+        else:
+            i += 1
+    return np.array(anomalies_segmentwise)
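The three label representations used throughout the package round-trip through these helpers. A small illustrative example (values chosen ad hoc):

```python
import numpy as np
from tsadmetrics.utils.functions_conversion import (
    full_series_to_segmentwise, full_series_to_pointwise,
    pointwise_to_segmentwise, segmentwise_to_full_series,
)

# full_series: one 0/1 flag per timestamp
labels = np.array([0, 1, 1, 0, 0, 1, 0, 1, 1, 1])

print(full_series_to_segmentwise(labels))   # [[1 2] [5 5] [7 9]]
print(full_series_to_pointwise(labels))     # [1 2 5 7 8 9]
print(pointwise_to_segmentwise([1, 2, 5, 7, 8, 9]))   # [[1 2] [5 5] [7 9]]
print(segmentwise_to_full_series([[1, 2], [5, 5], [7, 9]], 10))  # back to labels
```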
tsadmetrics/utils/functions_counting_metrics.py
ADDED
@@ -0,0 +1,26 @@
+import numpy as np
+
+
+def counting_method(y_true: np.ndarray, y_pred: np.ndarray, k: int):
+    em, da, ma, fa = 0, 0, 0, 0
+    for i_gt in range(len(y_true)):
+        i_pa = i_gt
+        gt = y_true[i_gt]
+        pa = y_pred[i_pa]
+        if gt == 1 and pa == 1:
+            em += 1
+        elif gt == 0 and pa == 1:
+            fa += 1
+        elif gt == 1 and pa == 0:
+            # look for a prediction within k points on either side;
+            # clamp the lower bound so a negative index does not wrap around
+            anom_range = y_pred[max(0, i_gt - k):i_pa + k + 1]
+            detected = False
+            for r in anom_range:
+                if r == 1:
+                    em += 1
+                    detected = True
+                    break
+            if not detected:
+                ma += 1
+        elif gt == 0 and pa == 0:
+            pass
+
+    return em, da, ma, fa
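A quick sanity check of `counting_method` on toy full-series arrays (illustrative values; note that `da` is returned but never incremented by this implementation):

```python
import numpy as np
from tsadmetrics.utils.functions_counting_metrics import counting_method

y_true = np.array([0, 1, 1, 0, 0, 1, 0])
y_pred = np.array([0, 0, 1, 1, 0, 0, 1])

# With k=1, the misses at positions 1 and 5 are rescued by neighboring
# predictions, so: em=3, da=0, ma=0, fa=2.
em, da, ma, fa = counting_method(y_true, y_pred, k=1)
print(em, da, ma, fa)
```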
tsadmetrics/{_tsadeval/latency_sparsity_aware.py → utils/functions_latency_sparsity_aware.py}
RENAMED
@@ -4,7 +4,7 @@
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without modification,
-# are permitted provided that the following conditions are
+# are permitted provided that the following conditions are tem:
 #
 # * Redistributions of source code must retain the above copyright notice, this
 #   list of conditions and the following disclaimer.
@@ -20,7 +20,8 @@
 from collections import namedtuple
 import logging
 import math
-
+import numpy as np
+from .functions_conversion import pointwise_to_full_series, pointwise_to_segmentwise
 logger = logging.getLogger(__name__)
 AnomalyPoint = namedtuple(
     "AnomalyPoint",
@@ -311,3 +312,16 @@ class Sweeper(object):
             [x.sweepScore for x in anomalyList],
             matchingRow
         )
+
+def calculate_scores(sweeper, y_true, y_pred, length):
+    anomaly_scores = pointwise_to_full_series(y_pred, length)
+    timestamps = np.arange(length)
+    windowLimits = pointwise_to_segmentwise(y_true)
+    dataSetName = "dummyname"
+    anomalyList = sweeper.calcSweepScore(timestamps, anomaly_scores, windowLimits, dataSetName)
+    scoresByThreshold = sweeper.calcScoreByThreshold(anomalyList)
+
+    assert scoresByThreshold[0].threshold == 1.1  # all points regarded normal
+    assert scoresByThreshold[1].threshold == 1.0  # anomal points regarded anomal
+
+    return sweeper, scoresByThreshold[0].score, scoresByThreshold[1].score
tsadmetrics-1.0.0.dist-info/METADATA
ADDED
@@ -0,0 +1,69 @@
+Metadata-Version: 2.1
+Name: tsadmetrics
+Version: 1.0.0
+Summary: Library for evaluating anomaly detection in time series
+Home-page: https://github.com/pathsko/TSADmetrics
+Author: Pedro Rafael Velasco Priego
+Author-email: Pedro Rafael Velasco Priego <i12veprp@uco.es>
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: joblib==1.4.2
+Requires-Dist: numpy==1.24.4
+Requires-Dist: pandas==2.0.3
+Requires-Dist: PATE==0.1.1
+Requires-Dist: patsy==0.5.6
+Requires-Dist: python-dateutil==2.9.0.post0
+Requires-Dist: pytz==2024.1
+Requires-Dist: scikit-learn==1.3.2
+Requires-Dist: scipy==1.10.1
+Requires-Dist: six==1.16.0
+Requires-Dist: statsmodels==0.14.1
+Requires-Dist: threadpoolctl==3.5.0
+Requires-Dist: tzdata==2024.1
+
+# TSADmetrics - Time Series Anomaly Detection Metrics
+
+**TSADmetrics** is a Python library for evaluating anomaly detection algorithms in time series data.
+It provides a comprehensive set of metrics specifically designed to handle the temporal nature of anomalies.
+
+---
+
+## Features
+
+- **Metric Taxonomy**: Metrics are categorized by how they handle temporal context:
+
+  - **MPI Metrics**: Evaluate predictions at each point independently, ignoring temporal continuity.
+  - **MET Metrics**: Consider temporal context, analyzing when and how anomalies occur:
+    - **MDPT**: Partial detection within a real anomaly event counts as correct.
+    - **MDTP**: Requires detection to cover a significant fraction of the real anomaly.
+    - **MECT**: Measures alignment of real vs. predicted anomaly events.
+    - **MPR**: Penalizes late detections.
+    - **MTDT**: Allows temporal tolerance for early or late detections.
+
+- **Direct Metric Usage**: Instantiate any metric class and call `compute()` for individual evaluation (see the sketch after this list).
+
+- **Batch Evaluation**: Use `Runner` to evaluate multiple datasets and metrics at once, with support for both direct data and CSV/JSON input.
+
+- **Flexible Configuration**: Load metrics from YAML configuration files or global evaluation config files.
+
+- **CLI Tool**: Compute metrics directly from files without writing Python code.
+
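As an illustration of the direct-usage mode, here is a hypothetical sketch, not taken from the package documentation: the class name `PointwiseFScore` and its module path come from this release's file list, but the constructor arguments and the exact signature of `compute()` are assumptions.

```python
import numpy as np
from tsadmetrics.metrics.spm import PointwiseFScore  # path from the RECORD above

y_true = np.array([0, 0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 1, 1, 0, 0])

# Assumed signature: compute(ground_truth, prediction) -> float
metric = PointwiseFScore()
print(metric.compute(y_true, y_pred))
```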
+
---
|
52
|
+
|
53
|
+
## Installation
|
54
|
+
|
55
|
+
Install TSADmetrics via pip:
|
56
|
+
|
57
|
+
```bash
|
58
|
+
pip install tsadmetrics
|
59
|
+
```
|
60
|
+
|
61
|
+
## Documentation
|
62
|
+
|
63
|
+
The complete documentation for TSADmetrics is available at:
|
64
|
+
📚 [https://tsadmetrics.readthedocs.io/](https://tsadmetrics.readthedocs.io/)
|
65
|
+
|
66
|
+
## Acknowledgements
|
67
|
+
|
68
|
+
This library is based on the concepts and implementations from:
|
69
|
+
Sørbø, S., & Ruocco, M. (2023). *Navigating the metric maze: a taxonomy of evaluation metrics for anomaly detection in time series*. https://doi.org/10.1007/s10618-023-00988-8
|