tsadmetrics 0.1.16__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. docs/api_doc/conf.py +67 -0
  2. docs/{conf.py → full_doc/conf.py} +1 -1
  3. docs/manual_doc/conf.py +67 -0
  4. examples/example_direct_data.py +28 -0
  5. examples/example_direct_single_data.py +25 -0
  6. examples/example_file_reference.py +24 -0
  7. examples/example_global_config_file.py +13 -0
  8. examples/example_metric_config_file.py +19 -0
  9. examples/example_simple_metric.py +8 -0
  10. examples/specific_examples/AbsoluteDetectionDistance_example.py +24 -0
  11. examples/specific_examples/AffiliationbasedFScore_example.py +24 -0
  12. examples/specific_examples/AverageDetectionCount_example.py +24 -0
  13. examples/specific_examples/CompositeFScore_example.py +24 -0
  14. examples/specific_examples/DelayThresholdedPointadjustedFScore_example.py +24 -0
  15. examples/specific_examples/DetectionAccuracyInRange_example.py +24 -0
  16. examples/specific_examples/EnhancedTimeseriesAwareFScore_example.py +24 -0
  17. examples/specific_examples/LatencySparsityawareFScore_example.py +24 -0
  18. examples/specific_examples/MeanTimeToDetect_example.py +24 -0
  19. examples/specific_examples/NabScore_example.py +24 -0
  20. examples/specific_examples/PateFScore_example.py +24 -0
  21. examples/specific_examples/Pate_example.py +24 -0
  22. examples/specific_examples/PointadjustedAtKFScore_example.py +24 -0
  23. examples/specific_examples/PointadjustedAucPr_example.py +24 -0
  24. examples/specific_examples/PointadjustedAucRoc_example.py +24 -0
  25. examples/specific_examples/PointadjustedFScore_example.py +24 -0
  26. examples/specific_examples/RangebasedFScore_example.py +24 -0
  27. examples/specific_examples/SegmentwiseFScore_example.py +24 -0
  28. examples/specific_examples/TemporalDistance_example.py +24 -0
  29. examples/specific_examples/TimeTolerantFScore_example.py +24 -0
  30. examples/specific_examples/TimeseriesAwareFScore_example.py +24 -0
  31. examples/specific_examples/TotalDetectedInRange_example.py +24 -0
  32. examples/specific_examples/VusPr_example.py +24 -0
  33. examples/specific_examples/VusRoc_example.py +24 -0
  34. examples/specific_examples/WeightedDetectionDifference_example.py +24 -0
  35. tests/test_dpm.py +212 -0
  36. tests/test_ptdm.py +366 -0
  37. tests/test_registry.py +58 -0
  38. tests/test_runner.py +185 -0
  39. tests/test_spm.py +213 -0
  40. tests/test_tmem.py +198 -0
  41. tests/test_tpdm.py +369 -0
  42. tests/test_tstm.py +338 -0
  43. tsadmetrics/__init__.py +0 -21
  44. tsadmetrics/base/Metric.py +188 -0
  45. tsadmetrics/evaluation/Report.py +25 -0
  46. tsadmetrics/evaluation/Runner.py +253 -0
  47. tsadmetrics/metrics/Registry.py +141 -0
  48. tsadmetrics/metrics/__init__.py +2 -0
  49. tsadmetrics/metrics/spm/PointwiseAucPr.py +62 -0
  50. tsadmetrics/metrics/spm/PointwiseAucRoc.py +63 -0
  51. tsadmetrics/metrics/spm/PointwiseFScore.py +86 -0
  52. tsadmetrics/metrics/spm/PrecisionAtK.py +81 -0
  53. tsadmetrics/metrics/spm/__init__.py +9 -0
  54. tsadmetrics/metrics/tem/dpm/DelayThresholdedPointadjustedFScore.py +83 -0
  55. tsadmetrics/metrics/tem/dpm/LatencySparsityawareFScore.py +76 -0
  56. tsadmetrics/metrics/tem/dpm/MeanTimeToDetect.py +47 -0
  57. tsadmetrics/metrics/tem/dpm/NabScore.py +60 -0
  58. tsadmetrics/metrics/tem/dpm/__init__.py +11 -0
  59. tsadmetrics/metrics/tem/ptdm/AverageDetectionCount.py +53 -0
  60. tsadmetrics/metrics/tem/ptdm/DetectionAccuracyInRange.py +66 -0
  61. tsadmetrics/metrics/tem/ptdm/PointadjustedAtKFScore.py +80 -0
  62. tsadmetrics/metrics/tem/ptdm/TimeseriesAwareFScore.py +248 -0
  63. tsadmetrics/metrics/tem/ptdm/TotalDetectedInRange.py +65 -0
  64. tsadmetrics/metrics/tem/ptdm/WeightedDetectionDifference.py +97 -0
  65. tsadmetrics/metrics/tem/ptdm/__init__.py +12 -0
  66. tsadmetrics/metrics/tem/tmem/AbsoluteDetectionDistance.py +48 -0
  67. tsadmetrics/metrics/tem/tmem/EnhancedTimeseriesAwareFScore.py +252 -0
  68. tsadmetrics/metrics/tem/tmem/TemporalDistance.py +68 -0
  69. tsadmetrics/metrics/tem/tmem/__init__.py +9 -0
  70. tsadmetrics/metrics/tem/tpdm/CompositeFScore.py +104 -0
  71. tsadmetrics/metrics/tem/tpdm/PointadjustedAucPr.py +123 -0
  72. tsadmetrics/metrics/tem/tpdm/PointadjustedAucRoc.py +119 -0
  73. tsadmetrics/metrics/tem/tpdm/PointadjustedFScore.py +96 -0
  74. tsadmetrics/metrics/tem/tpdm/RangebasedFScore.py +236 -0
  75. tsadmetrics/metrics/tem/tpdm/SegmentwiseFScore.py +73 -0
  76. tsadmetrics/metrics/tem/tpdm/__init__.py +12 -0
  77. tsadmetrics/metrics/tem/tstm/AffiliationbasedFScore.py +68 -0
  78. tsadmetrics/metrics/tem/tstm/Pate.py +62 -0
  79. tsadmetrics/metrics/tem/tstm/PateFScore.py +61 -0
  80. tsadmetrics/metrics/tem/tstm/TimeTolerantFScore.py +85 -0
  81. tsadmetrics/metrics/tem/tstm/VusPr.py +51 -0
  82. tsadmetrics/metrics/tem/tstm/VusRoc.py +55 -0
  83. tsadmetrics/metrics/tem/tstm/__init__.py +15 -0
  84. tsadmetrics/{_tsadeval/affiliation/_integral_interval.py → utils/functions_affiliation.py} +377 -9
  85. tsadmetrics/utils/functions_auc.py +393 -0
  86. tsadmetrics/utils/functions_conversion.py +63 -0
  87. tsadmetrics/utils/functions_counting_metrics.py +26 -0
  88. tsadmetrics/{_tsadeval/latency_sparsity_aware.py → utils/functions_latency_sparsity_aware.py} +1 -1
  89. tsadmetrics/{_tsadeval/nabscore.py → utils/functions_nabscore.py} +15 -1
  90. tsadmetrics-1.0.0.dist-info/METADATA +69 -0
  91. tsadmetrics-1.0.0.dist-info/RECORD +99 -0
  92. {tsadmetrics-0.1.16.dist-info → tsadmetrics-1.0.0.dist-info}/top_level.txt +1 -1
  93. entorno/bin/activate_this.py +0 -32
  94. entorno/bin/rst2html.py +0 -23
  95. entorno/bin/rst2html4.py +0 -26
  96. entorno/bin/rst2html5.py +0 -33
  97. entorno/bin/rst2latex.py +0 -26
  98. entorno/bin/rst2man.py +0 -27
  99. entorno/bin/rst2odt.py +0 -28
  100. entorno/bin/rst2odt_prepstyles.py +0 -20
  101. entorno/bin/rst2pseudoxml.py +0 -23
  102. entorno/bin/rst2s5.py +0 -24
  103. entorno/bin/rst2xetex.py +0 -27
  104. entorno/bin/rst2xml.py +0 -23
  105. entorno/bin/rstpep2html.py +0 -25
  106. tests/test_binary.py +0 -946
  107. tests/test_non_binary.py +0 -420
  108. tests/test_utils.py +0 -49
  109. tsadmetrics/_tsadeval/affiliation/_affiliation_zone.py +0 -86
  110. tsadmetrics/_tsadeval/affiliation/_single_ground_truth_event.py +0 -68
  111. tsadmetrics/_tsadeval/affiliation/generics.py +0 -135
  112. tsadmetrics/_tsadeval/affiliation/metrics.py +0 -114
  113. tsadmetrics/_tsadeval/auc_roc_pr_plot.py +0 -295
  114. tsadmetrics/_tsadeval/discontinuity_graph.py +0 -109
  115. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/File_IO.py +0 -175
  116. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Range.py +0 -50
  117. tsadmetrics/_tsadeval/eTaPR_pkg/DataManage/Time_Plot.py +0 -184
  118. tsadmetrics/_tsadeval/eTaPR_pkg/__init__.py +0 -0
  119. tsadmetrics/_tsadeval/eTaPR_pkg/etapr.py +0 -386
  120. tsadmetrics/_tsadeval/eTaPR_pkg/tapr.py +0 -362
  121. tsadmetrics/_tsadeval/metrics.py +0 -698
  122. tsadmetrics/_tsadeval/prts/__init__.py +0 -0
  123. tsadmetrics/_tsadeval/prts/base/__init__.py +0 -0
  124. tsadmetrics/_tsadeval/prts/base/time_series_metrics.py +0 -165
  125. tsadmetrics/_tsadeval/prts/basic_metrics_ts.py +0 -121
  126. tsadmetrics/_tsadeval/prts/time_series_metrics/__init__.py +0 -0
  127. tsadmetrics/_tsadeval/prts/time_series_metrics/fscore.py +0 -61
  128. tsadmetrics/_tsadeval/prts/time_series_metrics/precision.py +0 -86
  129. tsadmetrics/_tsadeval/prts/time_series_metrics/precision_recall.py +0 -21
  130. tsadmetrics/_tsadeval/prts/time_series_metrics/recall.py +0 -85
  131. tsadmetrics/_tsadeval/tests.py +0 -376
  132. tsadmetrics/_tsadeval/threshold_plt.py +0 -30
  133. tsadmetrics/_tsadeval/time_tolerant.py +0 -33
  134. tsadmetrics/binary_metrics.py +0 -1652
  135. tsadmetrics/metric_utils.py +0 -98
  136. tsadmetrics/non_binary_metrics.py +0 -398
  137. tsadmetrics/scripts/__init__.py +0 -0
  138. tsadmetrics/scripts/compute_metrics.py +0 -42
  139. tsadmetrics/utils.py +0 -122
  140. tsadmetrics/validation.py +0 -35
  141. tsadmetrics-0.1.16.dist-info/METADATA +0 -23
  142. tsadmetrics-0.1.16.dist-info/RECORD +0 -64
  143. tsadmetrics-0.1.16.dist-info/entry_points.txt +0 -2
  144. /tsadmetrics/{_tsadeval → base}/__init__.py +0 -0
  145. /tsadmetrics/{_tsadeval/affiliation → evaluation}/__init__.py +0 -0
  146. /tsadmetrics/{_tsadeval/eTaPR_pkg/DataManage → metrics/tem}/__init__.py +0 -0
  147. /tsadmetrics/{_tsadeval/vus_utils.py → utils/functions_vus.py} +0 -0
  148. {tsadmetrics-0.1.16.dist-info → tsadmetrics-1.0.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,85 @@
+ from ....base.Metric import Metric
+ import numpy as np
+
+ class TimeTolerantFScore(Metric):
+     """
+     Calculate the time tolerant F-score for anomaly detection in time series.
+     This metric is based on the standard F-score, but applies a temporal adjustment
+     to the predictions before computing it. Specifically, a predicted anomalous point is considered
+     a true positive if it lies within a temporal window of size :math:`{\\tau}` around any ground-truth anomalous point.
+     This allows small temporal deviations in the predictions to be tolerated. The adjusted predictions are then used
+     to compute the standard point-wise F-score.
+
+     Implementation of https://link.springer.com/article/10.1007/s10618-023-00988-8
+
+     For more information, see the original paper:
+     https://arxiv.org/abs/2008.05788
+
+     Parameters:
+         t (int):
+             The time tolerance parameter. Default is 5.
+         beta (float):
+             The beta value, which determines the weight of precision in the combined score.
+             Default is 1, which gives equal weight to precision and recall.
+     """
+     name = "ttf"
+     binary_prediction = True
+     param_schema = {
+         "t": {
+             "default": 5,
+             "type": int
+         },
+         "beta": {
+             "default": 1.0,
+             "type": float
+         }
+     }
+
+     def __init__(self, **kwargs):
+         super().__init__(name="ttf", **kwargs)
+
+     def _compute(self, y_true, y_pred):
+         """
+         Calculate the time tolerant F-score (optimized version).
+         """
+         t = self.params['t']
+         beta = self.params['beta']
+
+         # Precompute masks for efficiency
+         true_anomalies = y_true == 1
+         predictions = y_pred == 1
+
+         # Create P′1 for recall: for each true anomaly, check if any prediction lies within ±t
+         p_prime1 = np.zeros_like(y_true, dtype=bool)
+
+         for i in np.where(true_anomalies)[0]:
+             start = max(0, i - t)
+             end = min(len(y_pred), i + t + 1)
+             if np.any(predictions[start:end]):
+                 p_prime1[i] = True
+
+         # Create P′2 for precision: for each prediction, check if any true anomaly lies within ±t
+         p_prime2 = np.zeros_like(y_pred, dtype=bool)
+
+         for j in np.where(predictions)[0]:
+             start = max(0, j - t)
+             end = min(len(y_true), j + t + 1)
+             if np.any(true_anomalies[start:end]):
+                 p_prime2[j] = True
+
+         # Calculate recall using P′1
+         tp_recall = np.sum(true_anomalies & p_prime1)
+         fn_recall = np.sum(true_anomalies & ~p_prime1)
+         recall = tp_recall / (tp_recall + fn_recall) if (tp_recall + fn_recall) > 0 else 0.0
+
+         # Calculate precision using P′2
+         tp_precision = np.sum(predictions & p_prime2)
+         fp_precision = np.sum(predictions & ~p_prime2)
+         precision = tp_precision / (tp_precision + fp_precision) if (tp_precision + fp_precision) > 0 else 0.0
+
+         # Calculate F-score
+         if precision == 0 and recall == 0:
+             return 0.0
+
+         f_score = ((1 + beta**2) * precision * recall) / (beta**2 * precision + recall)
+         return f_score
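
To make the ±t adjustment above concrete, here is a minimal standalone sketch (not part of the package; the function name is illustrative) that reproduces the same precision/recall counting on plain NumPy arrays:

import numpy as np

def time_tolerant_fscore_sketch(y_true, y_pred, t=5, beta=1.0):
    # A ground-truth point counts as covered if any prediction falls within +/- t of it,
    # and a predicted point counts as matched if any ground-truth point falls within +/- t of it.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    covered = [np.any(y_pred[max(0, i - t):i + t + 1] == 1) for i in np.where(y_true == 1)[0]]
    matched = [np.any(y_true[max(0, j - t):j + t + 1] == 1) for j in np.where(y_pred == 1)[0]]
    recall = np.mean(covered) if covered else 0.0
    precision = np.mean(matched) if matched else 0.0
    if precision == 0 and recall == 0:
        return 0.0
    return (1 + beta**2) * precision * recall / (beta**2 * precision + recall)

# Example: a prediction that is two steps late still counts with t=2
print(time_tolerant_fscore_sketch([0, 1, 0, 0, 0], [0, 0, 0, 1, 0], t=2))  # 1.0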
@@ -0,0 +1,51 @@
+ from ....base.Metric import Metric
+ from ....utils.functions_vus import generate_curve
+ import numpy as np
+ class VusPr(Metric):
+     """
+     Calculate the VUS-PR (Volume Under the PR Surface) score for anomaly detection in time series.
+
+     This metric is an extension of the classical AUC-PR, incorporating a temporal tolerance parameter `window`
+     that smooths the binary ground-truth labels. It allows for some flexibility in the detection of
+     anomalies that are temporally close to the true events. The final metric integrates the PR-AUC
+     over several levels of temporal tolerance (from 0 to `window`), yielding a volume under the PR surface.
+
+     Implementation of https://link.springer.com/article/10.1007/s10618-023-00988-8
+
+     For more information, see the original paper:
+     https://dl.acm.org/doi/10.14778/3551793.3551830
+
+     Parameters:
+         window (int):
+             Maximum temporal tolerance used to smooth the evaluation.
+             Default is 4.
+     """
+     name = "vus_pr"
+     binary_prediction = False
+     param_schema = {
+         "window": {
+             "default": 4,
+             "type": int
+         }
+     }
+
+     def __init__(self, **kwargs):
+         super().__init__(name="vus_pr", **kwargs)
+
+     def _compute(self, y_true, y_anomaly_scores):
+         """
+         Calculate the VUS-PR score.
+
+         Parameters:
+             y_true (np.array):
+                 Ground-truth binary labels (0 = normal, 1 = anomaly).
+             y_anomaly_scores (np.array):
+                 Anomaly scores for each time point.
+
+         Returns:
+             float: VUS-PR score.
+         """
+         window = self.params["window"]
+         _, _, _, _, _, _, _, pr = generate_curve(y_true, y_anomaly_scores, slidingWindow=window)
+
+         return pr
@@ -0,0 +1,55 @@
+ from ....base.Metric import Metric
+ from ....utils.functions_vus import generate_curve
+ import numpy as np
+
+ class VusRoc(Metric):
+     """
+     Calculate the VUS-ROC (Volume Under the ROC Surface) score for anomaly detection in time series.
+
+     This metric extends the classical AUC-ROC by introducing a temporal tolerance parameter `l`, which
+     smooths the binary ground-truth labels. The idea is to allow a flexible evaluation that tolerates
+     small misalignments in the detection of anomalies. The final score is computed by integrating
+     the ROC-AUC over different values of the tolerance parameter, from 0 to `window`, thus producing
+     a volume under the ROC surface.
+
+     Implementation of https://link.springer.com/article/10.1007/s10618-023-00988-8
+
+     For more information, see the original paper:
+     https://dl.acm.org/doi/10.14778/3551793.3551830
+
+     Parameters:
+         window (int):
+             Maximum temporal tolerance `l` used to smooth the evaluation.
+             Default is 4.
+     """
+     name = "vus_roc"
+     binary_prediction = False
+     param_schema = {
+         "window": {
+             "default": 4,
+             "type": int
+         }
+     }
+
+     def __init__(self, **kwargs):
+         super().__init__(name="vus_roc", **kwargs)
+
+     def _compute(self, y_true, y_anomaly_scores):
+         """
+         Calculate the VUS-ROC score.
+
+         Parameters:
+             y_true (np.array):
+                 Ground-truth binary labels (0 = normal, 1 = anomaly).
+             y_anomaly_scores (np.array):
+                 Anomaly scores for each time point.
+
+         Returns:
+             float: VUS-ROC score.
+         """
+
+         _, _, _, _, _, _, roc, _ = generate_curve(
+             y_true, y_anomaly_scores, self.params["window"]
+         )
+
+         return roc
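
A rough usage sketch for the two VUS metrics above. It assumes the Metric base class added in this release stores the schema parameters passed as keyword arguments in self.params; only the internal _compute hook is visible in this diff, so the call below goes through it directly rather than through the public Runner/Registry interface.

import numpy as np
from tsadmetrics.metrics.tem.tstm import VusPr, VusRoc

rng = np.random.default_rng(0)
y_true = np.zeros(200, dtype=int)
y_true[50:60] = 1                    # one anomalous segment
scores = rng.random(200)
scores[48:62] += 1.0                 # higher scores around (and slightly outside) the segment

# Assumption: VusPr(window=10) ends up with self.params["window"] == 10.
print(VusPr(window=10)._compute(y_true, scores))
print(VusRoc(window=10)._compute(y_true, scores))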
@@ -0,0 +1,15 @@
+ from .AffiliationbasedFScore import AffiliationbasedFScore
+ from .TimeTolerantFScore import TimeTolerantFScore
+ from .VusPr import VusPr
+ from .VusRoc import VusRoc
+ from .PateFScore import PateFScore
+ from .Pate import Pate
+
+ __all__ = [
+     "AffiliationbasedFScore",
+     "TimeTolerantFScore",
+     "VusPr",
+     "VusRoc",
+     "PateFScore",
+     "Pate"
+ ]
@@ -1,15 +1,244 @@
  #!/usr/bin/env python3
  # -*- coding: utf-8 -*-
  import math
- from .generics import _sum_wo_nan
- """
- In order to shorten the length of the variables,
- the general convention in this file is to let:
- - I for a predicted event (start, stop),
- - Is for a list of predicted events,
- - J for a ground truth event,
- - Js for a list of ground truth events.
- """
+ import glob
+ import os
+ import gzip
+ from itertools import groupby
+ from operator import itemgetter
+
+ # Generics
+ def convert_vector_to_events(vector = [0, 1, 1, 0, 0, 1, 0]):
+     """
+     Convert a binary vector (indicating 1 for the anomalous instances)
+     to a list of events. The events are considered as durations,
+     i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1).
+
+     :param vector: a list of elements belonging to {0, 1}
+     :return: a list of couples, each couple representing the start and stop of
+         each event
+     """
+     positive_indexes = [idx for idx, val in enumerate(vector) if val > 0]
+     events = []
+     for k, g in groupby(enumerate(positive_indexes), lambda ix : ix[0] - ix[1]):
+         cur_cut = list(map(itemgetter(1), g))
+         events.append((cur_cut[0], cur_cut[-1]))
+
+     # Consistent conversion in case of range anomalies (for indexes):
+     # a positive index i is considered as the interval [i, i+1),
+     # so the last index should be moved by 1
+     events = [(x, y+1) for (x,y) in events]
+
+     return(events)
+
+
+ def infer_Trange(events_pred, events_gt):
+     """
+     Given the list of events events_pred and events_gt, get the
+     smallest possible Trange corresponding to the start and stop indexes
+     of the whole series.
+     Trange will not influence the measure of distances, but will impact the
+     measures of probabilities.
+
+     :param events_pred: a list of couples corresponding to predicted events
+     :param events_gt: a list of couples corresponding to ground truth events
+     :return: a couple corresponding to the smallest range containing the events
+     """
+     if len(events_gt) == 0:
+         raise ValueError('The gt events should contain at least one event')
+     if len(events_pred) == 0:
+         # empty prediction, base Trange only on events_gt (which is non empty)
+         return(infer_Trange(events_gt, events_gt))
+
+     min_pred = min([x[0] for x in events_pred])
+     min_gt = min([x[0] for x in events_gt])
+     max_pred = max([x[1] for x in events_pred])
+     max_gt = max([x[1] for x in events_gt])
+     Trange = (min(min_pred, min_gt), max(max_pred, max_gt))
+     return(Trange)
+
+ def has_point_anomalies(events):
+     """
+     Check whether events contain point anomalies, i.e.
+     events starting and stopping at the same time.
+
+     :param events: a list of couples corresponding to predicted events
+     :return: True if the events have any point anomalies, False otherwise
+     """
+     if len(events) == 0:
+         return(False)
+     return(min([x[1] - x[0] for x in events]) == 0)
+
+ def _sum_wo_nan(vec):
+     """
+     Sum of elements, ignoring math.isnan ones
+
+     :param vec: vector of floating numbers
+     :return: sum of the elements, ignoring math.isnan ones
+     """
+     vec_wo_nan = [e for e in vec if not math.isnan(e)]
+     return(sum(vec_wo_nan))
+
+ def _len_wo_nan(vec):
+     """
+     Count of elements, ignoring math.isnan ones
+
+     :param vec: vector of floating numbers
+     :return: count of the elements, ignoring math.isnan ones
+     """
+     vec_wo_nan = [e for e in vec if not math.isnan(e)]
+     return(len(vec_wo_nan))
+
+ def read_gz_data(filename = 'data/machinetemp_groundtruth.gz'):
+     """
+     Load a file compressed with gz, such that each line of the
+     file is either 0 (representing a normal instance) or 1 (representing
+     an anomalous instance).
+     :param filename: file path to the gz compressed file
+     :return: list of integers with either 0 or 1
+     """
+     with gzip.open(filename, 'rb') as f:
+         content = f.read().splitlines()
+     content = [int(x) for x in content]
+     return(content)
+
+ def read_all_as_events():
+     """
+     Load the files contained in the folder `data/` and convert
+     them to events. The length of the series is kept.
+     The convention for the file name is: `dataset_algorithm.gz`
+     :return: two dictionaries:
+         - the first containing the list of events for each dataset and algorithm,
+         - the second containing the range of the series for each dataset
+     """
+     filepaths = glob.glob('data/*.gz')
+     datasets = dict()
+     Tranges = dict()
+     for filepath in filepaths:
+         vector = read_gz_data(filepath)
+         events = convert_vector_to_events(vector)
+         # ad hoc cut for those files
+         cut_filepath = (os.path.split(filepath)[1]).split('_')
+         data_name = cut_filepath[0]
+         algo_name = (cut_filepath[1]).split('.')[0]
+         if not data_name in datasets:
+             datasets[data_name] = dict()
+             Tranges[data_name] = (0, len(vector))
+         datasets[data_name][algo_name] = events
+     return(datasets, Tranges)
+
+
+
+ # Affiliation zone
+ def t_start(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)):
+     """
+     Helper for `E_gt_func`
+
+     :param j: index from 0 to len(Js) (included) on which to get the start
+     :param Js: ground truth events, as a list of couples
+     :param Trange: range of the series where Js is included
+     :return: generalized start such that the middle of t_start and t_stop
+         always gives the affiliation zone
+     """
+     b = max(Trange)
+     n = len(Js)
+     if j == n:
+         return(2*b - t_stop(n-1, Js, Trange))
+     else:
+         return(Js[j][0])
+
+
+ def t_stop(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)):
+     """
+     Helper for `E_gt_func`
+
+     :param j: index from 0 to len(Js) (included) on which to get the stop
+     :param Js: ground truth events, as a list of couples
+     :param Trange: range of the series where Js is included
+     :return: generalized stop such that the middle of t_start and t_stop
+         always gives the affiliation zone
+     """
+     if j == -1:
+         a = min(Trange)
+         return(2*a - t_start(0, Js, Trange))
+     else:
+         return(Js[j][1])
+
+
+ def E_gt_func(j, Js, Trange):
+     """
+     Get the affiliation zone of element j of the ground truth
+
+     :param j: index from 0 to len(Js) (excluded) on which to get the zone
+     :param Js: ground truth events, as a list of couples
+     :param Trange: range of the series where Js is included, can
+         be (-math.inf, math.inf) for distance measures
+     :return: affiliation zone of element j of the ground truth represented
+         as a couple
+     """
+     range_left = (t_stop(j-1, Js, Trange) + t_start(j, Js, Trange))/2
+     range_right = (t_stop(j, Js, Trange) + t_start(j+1, Js, Trange))/2
+     return((range_left, range_right))
+
+ def get_all_E_gt_func(Js, Trange):
+     """
+     Get the affiliation partition from the ground truth point of view
+
+     :param Js: ground truth events, as a list of couples
+     :param Trange: range of the series where Js is included, can
+         be (-math.inf, math.inf) for distance measures
+     :return: affiliation partition of the events
+     """
+     # E_gt is the limit of affiliation/attraction for each ground truth event
+     E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))]
+     return(E_gt)
+
+
+ def interval_intersection(I = (1, 3), J = (2, 4)):
+     """
+     Intersection between two intervals I and J
+     I and J should be either empty or represent a positive interval (no point)
+
+     :param I: an interval represented by start and stop
+     :param J: a second interval of the same form
+     :return: an interval representing the start and stop of the intersection (or None if empty)
+     """
+     if I is None:
+         return(None)
+     if J is None:
+         return(None)
+
+     I_inter_J = (max(I[0], J[0]), min(I[1], J[1]))
+     if I_inter_J[0] >= I_inter_J[1]:
+         return(None)
+     else:
+         return(I_inter_J)
+
+
+ def affiliation_partition(Is = [(1,1.5),(2,5),(5,6),(8,9)], E_gt = [(1,2.5),(2.5,4.5),(4.5,10)]):
+     """
+     Cut the events into the affiliation zones
+     The presentation given here is from the ground truth point of view,
+     but it is also used in the reversed direction in the main function.
+
+     :param Is: events as a list of couples
+     :param E_gt: range of the affiliation zones
+     :return: a list of lists of intervals (each interval represented by either
+         a couple or None for empty interval). The outer list is indexed by each
+         affiliation zone of `E_gt`. The inner list is indexed by the events of `Is`.
+     """
+     out = [None] * len(E_gt)
+     for j in range(len(E_gt)):
+         E_gt_j = E_gt[j]
+         discarded_idx_before = [I[1] < E_gt_j[0] for I in Is] # end point of predicted I is before the begin of E
+         discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E
+         kept_index = [not(a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)]
+         Is_j = [x for x, y in zip(Is, kept_index) if y]
+         out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j]
+     return(out)
+
+ # Single ground truth event
+
 
  def interval_length(J = (1,2)):
      """
@@ -462,3 +691,142 @@ def integral_interval_probaCDF_recall(I, J, E):
      d_right = f(cut_into_three[2])
      # It's an integral so summable
      return(d_left + d_middle + d_right)
+
+
+ def affiliation_precision_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)):
+     """
+     Compute the individual precision probability from Is to a single ground truth J
+
+     :param Is: list of predicted events within the affiliation zone of J
+     :param J: couple representing the start and stop of a ground truth interval
+     :param E: couple representing the start and stop of the zone of affiliation of J
+     :return: individual precision probability in [0, 1], or math.nan if undefined
+     """
+     if all([I is None for I in Is]): # no prediction in the current area
+         return(math.nan) # undefined
+     return(sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is))
+
+ def affiliation_recall_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)):
+     """
+     Compute the individual recall probability from a single ground truth J to Is
+
+     :param Is: list of predicted events within the affiliation zone of J
+     :param J: couple representing the start and stop of a ground truth interval
+     :param E: couple representing the start and stop of the zone of affiliation of J
+     :return: individual recall probability in [0, 1]
+     """
+     Is = [I for I in Is if I is not None] # filter possible None in Is
+     if len(Is) == 0: # there is no prediction in the current area
+         return(0)
+     E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions
+     Js = affiliation_partition([J], E_gt_recall) # partition of J depending on proximity with Is
+     return(sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J))
+ def test_events(events):
+     """
+     Verify the validity of the input events
+     :param events: list of events, each represented by a couple (start, stop)
+     :return: None. Raise an error for incorrectly formed or non-ordered events
+     """
+     if type(events) is not list:
+         raise TypeError('Input `events` should be a list of couples')
+     if not all([type(x) is tuple for x in events]):
+         raise TypeError('Input `events` should be a list of tuples')
+     if not all([len(x) == 2 for x in events]):
+         raise ValueError('Input `events` should be a list of couples (start, stop)')
+     if not all([x[0] <= x[1] for x in events]):
+         raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop')
+     if not all([events[i][1] < events[i+1][0] for i in range(len(events) - 1)]):
+         raise ValueError('Couples of input `events` should be disjoint and ordered')
+
+ def pr_from_events(events_pred, events_gt, Trange):
+     """
+     Compute the affiliation metrics, including the precision/recall in [0,1],
+     along with the individual precision/recall distances and probabilities
+
+     :param events_pred: list of predicted events, each represented by a couple
+         indicating the start and the stop of the event
+     :param events_gt: list of ground truth events, each represented by a couple
+         indicating the start and the stop of the event
+     :param Trange: range of the series where events_pred and events_gt are included,
+         represented as a couple (start, stop)
+     :return: dictionary with precision, recall, and the individual metrics
+     """
+     # testing the inputs
+     # test_events(events_pred)
+     # test_events(events_gt)
+
+     # other tests
+     minimal_Trange = infer_Trange(events_pred, events_gt)
+     if not Trange[0] <= minimal_Trange[0]:
+         raise ValueError('`Trange` should include all the events')
+     if not minimal_Trange[1] <= Trange[1]:
+         raise ValueError('`Trange` should include all the events')
+
+     if len(events_gt) == 0:
+         raise ValueError('Input `events_gt` should have at least one event')
+
+     if has_point_anomalies(events_pred) or has_point_anomalies(events_gt):
+         raise ValueError('Cannot manage point anomalies currently')
+
+     if Trange is None:
+         # Set as default, but Trange should be indicated if probabilities are used
+         raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function)')
+
+     E_gt = get_all_E_gt_func(events_gt, Trange)
+     aff_partition = affiliation_partition(events_pred, E_gt)
+
+     # # Computing precision distance
+     # d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
+
+     # # Computing recall distance
+     # d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
+
+     # Computing precision
+     p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
+
+     # Computing recall
+     p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
+
+     if _len_wo_nan(p_precision) > 0:
+         p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision)
+     else:
+         p_precision_average = p_precision[0] # math.nan
+     p_recall_average = sum(p_recall) / len(p_recall)
+
+     dict_out = dict({'precision': p_precision_average,
+                      'recall': p_recall_average,
+                      'individual_precision_probabilities': p_precision,
+                      'individual_recall_probabilities': p_recall})
+     return(dict_out)
+
+ def produce_all_results():
+     """
+     Produce the affiliation precision/recall for all files
+     contained in the `data` repository
+     :return: a dictionary indexed by data names, each containing a dictionary
+         indexed by algorithm names, each containing the results of the affiliation
+         metrics (precision, recall, individual probabilities and distances)
+     """
+     datasets, Tranges = read_all_as_events() # read all the events in folder `data`
+     results = dict()
+     for data_name in datasets.keys():
+         results_data = dict()
+         for algo_name in datasets[data_name].keys():
+             if algo_name != 'groundtruth':
+                 results_data[algo_name] = pr_from_events(datasets[data_name][algo_name],
+                                                          datasets[data_name]['groundtruth'],
+                                                          Tranges[data_name])
+         results[data_name] = results_data
+     return(results)
+
+
+ def reformat_segments(segments):  # convert [start, end] index pairs into end-exclusive tuples
+     segments = include_end_of_segments(segments)
+     segments = tuplify_segments(segments)
+     return segments
+
+ def include_end_of_segments(segments):  # shift each end index by 1 so segments become end-exclusive
+     return [[start, end + 1] for start, end in segments]
+
+ def tuplify_segments(segments):  # convert each [start, end] list into a (start, end) tuple
+     return [tuple(segment) for segment in segments]
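
An end-to-end sketch (illustrative, not part of the diff) tying the pieces together: binary label vectors are converted to event lists and fed to pr_from_events, assuming the renamed module is importable as tsadmetrics.utils.functions_affiliation:

from tsadmetrics.utils.functions_affiliation import convert_vector_to_events, pr_from_events

y_true = [0, 1, 1, 0, 0, 1, 1, 1, 0, 0]
y_pred = [0, 0, 1, 1, 0, 0, 1, 1, 1, 0]

events_gt = convert_vector_to_events(y_true)     # [(1, 3), (5, 8)]
events_pred = convert_vector_to_events(y_pred)   # [(2, 4), (6, 9)]
Trange = (0, len(y_true))                        # span covering every event

result = pr_from_events(events_pred, events_gt, Trange)
print(result['precision'], result['recall'])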