valor-lite 0.32.2a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valor-lite might be problematic. Click here for more details.
- valor_lite/__init__.py +0 -0
- valor_lite/detection/__init__.py +56 -0
- valor_lite/detection/annotation.py +54 -0
- valor_lite/detection/computation.py +506 -0
- valor_lite/detection/manager.py +845 -0
- valor_lite/detection/metric.py +357 -0
- valor_lite/schemas.py +15 -0
- valor_lite-0.32.2a2.dist-info/LICENSE +21 -0
- valor_lite-0.32.2a2.dist-info/METADATA +40 -0
- valor_lite-0.32.2a2.dist-info/RECORD +12 -0
- valor_lite-0.32.2a2.dist-info/WHEEL +5 -0
- valor_lite-0.32.2a2.dist-info/top_level.txt +1 -0
valor_lite/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from .annotation import Bitmask, BoundingBox, Detection
|
|
2
|
+
from .computation import (
|
|
3
|
+
compute_detailed_pr_curve,
|
|
4
|
+
compute_iou,
|
|
5
|
+
compute_metrics,
|
|
6
|
+
compute_ranked_pairs,
|
|
7
|
+
)
|
|
8
|
+
from .manager import DataLoader, Evaluator
|
|
9
|
+
from .metric import (
|
|
10
|
+
AP,
|
|
11
|
+
AR,
|
|
12
|
+
F1,
|
|
13
|
+
Accuracy,
|
|
14
|
+
APAveragedOverIOUs,
|
|
15
|
+
ARAveragedOverScores,
|
|
16
|
+
Counts,
|
|
17
|
+
DetailedPrecisionRecallCurve,
|
|
18
|
+
DetailedPrecisionRecallPoint,
|
|
19
|
+
MetricType,
|
|
20
|
+
Precision,
|
|
21
|
+
PrecisionRecallCurve,
|
|
22
|
+
Recall,
|
|
23
|
+
mAP,
|
|
24
|
+
mAPAveragedOverIOUs,
|
|
25
|
+
mAR,
|
|
26
|
+
mARAveragedOverScores,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Public API surface, grouped by module of origin (annotation / metric /
# computation / manager). Order is preserved from the original declaration.
__all__ = [
    "Bitmask", "BoundingBox", "Detection",
    "MetricType", "Counts", "Precision", "Recall", "Accuracy", "F1",
    "AP", "mAP", "AR", "mAR",
    "APAveragedOverIOUs", "mAPAveragedOverIOUs",
    "ARAveragedOverScores", "mARAveragedOverScores",
    "PrecisionRecallCurve",
    "DetailedPrecisionRecallPoint", "DetailedPrecisionRecallCurve",
    "compute_iou", "compute_ranked_pairs",
    "compute_metrics", "compute_detailed_pr_curve",
    "DataLoader", "Evaluator",
]
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from numpy.typing import NDArray
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class BoundingBox:
    """
    Axis-aligned bounding box annotation.

    Attributes
    ----------
    xmin, xmax, ymin, ymax : float
        Box extrema.
    labels : list[tuple[str, str]]
        (key, value) label pairs attached to the box.
    scores : list[float], optional
        Prediction confidences; when non-empty, must pair 1:1 with `labels`.

    Raises
    ------
    ValueError
        If scores are provided but their count differs from the label count.
    """

    xmin: float
    xmax: float
    ymin: float
    ymax: float
    labels: list[tuple[str, str]]
    scores: list[float] = field(default_factory=list)

    def __post_init__(self):
        # An empty score list is allowed (ground truths carry no scores);
        # otherwise every label needs exactly one score.
        n_scores = len(self.scores)
        if n_scores > 0 and n_scores != len(self.labels):
            raise ValueError(
                "If scores are defined, there must be a 1:1 pairing with labels."
            )

    @property
    def extrema(self) -> tuple[float, float, float, float]:
        """Return the box extrema as (xmin, xmax, ymin, ymax)."""
        return (self.xmin, self.xmax, self.ymin, self.ymax)
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class Bitmask:
    """
    Raster (bitmask) annotation.

    Attributes
    ----------
    mask : NDArray[np.bool_]
        Boolean pixel mask.
    labels : list[tuple[str, str]]
        (key, value) label pairs attached to the mask.
    scores : list[float], optional
        Prediction confidences; when non-empty, must pair 1:1 with `labels`.

    Raises
    ------
    ValueError
        If scores are provided but their count differs from the label count.
    """

    mask: NDArray[np.bool_]
    labels: list[tuple[str, str]]
    scores: list[float] = field(default_factory=list)

    def __post_init__(self):
        # Same validation contract as BoundingBox: scores are optional,
        # but when present must align 1:1 with labels.
        if self.scores and len(self.scores) != len(self.labels):
            raise ValueError(
                "If scores are defined, there must be a 1:1 pairing with labels."
            )

    def to_box(self) -> BoundingBox:
        """Convert the mask to a bounding box (not yet implemented)."""
        raise NotImplementedError
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class Detection:
    """
    A single datum pairing ground-truth and predicted bounding boxes.

    Attributes
    ----------
    uid : str
        Unique identifier for the datum.
    groundtruths : list[BoundingBox]
        Ground-truth boxes (scores not required).
    predictions : list[BoundingBox]
        Predicted boxes; each must carry one score per label.

    Raises
    ------
    ValueError
        If any prediction's score count differs from its label count.
    """

    uid: str
    groundtruths: list[BoundingBox]
    predictions: list[BoundingBox]

    def __post_init__(self):
        # Unlike ground truths, predictions may not omit scores:
        # every label on every prediction needs a confidence.
        for pd in self.predictions:
            if len(pd.scores) != len(pd.labels):
                raise ValueError(
                    "Predictions must provide a score for every label."
                )
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from numpy.typing import NDArray
|
|
3
|
+
|
|
4
|
+
# datum id 0
|
|
5
|
+
# gt 1
|
|
6
|
+
# pd 2
|
|
7
|
+
# iou 3
|
|
8
|
+
# gt label 4
|
|
9
|
+
# pd label 5
|
|
10
|
+
# score 6
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def compute_iou(data: NDArray[np.floating]) -> NDArray[np.floating]:
    """
    Compute pairwise IoU for rows of paired box extrema.

    Each row of `data` packs two boxes:
    columns 0-3 are (xmin, xmax, ymin, ymax) of the first box and
    columns 4-7 the same extrema of the second box.

    Parameters
    ----------
    data : NDArray[np.floating]
        Array of shape (n, 8) as described above.

    Returns
    -------
    NDArray[np.floating]
        IoU per row; 0.0 where the union area is (near) zero.
    """
    lhs = data[:, 0:4]
    rhs = data[:, 4:8]

    # Intersection rectangle; a negative extent means "no overlap" and is
    # clamped to zero below.
    x_lo = np.maximum(lhs[:, 0], rhs[:, 0])
    x_hi = np.minimum(lhs[:, 1], rhs[:, 1])
    y_lo = np.maximum(lhs[:, 2], rhs[:, 2])
    y_hi = np.minimum(lhs[:, 3], rhs[:, 3])
    overlap = np.maximum(0, x_hi - x_lo) * np.maximum(0, y_hi - y_lo)

    area_lhs = (lhs[:, 1] - lhs[:, 0]) * (lhs[:, 3] - lhs[:, 2])
    area_rhs = (rhs[:, 1] - rhs[:, 0]) * (rhs[:, 3] - rhs[:, 2])
    union = area_lhs + area_rhs - overlap

    # Guard against division by a degenerate union; those rows stay 0.0.
    result = np.zeros(data.shape[0])
    well_defined = union >= 1e-9
    result[well_defined] = overlap[well_defined] / union[well_defined]
    return result
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _compute_ranked_pairs_for_datum(
|
|
51
|
+
data: np.ndarray,
|
|
52
|
+
label_counts: np.ndarray,
|
|
53
|
+
) -> np.ndarray:
|
|
54
|
+
"""
|
|
55
|
+
Computes ranked pairs for a datum.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
# remove null predictions
|
|
59
|
+
data = data[data[:, 2] >= 0.0]
|
|
60
|
+
|
|
61
|
+
# sort by gt_id, iou, score
|
|
62
|
+
indices = np.lexsort(
|
|
63
|
+
(
|
|
64
|
+
data[:, 1],
|
|
65
|
+
-data[:, 3],
|
|
66
|
+
-data[:, 6],
|
|
67
|
+
)
|
|
68
|
+
)
|
|
69
|
+
data = data[indices]
|
|
70
|
+
|
|
71
|
+
# remove ignored predictions
|
|
72
|
+
for label_idx, count in enumerate(label_counts[:, 0]):
|
|
73
|
+
if count > 0:
|
|
74
|
+
continue
|
|
75
|
+
data = data[data[:, 5] != label_idx]
|
|
76
|
+
|
|
77
|
+
# only keep the highest ranked pair
|
|
78
|
+
_, indices = np.unique(data[:, [0, 2, 5]], axis=0, return_index=True)
|
|
79
|
+
|
|
80
|
+
# np.unique orders its results by value, we need to sort the indices to maintain the results of the lexsort
|
|
81
|
+
data = data[indices, :]
|
|
82
|
+
|
|
83
|
+
return data
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def compute_ranked_pairs(
    data: list[NDArray[np.floating]],
    label_counts: NDArray[np.integer],
) -> NDArray[np.floating]:
    """
    Rank (gt, pd) pairs across all data.

    Each element of `data` is ranked per-datum first, then all pairs are
    merged and globally ordered by score (desc), then IoU (desc).

    Parameters
    ----------
    data : list[NDArray[np.floating]]
        One pair array per datum.
    label_counts : NDArray[np.integer]
        Per-label array whose column 0 is the ground-truth count.

    Returns
    -------
    NDArray[np.floating]
        Globally ranked pairs.
    """
    per_datum = [
        _compute_ranked_pairs_for_datum(datum, label_counts=label_counts)
        for datum in data
    ]
    pairs = np.concatenate(per_datum, axis=0)

    # np.lexsort uses its last key as primary: score desc, then iou desc.
    order = np.lexsort(
        (
            -pairs[:, 3],  # iou
            -pairs[:, 6],  # score
        )
    )
    return pairs[order]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def compute_metrics(
    data: np.ndarray,
    label_counts: np.ndarray,
    iou_thresholds: np.ndarray,
    score_thresholds: np.ndarray,
) -> tuple[
    tuple[
        NDArray[np.floating],
        NDArray[np.floating],
        NDArray[np.floating],
        NDArray[np.floating],
    ],
    tuple[
        NDArray[np.floating],
        NDArray[np.floating],
        NDArray[np.floating],
        NDArray[np.floating],
    ],
    NDArray[np.floating],
    NDArray[np.floating],
]:
    """
    Computes Object Detection metrics.

    `data` rows follow the module-level column layout:
    0 datum id, 1 gt id, 2 pd id, 3 iou, 4 gt label, 5 pd label, 6 score.
    A gt id of -1 encodes "no matching ground truth".
    NOTE(review): rows are presumably pre-ranked by `compute_ranked_pairs`
    (the running AP counters below depend on row order) — confirm with callers.

    Parameters
    ----------
    data : np.ndarray
        Ranked (gt, pd) pair rows as described above.
    label_counts : np.ndarray
        Per-label array: column 0 is the ground-truth count; column 2 is the
        label-key index used for mAP/mAR grouping.
    iou_thresholds : np.ndarray
        IoU thresholds to evaluate at.
    score_thresholds : np.ndarray
        Score thresholds to evaluate at.

    Returns
    -------
    tuple[NDArray, NDArray, NDArray, NDArray]
        Average Precision results: (AP, mAP, AP averaged over IoUs,
        mAP averaged over IoUs).
    tuple[NDArray, NDArray, NDArray, NDArray]
        Average Recall results: (AR, mAR, AR averaged over scores,
        mAR averaged over scores).
    NDArray
        Shape (n_ious, n_scores, n_labels, 7) stacking
        TP, FP, FN, Precision, Recall, F1 Score, Accuracy.
    NDArray
        Interpolated 101-point Precision-Recall curves,
        shape (n_ious, n_labels, 101).
    """

    n_rows = data.shape[0]
    n_labels = label_counts.shape[0]
    n_ious = iou_thresholds.shape[0]
    n_scores = score_thresholds.shape[0]

    average_precision = np.zeros((n_ious, n_labels))
    average_recall = np.zeros((n_scores, n_labels))
    precision_recall = np.zeros((n_ious, n_scores, n_labels, 7))

    pd_labels = data[:, 5].astype(int)
    unique_pd_labels = np.unique(pd_labels)
    gt_count = label_counts[:, 0]
    # Per-(iou, row) running counters used to trace the PR curve in rank order.
    running_total_count = np.zeros(
        (n_ious, n_rows),
        dtype=np.float64,
    )
    running_tp_count = np.zeros_like(running_total_count)
    running_gt_count = np.zeros_like(running_total_count)
    # 101 recall points (0.00 .. 1.00 in steps of 0.01) per iou/label.
    pr_curve = np.zeros((n_ious, n_labels, 101))

    # Threshold-independent masks, hoisted out of both loops.
    mask_score_nonzero = data[:, 6] > 1e-9
    mask_gt_exists = data[:, 1] >= 0.0  # gt id of -1 means "no ground truth"
    mask_labels_match = np.isclose(data[:, 4], data[:, 5])

    mask_gt_exists_labels_match = mask_gt_exists & mask_labels_match

    mask_tp = mask_score_nonzero & mask_gt_exists_labels_match
    mask_fp = mask_score_nonzero
    mask_fn = mask_gt_exists_labels_match

    for iou_idx in range(n_ious):
        mask_iou = data[:, 3] >= iou_thresholds[iou_idx]

        mask_tp_outer = mask_tp & mask_iou
        # FP at this IoU: scored pair that either overlaps but mismatches the
        # label, or fails the IoU threshold entirely.
        mask_fp_outer = mask_fp & (
            (~mask_gt_exists_labels_match & mask_iou) | ~mask_iou
        )
        mask_fn_outer = mask_fn & mask_iou

        for score_idx in range(n_scores):
            mask_score_thresh = data[:, 6] >= score_thresholds[score_idx]

            mask_tp_inner = mask_tp_outer & mask_score_thresh
            mask_fp_inner = mask_fp_outer & mask_score_thresh
            mask_fn_inner = mask_fn_outer & ~mask_score_thresh

            # create true-positive mask score threshold: keep only the first
            # (best-ranked) pair per unique (datum, gt, gt label) triple so a
            # ground truth is matched at most once.
            tp_candidates = data[mask_tp_inner]
            _, indices_gt_unique = np.unique(
                tp_candidates[:, [0, 1, 4]], axis=0, return_index=True
            )
            mask_gt_unique = np.zeros(tp_candidates.shape[0], dtype=bool)
            mask_gt_unique[indices_gt_unique] = True
            true_positives_mask = np.zeros(n_rows, dtype=bool)
            true_positives_mask[mask_tp_inner] = mask_gt_unique

            # calculate intermediates (per-label counts via bincount)
            pd_count = np.bincount(pd_labels, minlength=n_labels).astype(float)
            tp_count = np.bincount(
                pd_labels,
                weights=true_positives_mask,
                minlength=n_labels,
            ).astype(float)

            fp_count = np.bincount(
                pd_labels[mask_fp_inner],
                minlength=n_labels,
            ).astype(float)

            # NOTE(review): this bincount result is never used — fn_count is
            # overwritten as gt_count - tp_count below. Dead computation?
            fn_count = np.bincount(
                pd_labels[mask_fn_inner],
                minlength=n_labels,
            )

            # calculate component metrics; `where=` guards leave the
            # zero-initialized `out` untouched on division by ~0
            recall = np.zeros_like(tp_count)
            precision = np.zeros_like(tp_count)
            np.divide(tp_count, gt_count, where=gt_count > 1e-9, out=recall)
            np.divide(tp_count, pd_count, where=pd_count > 1e-9, out=precision)
            fn_count = gt_count - tp_count

            # NOTE(review): this is precision*recall / (precision+recall),
            # i.e. half the conventional F1 — confirm intended definition.
            f1_score = np.zeros_like(precision)
            np.divide(
                np.multiply(precision, recall),
                (precision + recall),
                where=(precision + recall) > 1e-9,
                out=f1_score,
            )

            accuracy = np.zeros_like(tp_count)
            np.divide(
                tp_count,
                (gt_count + pd_count),
                where=(gt_count + pd_count) > 1e-9,
                out=accuracy,
            )

            # pack per-label columns: TP, FP, FN, P, R, F1, Accuracy
            precision_recall[iou_idx][score_idx] = np.concatenate(
                (
                    tp_count[:, np.newaxis],
                    fp_count[:, np.newaxis],
                    fn_count[:, np.newaxis],
                    precision[:, np.newaxis],
                    recall[:, np.newaxis],
                    f1_score[:, np.newaxis],
                    accuracy[:, np.newaxis],
                ),
                axis=1,
            )

            # calculate recall for AR (accumulated over ious, divided later)
            average_recall[score_idx] += recall

        # create true-positive mask score threshold — same uniquing as above
        # but ignoring the score threshold (AP integrates over scores)
        tp_candidates = data[mask_tp_outer]
        _, indices_gt_unique = np.unique(
            tp_candidates[:, [0, 1, 4]], axis=0, return_index=True
        )
        mask_gt_unique = np.zeros(tp_candidates.shape[0], dtype=bool)
        mask_gt_unique[indices_gt_unique] = True
        true_positives_mask = np.zeros(n_rows, dtype=bool)
        true_positives_mask[mask_tp_outer] = mask_gt_unique

        # count running tp and total for AP: cumulative counts in rank order
        # per predicted label
        for pd_label in unique_pd_labels:
            mask_pd_label = pd_labels == pd_label
            running_gt_count[iou_idx][mask_pd_label] = gt_count[pd_label]
            running_total_count[iou_idx][mask_pd_label] = np.arange(
                1, mask_pd_label.sum() + 1
            )
            mask_tp_for_counting = mask_pd_label & true_positives_mask
            running_tp_count[iou_idx][mask_tp_for_counting] = np.arange(
                1, mask_tp_for_counting.sum() + 1
            )

    # calculate running precision-recall points for AP
    precision = np.zeros_like(running_total_count)
    np.divide(
        running_tp_count,
        running_total_count,
        where=running_total_count > 1e-9,
        out=precision,
    )
    recall = np.zeros_like(running_total_count)
    np.divide(
        running_tp_count,
        running_gt_count,
        where=running_gt_count > 1e-9,
        out=recall,
    )
    # bucket each recall value into one of the 101 curve points
    recall_index = np.floor(recall * 100.0).astype(int)
    for iou_idx in range(n_ious):
        p = precision[iou_idx]
        r = recall_index[iou_idx]
        pr_curve[iou_idx, pd_labels, r] = np.maximum(
            pr_curve[iou_idx, pd_labels, r], p
        )

    # calculate average precision: interpolate the curve right-to-left so
    # each point holds the max precision at any recall >= that point
    running_max = np.zeros((n_ious, n_labels))
    for recall in range(100, -1, -1):  # NOTE: shadows the `recall` array above
        precision = pr_curve[:, :, recall]
        running_max = np.maximum(precision, running_max)
        average_precision += running_max
        pr_curve[:, :, recall] = running_max
    average_precision = average_precision / 101.0

    # calculate average recall
    average_recall /= n_ious

    # calculate mAP and mAR by grouping labels that share a label key
    label_key_mapping = label_counts[unique_pd_labels, 2]
    label_keys = np.unique(label_counts[:, 2])
    # -1.0 marks keys with no predictions
    mAP = np.ones((n_ious, label_keys.shape[0])) * -1.0
    mAR = np.ones((n_scores, label_keys.shape[0])) * -1.0
    for key in np.unique(label_key_mapping):
        labels = unique_pd_labels[label_key_mapping == key]
        key_idx = int(key)
        mAP[:, key_idx] = average_precision[:, labels].mean(axis=1)
        mAR[:, key_idx] = average_recall[:, labels].mean(axis=1)

    # calculate AP and mAP averaged over iou thresholds
    APAveragedOverIoUs = average_precision.mean(axis=0)
    mAPAveragedOverIoUs = mAP.mean(axis=0)

    # calculate AR and mAR averaged over score thresholds
    ARAveragedOverIoUs = average_recall.mean(axis=0)
    mARAveragedOverIoUs = mAR.mean(axis=0)

    ap_results = (
        average_precision,
        mAP,
        APAveragedOverIoUs,
        mAPAveragedOverIoUs,
    )
    ar_results = (
        average_recall,
        mAR,
        ARAveragedOverIoUs,
        mARAveragedOverIoUs,
    )

    return (
        ap_results,
        ar_results,
        precision_recall,
        pr_curve,
    )
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def compute_detailed_pr_curve(
    data: np.ndarray,
    label_counts: np.ndarray,
    iou_thresholds: np.ndarray,
    score_thresholds: np.ndarray,
    n_samples: int,
) -> np.ndarray:
    """
    Compute a detailed precision-recall breakdown per label.

    `data` rows follow the module-level column layout:
    0 datum id, 1 gt id, 2 pd id, 3 iou, 4 gt label, 5 pd label, 6 score.
    An id of -1 in column 1 or 2 encodes a missing ground truth / prediction.

    Returns an array of shape (n_ious, n_scores, n_labels, 5 * (n_samples + 1)).
    The last axis is five contiguous segments, each of size (n_samples + 1),
    laid out as [count, up to n_samples example datum ids], in this order:
        1. true positives
        2. false positives - misclassification
        3. false positives - hallucination (prediction with no ground truth)
        4. false negatives - misclassification
        5. false negatives - missing prediction
    Unused example slots keep the -1.0 fill value.
    """

    n_labels = label_counts.shape[0]
    n_ious = iou_thresholds.shape[0]
    n_scores = score_thresholds.shape[0]
    # 5 outcome groups, each storing a count plus n_samples example slots
    n_metrics = 5 * (n_samples + 1)

    # start offset of each group's segment along the last axis
    tp_idx = 0
    fp_misclf_idx = tp_idx + n_samples + 1
    fp_halluc_idx = fp_misclf_idx + n_samples + 1
    fn_misclf_idx = fp_halluc_idx + n_samples + 1
    fn_misprd_idx = fn_misclf_idx + n_samples + 1

    # -1.0 fill marks empty example slots
    detailed_pr_curve = np.ones((n_ious, n_scores, n_labels, n_metrics)) * -1.0

    # ids are floats; > -0.5 distinguishes real ids (>= 0) from the -1 sentinel
    mask_gt_exists = data[:, 1] > -0.5
    mask_pd_exists = data[:, 2] > -0.5
    mask_label_match = np.isclose(data[:, 4], data[:, 5])

    mask_gt_pd_exists = mask_gt_exists & mask_pd_exists
    mask_gt_pd_match = mask_gt_pd_exists & mask_label_match
    mask_gt_pd_mismatch = mask_gt_pd_exists & ~mask_label_match

    for iou_idx in range(n_ious):
        mask_iou = data[:, 3] >= iou_thresholds[iou_idx]
        mask_gt_pd_match_iou = mask_gt_pd_match & mask_iou
        mask_gt_pd_mismatch_iou = mask_gt_pd_mismatch & mask_iou
        for score_idx in range(n_scores):
            mask_score = data[:, 6] >= score_thresholds[score_idx]
            # matched label + IoU + score -> true positive
            mask_tp = mask_gt_pd_match_iou & mask_score
            # overlapping pair with the wrong label -> FP (misclassification)
            mask_fp_misclf = mask_gt_pd_mismatch_iou & mask_score
            # correct match whose score fell below threshold -> FN (misclf)
            mask_fn_misclf = mask_gt_pd_match_iou & ~mask_score
            # rows in no matched category: unpaired predictions and
            # ground truths
            mask_halluc_missing = ~(
                mask_gt_pd_match_iou | (mask_gt_pd_mismatch & mask_score)
            )
            mask_fp_halluc = mask_halluc_missing & mask_pd_exists
            mask_fn_misprd = mask_halluc_missing & mask_gt_exists

            tp_slice = data[mask_tp]
            fp_misclf_slice = data[mask_fp_misclf]
            fp_halluc_slice = data[mask_fp_halluc]
            fn_misclf_slice = data[mask_fn_misclf]
            fn_misprd_slice = data[mask_fn_misprd]

            # per-label counts: FPs are attributed to the prediction label
            # (column 5), FNs to the ground-truth label (column 4)
            tp_count = np.bincount(
                tp_slice[:, 5].astype(int), minlength=n_labels
            )
            fp_misclf_count = np.bincount(
                fp_misclf_slice[:, 5].astype(int), minlength=n_labels
            )
            fp_halluc_count = np.bincount(
                fp_halluc_slice[:, 5].astype(int), minlength=n_labels
            )
            fn_misclf_count = np.bincount(
                fn_misclf_slice[:, 4].astype(int), minlength=n_labels
            )
            fn_misprd_count = np.bincount(
                fn_misprd_slice[:, 4].astype(int), minlength=n_labels
            )

            # write each group's count at the start of its segment
            detailed_pr_curve[iou_idx, score_idx, :, tp_idx] = tp_count
            detailed_pr_curve[
                iou_idx, score_idx, :, fp_misclf_idx
            ] = fp_misclf_count
            detailed_pr_curve[
                iou_idx, score_idx, :, fp_halluc_idx
            ] = fp_halluc_count
            detailed_pr_curve[
                iou_idx, score_idx, :, fn_misclf_idx
            ] = fn_misclf_count
            detailed_pr_curve[
                iou_idx, score_idx, :, fn_misprd_idx
            ] = fn_misprd_count

            if n_samples > 0:
                # fill the example slots after each count with up to
                # n_samples datum ids (column 0) drawn from each slice
                for label_idx in range(n_labels):
                    tp_examples = tp_slice[
                        tp_slice[:, 5].astype(int) == label_idx
                    ][:n_samples, 0]
                    fp_misclf_examples = fp_misclf_slice[
                        fp_misclf_slice[:, 5].astype(int) == label_idx
                    ][:n_samples, 0]
                    fp_halluc_examples = fp_halluc_slice[
                        fp_halluc_slice[:, 5].astype(int) == label_idx
                    ][:n_samples, 0]
                    fn_misclf_examples = fn_misclf_slice[
                        fn_misclf_slice[:, 4].astype(int) == label_idx
                    ][:n_samples, 0]
                    fn_misprd_examples = fn_misprd_slice[
                        fn_misprd_slice[:, 4].astype(int) == label_idx
                    ][:n_samples, 0]

                    detailed_pr_curve[
                        iou_idx,
                        score_idx,
                        label_idx,
                        tp_idx + 1 : tp_idx + 1 + tp_examples.shape[0],
                    ] = tp_examples
                    detailed_pr_curve[
                        iou_idx,
                        score_idx,
                        label_idx,
                        fp_misclf_idx
                        + 1 : fp_misclf_idx
                        + 1
                        + fp_misclf_examples.shape[0],
                    ] = fp_misclf_examples
                    detailed_pr_curve[
                        iou_idx,
                        score_idx,
                        label_idx,
                        fp_halluc_idx
                        + 1 : fp_halluc_idx
                        + 1
                        + fp_halluc_examples.shape[0],
                    ] = fp_halluc_examples
                    detailed_pr_curve[
                        iou_idx,
                        score_idx,
                        label_idx,
                        fn_misclf_idx
                        + 1 : fn_misclf_idx
                        + 1
                        + fn_misclf_examples.shape[0],
                    ] = fn_misclf_examples
                    detailed_pr_curve[
                        iou_idx,
                        score_idx,
                        label_idx,
                        fn_misprd_idx
                        + 1 : fn_misprd_idx
                        + 1
                        + fn_misprd_examples.shape[0],
                    ] = fn_misprd_examples

    return detailed_pr_curve
|