valor-lite 0.32.2a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valor-lite might be problematic. See the release advisory for more details.

valor_lite/__init__.py ADDED
File without changes
@@ -0,0 +1,56 @@
1
# Public API for valor_lite: re-exports the annotation types, the numpy
# computation kernels, the evaluation managers, and the metric dataclasses.
from .annotation import Bitmask, BoundingBox, Detection
from .computation import (
    compute_detailed_pr_curve,
    compute_iou,
    compute_metrics,
    compute_ranked_pairs,
)
from .manager import DataLoader, Evaluator
from .metric import (
    AP,
    AR,
    F1,
    Accuracy,
    APAveragedOverIOUs,
    ARAveragedOverScores,
    Counts,
    DetailedPrecisionRecallCurve,
    DetailedPrecisionRecallPoint,
    MetricType,
    Precision,
    PrecisionRecallCurve,
    Recall,
    mAP,
    mAPAveragedOverIOUs,
    mAR,
    mARAveragedOverScores,
)

# Explicit public interface: the names exported by `from valor_lite import *`.
__all__ = [
    "Bitmask",
    "BoundingBox",
    "Detection",
    "MetricType",
    "Counts",
    "Precision",
    "Recall",
    "Accuracy",
    "F1",
    "AP",
    "mAP",
    "AR",
    "mAR",
    "APAveragedOverIOUs",
    "mAPAveragedOverIOUs",
    "ARAveragedOverScores",
    "mARAveragedOverScores",
    "PrecisionRecallCurve",
    "DetailedPrecisionRecallPoint",
    "DetailedPrecisionRecallCurve",
    "compute_iou",
    "compute_ranked_pairs",
    "compute_metrics",
    "compute_detailed_pr_curve",
    "DataLoader",
    "Evaluator",
]
@@ -0,0 +1,54 @@
1
+ from dataclasses import dataclass, field
2
+
3
+ import numpy as np
4
+ from numpy.typing import NDArray
5
+
6
+
7
@dataclass
class BoundingBox:
    """An axis-aligned bounding box with labels and optional scores.

    Attributes
    ----------
    xmin, xmax, ymin, ymax : float
        Box extrema.
    labels : list[tuple[str, str]]
        Labels as (key, value) pairs.
    scores : list[float]
        Optional confidence scores; when non-empty, one per label.
    """

    xmin: float
    xmax: float
    ymin: float
    ymax: float
    labels: list[tuple[str, str]]
    scores: list[float] = field(default_factory=list)

    def __post_init__(self):
        # Scores are optional, but when present each label needs exactly one.
        if self.scores and len(self.scores) != len(self.labels):
            raise ValueError(
                "If scores are defined, there must be a 1:1 pairing with labels."
            )

    @property
    def extrema(self) -> tuple[float, float, float, float]:
        """Return the box extrema as (xmin, xmax, ymin, ymax)."""
        return self.xmin, self.xmax, self.ymin, self.ymax
25
+
26
+
27
@dataclass
class Bitmask:
    """A boolean segmentation mask with labels and optional scores.

    Attributes
    ----------
    mask : NDArray[np.bool_]
        Boolean pixel mask.
    labels : list[tuple[str, str]]
        Labels as (key, value) pairs.
    scores : list[float]
        Optional confidence scores; when non-empty, one per label.
    """

    mask: NDArray[np.bool_]
    labels: list[tuple[str, str]]
    scores: list[float] = field(default_factory=list)

    def __post_init__(self):
        # Scores are optional, but when present each label needs exactly one.
        if self.scores and len(self.scores) != len(self.labels):
            raise ValueError(
                "If scores are defined, there must be a 1:1 pairing with labels."
            )

    def to_box(self) -> BoundingBox:
        """Convert the mask to its bounding box (not yet implemented)."""
        raise NotImplementedError
41
+
42
+
43
@dataclass
class Detection:
    """Ground truths and predictions for a single datum.

    Attributes
    ----------
    uid : str
        Unique identifier of the datum.
    groundtruths : list[BoundingBox]
        Annotated ground-truth boxes.
    predictions : list[BoundingBox]
        Predicted boxes; each must carry one score per label.
    """

    uid: str
    groundtruths: list[BoundingBox]
    predictions: list[BoundingBox]

    def __post_init__(self):
        # Unlike ground truths, predictions are required to be scored.
        for pd in self.predictions:
            if len(pd.labels) != len(pd.scores):
                raise ValueError(
                    "Predictions must provide a score for every label."
                )
@@ -0,0 +1,506 @@
1
+ import numpy as np
2
+ from numpy.typing import NDArray
3
+
4
+ # datum id 0
5
+ # gt 1
6
+ # pd 2
7
+ # iou 3
8
+ # gt label 4
9
+ # pd label 5
10
+ # score 6
11
+
12
+
13
def compute_iou(data: NDArray[np.floating]) -> NDArray[np.floating]:
    """Compute element-wise IoU for paired boxes.

    Parameters
    ----------
    data : NDArray[np.floating]
        Shape (n, 8); columns 0-3 are the first box's
        (xmin, xmax, ymin, ymax), columns 4-7 the second box's.

    Returns
    -------
    NDArray[np.floating]
        Shape (n,) IoU values; 0.0 where the union is (near) zero.
    """
    # intersection rectangle extrema
    left = np.maximum(data[:, 0], data[:, 4])
    right = np.minimum(data[:, 1], data[:, 5])
    bottom = np.maximum(data[:, 2], data[:, 6])
    top = np.minimum(data[:, 3], data[:, 7])

    # clamp to zero so disjoint boxes contribute no overlap
    overlap = np.maximum(0, right - left) * np.maximum(0, top - bottom)

    first_area = (data[:, 1] - data[:, 0]) * (data[:, 3] - data[:, 2])
    second_area = (data[:, 5] - data[:, 4]) * (data[:, 7] - data[:, 6])
    union = first_area + second_area - overlap

    # guard against division by a degenerate (near-zero) union
    result = np.zeros(data.shape[0])
    well_defined = union >= 1e-9
    result[well_defined] = overlap[well_defined] / union[well_defined]
    return result
48
+
49
+
50
+ def _compute_ranked_pairs_for_datum(
51
+ data: np.ndarray,
52
+ label_counts: np.ndarray,
53
+ ) -> np.ndarray:
54
+ """
55
+ Computes ranked pairs for a datum.
56
+ """
57
+
58
+ # remove null predictions
59
+ data = data[data[:, 2] >= 0.0]
60
+
61
+ # sort by gt_id, iou, score
62
+ indices = np.lexsort(
63
+ (
64
+ data[:, 1],
65
+ -data[:, 3],
66
+ -data[:, 6],
67
+ )
68
+ )
69
+ data = data[indices]
70
+
71
+ # remove ignored predictions
72
+ for label_idx, count in enumerate(label_counts[:, 0]):
73
+ if count > 0:
74
+ continue
75
+ data = data[data[:, 5] != label_idx]
76
+
77
+ # only keep the highest ranked pair
78
+ _, indices = np.unique(data[:, [0, 2, 5]], axis=0, return_index=True)
79
+
80
+ # np.unique orders its results by value, we need to sort the indices to maintain the results of the lexsort
81
+ data = data[indices, :]
82
+
83
+ return data
84
+
85
+
86
def compute_ranked_pairs(
    data: list[NDArray[np.floating]],
    label_counts: NDArray[np.integer],
) -> NDArray[np.floating]:
    """Rank pairs per datum, then merge and order them globally.

    Parameters
    ----------
    data : list[NDArray[np.floating]]
        One pair matrix per datum.
    label_counts : NDArray[np.integer]
        Per-label array whose column 0 is the ground-truth count.

    Returns
    -------
    NDArray[np.floating]
        All surviving pairs, ordered by score then IoU (both descending).
    """
    per_datum = [
        _compute_ranked_pairs_for_datum(datum, label_counts=label_counts)
        for datum in data
    ]
    pairs = np.concatenate(per_datum, axis=0)

    # global ordering: score is the primary key, iou breaks ties
    # (np.lexsort treats its LAST key as the primary sort key)
    ordering = np.lexsort(
        (
            -pairs[:, 3],  # iou
            -pairs[:, 6],  # score
        )
    )
    return pairs[ordering]
107
+
108
+
109
def compute_metrics(
    data: np.ndarray,
    label_counts: np.ndarray,
    iou_thresholds: np.ndarray,
    score_thresholds: np.ndarray,
) -> tuple[
    tuple[
        NDArray[np.floating],
        NDArray[np.floating],
        NDArray[np.floating],
        NDArray[np.floating],
    ],
    tuple[
        NDArray[np.floating],
        NDArray[np.floating],
        NDArray[np.floating],
        NDArray[np.floating],
    ],
    NDArray[np.floating],
    NDArray[np.floating],
]:
    """
    Computes Object Detection metrics.

    Parameters
    ----------
    data : np.ndarray
        Ranked pair matrix with columns
        (datum id, gt id, pd id, iou, gt label, pd label, score),
        ordered by descending score (see ``compute_ranked_pairs``).
    label_counts : np.ndarray
        Per-label array; column 0 is the ground-truth count and column 2
        the label-key index.
    iou_thresholds : np.ndarray
        IoU thresholds to evaluate at.
    score_thresholds : np.ndarray
        Score thresholds to evaluate at.

    Returns
    -------
    tuple[NDArray, NDArray, NDArray, NDArray]
        Average Precision results
        (AP, mAP, AP averaged over IoUs, mAP averaged over IoUs).
    tuple[NDArray, NDArray, NDArray, NDArray]
        Average Recall results
        (AR, mAR, AR averaged over scores, mAR averaged over scores).
    np.ndarray
        Shape (n_ious, n_scores, n_labels, 7) holding
        TP, FP, FN, Precision, Recall, F1 Score, Accuracy.
    np.ndarray
        Interpolated Precision-Recall curves, shape (n_ious, n_labels, 101).
    """

    n_rows = data.shape[0]
    n_labels = label_counts.shape[0]
    n_ious = iou_thresholds.shape[0]
    n_scores = score_thresholds.shape[0]

    average_precision = np.zeros((n_ious, n_labels))
    average_recall = np.zeros((n_scores, n_labels))
    precision_recall = np.zeros((n_ious, n_scores, n_labels, 7))

    pd_labels = data[:, 5].astype(int)
    unique_pd_labels = np.unique(pd_labels)
    gt_count = label_counts[:, 0]
    # cumulative per-row counters used by the AP running precision/recall
    running_total_count = np.zeros(
        (n_ious, n_rows),
        dtype=np.float64,
    )
    running_tp_count = np.zeros_like(running_total_count)
    running_gt_count = np.zeros_like(running_total_count)
    pr_curve = np.zeros((n_ious, n_labels, 101))

    mask_score_nonzero = data[:, 6] > 1e-9
    mask_gt_exists = data[:, 1] >= 0.0
    mask_labels_match = np.isclose(data[:, 4], data[:, 5])

    mask_gt_exists_labels_match = mask_gt_exists & mask_labels_match

    mask_tp = mask_score_nonzero & mask_gt_exists_labels_match
    mask_fp = mask_score_nonzero
    mask_fn = mask_gt_exists_labels_match

    for iou_idx in range(n_ious):
        mask_iou = data[:, 3] >= iou_thresholds[iou_idx]

        mask_tp_outer = mask_tp & mask_iou
        # FP candidates: labelled mismatch above the IoU threshold, or any
        # scored pair below the IoU threshold
        mask_fp_outer = mask_fp & (
            (~mask_gt_exists_labels_match & mask_iou) | ~mask_iou
        )
        mask_fn_outer = mask_fn & mask_iou

        for score_idx in range(n_scores):
            mask_score_thresh = data[:, 6] >= score_thresholds[score_idx]

            mask_tp_inner = mask_tp_outer & mask_score_thresh
            mask_fp_inner = mask_fp_outer & mask_score_thresh
            # NOTE(review): currently unused; the reported FN count is
            # derived from gt_count - tp_count below.
            mask_fn_inner = mask_fn_outer & ~mask_score_thresh

            # deduplicate true positives so each ground truth is credited
            # at most once (keep the first, i.e. highest-ranked, pair)
            tp_candidates = data[mask_tp_inner]
            _, indices_gt_unique = np.unique(
                tp_candidates[:, [0, 1, 4]], axis=0, return_index=True
            )
            mask_gt_unique = np.zeros(tp_candidates.shape[0], dtype=bool)
            mask_gt_unique[indices_gt_unique] = True
            true_positives_mask = np.zeros(n_rows, dtype=bool)
            true_positives_mask[mask_tp_inner] = mask_gt_unique

            # calculate intermediates
            pd_count = np.bincount(pd_labels, minlength=n_labels).astype(float)
            tp_count = np.bincount(
                pd_labels,
                weights=true_positives_mask,
                minlength=n_labels,
            ).astype(float)

            fp_count = np.bincount(
                pd_labels[mask_fp_inner],
                minlength=n_labels,
            ).astype(float)

            # calculate component metrics
            recall = np.zeros_like(tp_count)
            precision = np.zeros_like(tp_count)
            np.divide(tp_count, gt_count, where=gt_count > 1e-9, out=recall)
            np.divide(tp_count, pd_count, where=pd_count > 1e-9, out=precision)
            # every ground truth not credited as a TP is a FN
            # (a dead np.bincount store previously shadowed this assignment
            # and has been removed)
            fn_count = gt_count - tp_count

            # bugfix: F1 is the harmonic mean 2*P*R/(P+R); the factor of 2
            # was previously missing, halving every reported F1 score
            f1_score = np.zeros_like(precision)
            np.divide(
                2 * np.multiply(precision, recall),
                (precision + recall),
                where=(precision + recall) > 1e-9,
                out=f1_score,
            )

            accuracy = np.zeros_like(tp_count)
            np.divide(
                tp_count,
                (gt_count + pd_count),
                where=(gt_count + pd_count) > 1e-9,
                out=accuracy,
            )

            precision_recall[iou_idx][score_idx] = np.concatenate(
                (
                    tp_count[:, np.newaxis],
                    fp_count[:, np.newaxis],
                    fn_count[:, np.newaxis],
                    precision[:, np.newaxis],
                    recall[:, np.newaxis],
                    f1_score[:, np.newaxis],
                    accuracy[:, np.newaxis],
                ),
                axis=1,
            )

            # calculate recall for AR
            average_recall[score_idx] += recall

        # create true-positive mask without the score threshold
        # (AP walks the full ranking)
        tp_candidates = data[mask_tp_outer]
        _, indices_gt_unique = np.unique(
            tp_candidates[:, [0, 1, 4]], axis=0, return_index=True
        )
        mask_gt_unique = np.zeros(tp_candidates.shape[0], dtype=bool)
        mask_gt_unique[indices_gt_unique] = True
        true_positives_mask = np.zeros(n_rows, dtype=bool)
        true_positives_mask[mask_tp_outer] = mask_gt_unique

        # count running tp and total for AP
        for pd_label in unique_pd_labels:
            mask_pd_label = pd_labels == pd_label
            running_gt_count[iou_idx][mask_pd_label] = gt_count[pd_label]
            running_total_count[iou_idx][mask_pd_label] = np.arange(
                1, mask_pd_label.sum() + 1
            )
            mask_tp_for_counting = mask_pd_label & true_positives_mask
            running_tp_count[iou_idx][mask_tp_for_counting] = np.arange(
                1, mask_tp_for_counting.sum() + 1
            )

    # calculate running precision-recall points for AP
    precision = np.zeros_like(running_total_count)
    np.divide(
        running_tp_count,
        running_total_count,
        where=running_total_count > 1e-9,
        out=precision,
    )
    recall = np.zeros_like(running_total_count)
    np.divide(
        running_tp_count,
        running_gt_count,
        where=running_gt_count > 1e-9,
        out=recall,
    )
    recall_index = np.floor(recall * 100.0).astype(int)
    for iou_idx in range(n_ious):
        p = precision[iou_idx]
        r = recall_index[iou_idx]
        # NOTE(review): with duplicate (label, recall-bin) pairs this fancy
        # assignment keeps the last write rather than the element-wise max —
        # confirm whether the ranked input ordering makes this safe.
        pr_curve[iou_idx, pd_labels, r] = np.maximum(
            pr_curve[iou_idx, pd_labels, r], p
        )

    # calculate average precision (101-point interpolation, sweeping the
    # running maximum from high recall down to zero)
    running_max = np.zeros((n_ious, n_labels))
    for recall in range(100, -1, -1):
        precision = pr_curve[:, :, recall]
        running_max = np.maximum(precision, running_max)
        average_precision += running_max
        pr_curve[:, :, recall] = running_max
    average_precision = average_precision / 101.0

    # calculate average recall
    average_recall /= n_ious

    # calculate mAP and mAR
    label_key_mapping = label_counts[unique_pd_labels, 2]
    label_keys = np.unique(label_counts[:, 2])
    mAP = np.ones((n_ious, label_keys.shape[0])) * -1.0
    mAR = np.ones((n_scores, label_keys.shape[0])) * -1.0
    for key in np.unique(label_key_mapping):
        labels = unique_pd_labels[label_key_mapping == key]
        # NOTE(review): assumes label keys are contiguous integer indices
        # 0..n_keys-1 — confirm against how label_counts is constructed.
        key_idx = int(key)
        mAP[:, key_idx] = average_precision[:, labels].mean(axis=1)
        mAR[:, key_idx] = average_recall[:, labels].mean(axis=1)

    # calculate AP and mAP averaged over iou thresholds
    APAveragedOverIoUs = average_precision.mean(axis=0)
    mAPAveragedOverIoUs = mAP.mean(axis=0)

    # calculate AR and mAR averaged over score thresholds
    ARAveragedOverIoUs = average_recall.mean(axis=0)
    mARAveragedOverIoUs = mAR.mean(axis=0)

    ap_results = (
        average_precision,
        mAP,
        APAveragedOverIoUs,
        mAPAveragedOverIoUs,
    )
    ar_results = (
        average_recall,
        mAR,
        ARAveragedOverIoUs,
        mARAveragedOverIoUs,
    )

    return (
        ap_results,
        ar_results,
        precision_recall,
        pr_curve,
    )
353
+
354
+
355
def compute_detailed_pr_curve(
    data: np.ndarray,
    label_counts: np.ndarray,
    iou_thresholds: np.ndarray,
    score_thresholds: np.ndarray,
    n_samples: int,
) -> np.ndarray:

    """
    Compute detailed precision-recall counts over IoU and score thresholds.

    Parameters
    ----------
    data : np.ndarray
        Pair matrix with columns
        (datum id, gt id, pd id, iou, gt label, pd label, score);
        a negative gt/pd id marks a missing ground truth / prediction.
    label_counts : np.ndarray
        Per-label array; only its length (the number of labels) is used here.
    iou_thresholds : np.ndarray
        IoU thresholds to evaluate at.
    score_thresholds : np.ndarray
        Score thresholds to evaluate at.
    n_samples : int
        Number of example datum ids to record alongside each count.

    Returns
    -------
    np.ndarray
        Shape (n_ious, n_scores, n_labels, 5 * (n_samples + 1)).
        The last axis holds five sections, each ``n_samples + 1`` wide:
        ``[count, example datum ids...]`` for, in order, true positives,
        misclassification false positives, hallucination false positives,
        misclassification false negatives, and missing-prediction false
        negatives. Unused example slots keep the fill value ``-1``.
    """

    n_labels = label_counts.shape[0]
    n_ious = iou_thresholds.shape[0]
    n_scores = score_thresholds.shape[0]
    # five sections, each one count slot plus n_samples example slots
    n_metrics = 5 * (n_samples + 1)

    # starting offset of each section along the metrics axis
    tp_idx = 0
    fp_misclf_idx = tp_idx + n_samples + 1
    fp_halluc_idx = fp_misclf_idx + n_samples + 1
    fn_misclf_idx = fp_halluc_idx + n_samples + 1
    fn_misprd_idx = fn_misclf_idx + n_samples + 1

    # fill value -1 marks unused example slots
    detailed_pr_curve = np.ones((n_ious, n_scores, n_labels, n_metrics)) * -1.0

    # ids are stored as floats and negative values encode "missing",
    # hence the -0.5 comparison instead of an equality test
    mask_gt_exists = data[:, 1] > -0.5
    mask_pd_exists = data[:, 2] > -0.5
    mask_label_match = np.isclose(data[:, 4], data[:, 5])

    mask_gt_pd_exists = mask_gt_exists & mask_pd_exists
    mask_gt_pd_match = mask_gt_pd_exists & mask_label_match
    mask_gt_pd_mismatch = mask_gt_pd_exists & ~mask_label_match

    for iou_idx in range(n_ious):
        mask_iou = data[:, 3] >= iou_thresholds[iou_idx]
        mask_gt_pd_match_iou = mask_gt_pd_match & mask_iou
        mask_gt_pd_mismatch_iou = mask_gt_pd_mismatch & mask_iou
        for score_idx in range(n_scores):
            mask_score = data[:, 6] >= score_thresholds[score_idx]
            # confident, overlapping, correctly-labelled pair
            mask_tp = mask_gt_pd_match_iou & mask_score
            # confident overlap carrying the wrong label
            mask_fp_misclf = mask_gt_pd_mismatch_iou & mask_score
            # correctly-labelled overlap whose score is below threshold
            mask_fn_misclf = mask_gt_pd_match_iou & ~mask_score
            # everything not explained as a match at this IoU or as a
            # confident mismatch; split below by which side exists
            mask_halluc_missing = ~(
                mask_gt_pd_match_iou | (mask_gt_pd_mismatch & mask_score)
            )
            mask_fp_halluc = mask_halluc_missing & mask_pd_exists
            mask_fn_misprd = mask_halluc_missing & mask_gt_exists

            tp_slice = data[mask_tp]
            fp_misclf_slice = data[mask_fp_misclf]
            fp_halluc_slice = data[mask_fp_halluc]
            fn_misclf_slice = data[mask_fn_misclf]
            fn_misprd_slice = data[mask_fn_misprd]

            # per-label counts: FP variants bin by the prediction label
            # (col 5), FN variants by the ground-truth label (col 4)
            tp_count = np.bincount(
                tp_slice[:, 5].astype(int), minlength=n_labels
            )
            fp_misclf_count = np.bincount(
                fp_misclf_slice[:, 5].astype(int), minlength=n_labels
            )
            fp_halluc_count = np.bincount(
                fp_halluc_slice[:, 5].astype(int), minlength=n_labels
            )
            fn_misclf_count = np.bincount(
                fn_misclf_slice[:, 4].astype(int), minlength=n_labels
            )
            fn_misprd_count = np.bincount(
                fn_misprd_slice[:, 4].astype(int), minlength=n_labels
            )

            # write each section's count into its leading slot
            detailed_pr_curve[iou_idx, score_idx, :, tp_idx] = tp_count
            detailed_pr_curve[
                iou_idx, score_idx, :, fp_misclf_idx
            ] = fp_misclf_count
            detailed_pr_curve[
                iou_idx, score_idx, :, fp_halluc_idx
            ] = fp_halluc_count
            detailed_pr_curve[
                iou_idx, score_idx, :, fn_misclf_idx
            ] = fn_misclf_count
            detailed_pr_curve[
                iou_idx, score_idx, :, fn_misprd_idx
            ] = fn_misprd_count

            if n_samples > 0:
                # record up to n_samples example datum ids (column 0)
                # immediately after each section's count slot
                for label_idx in range(n_labels):
                    tp_examples = tp_slice[
                        tp_slice[:, 5].astype(int) == label_idx
                    ][:n_samples, 0]
                    fp_misclf_examples = fp_misclf_slice[
                        fp_misclf_slice[:, 5].astype(int) == label_idx
                    ][:n_samples, 0]
                    fp_halluc_examples = fp_halluc_slice[
                        fp_halluc_slice[:, 5].astype(int) == label_idx
                    ][:n_samples, 0]
                    fn_misclf_examples = fn_misclf_slice[
                        fn_misclf_slice[:, 4].astype(int) == label_idx
                    ][:n_samples, 0]
                    fn_misprd_examples = fn_misprd_slice[
                        fn_misprd_slice[:, 4].astype(int) == label_idx
                    ][:n_samples, 0]

                    detailed_pr_curve[
                        iou_idx,
                        score_idx,
                        label_idx,
                        tp_idx + 1 : tp_idx + 1 + tp_examples.shape[0],
                    ] = tp_examples
                    detailed_pr_curve[
                        iou_idx,
                        score_idx,
                        label_idx,
                        fp_misclf_idx
                        + 1 : fp_misclf_idx
                        + 1
                        + fp_misclf_examples.shape[0],
                    ] = fp_misclf_examples
                    detailed_pr_curve[
                        iou_idx,
                        score_idx,
                        label_idx,
                        fp_halluc_idx
                        + 1 : fp_halluc_idx
                        + 1
                        + fp_halluc_examples.shape[0],
                    ] = fp_halluc_examples
                    detailed_pr_curve[
                        iou_idx,
                        score_idx,
                        label_idx,
                        fn_misclf_idx
                        + 1 : fn_misclf_idx
                        + 1
                        + fn_misclf_examples.shape[0],
                    ] = fn_misclf_examples
                    detailed_pr_curve[
                        iou_idx,
                        score_idx,
                        label_idx,
                        fn_misprd_idx
                        + 1 : fn_misprd_idx
                        + 1
                        + fn_misprd_examples.shape[0],
                    ] = fn_misprd_examples

    return detailed_pr_curve