valor-lite 0.33.1__py3-none-any.whl → 0.33.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valor-lite might be problematic. Click here for more details.
- valor_lite/detection/annotation.py +14 -2
- valor_lite/detection/computation.py +225 -80
- valor_lite/detection/manager.py +376 -239
- valor_lite/detection/metric.py +32 -7
- {valor_lite-0.33.1.dist-info → valor_lite-0.33.3.dist-info}/METADATA +1 -1
- valor_lite-0.33.3.dist-info/RECORD +12 -0
- valor_lite-0.33.1.dist-info/RECORD +0 -12
- {valor_lite-0.33.1.dist-info → valor_lite-0.33.3.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.1.dist-info → valor_lite-0.33.3.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.1.dist-info → valor_lite-0.33.3.dist-info}/top_level.txt +0 -0
|
@@ -36,8 +36,20 @@ class Bitmask:
|
|
|
36
36
|
"If scores are defined, there must be a 1:1 pairing with labels."
|
|
37
37
|
)
|
|
38
38
|
|
|
39
|
-
def to_box(self) -> BoundingBox:
|
|
40
|
-
|
|
39
|
+
def to_box(self) -> BoundingBox | None:
|
|
40
|
+
|
|
41
|
+
if not self.mask.any():
|
|
42
|
+
return None
|
|
43
|
+
|
|
44
|
+
rows, cols = np.nonzero(self.mask)
|
|
45
|
+
return BoundingBox(
|
|
46
|
+
xmin=cols.min(),
|
|
47
|
+
xmax=cols.max(),
|
|
48
|
+
ymin=rows.min(),
|
|
49
|
+
ymax=rows.max(),
|
|
50
|
+
labels=self.labels,
|
|
51
|
+
scores=self.scores,
|
|
52
|
+
)
|
|
41
53
|
|
|
42
54
|
|
|
43
55
|
@dataclass
|
|
@@ -1,16 +1,38 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
from numpy.typing import NDArray
|
|
3
3
|
|
|
4
|
-
# datum id 0
|
|
5
|
-
# gt 1
|
|
6
|
-
# pd 2
|
|
7
|
-
# iou 3
|
|
8
|
-
# gt label 4
|
|
9
|
-
# pd label 5
|
|
10
|
-
# score 6
|
|
11
|
-
|
|
12
4
|
|
|
13
5
|
def compute_iou(data: NDArray[np.floating]) -> NDArray[np.floating]:
|
|
6
|
+
"""
|
|
7
|
+
Computes intersection-over-union (IoU) for axis-aligned bounding boxes.
|
|
8
|
+
|
|
9
|
+
Takes data with shape (N, 8):
|
|
10
|
+
|
|
11
|
+
Index 0 - xmin for Box 1
|
|
12
|
+
Index 1 - xmax for Box 1
|
|
13
|
+
Index 2 - ymin for Box 1
|
|
14
|
+
Index 3 - ymax for Box 1
|
|
15
|
+
Index 4 - xmin for Box 2
|
|
16
|
+
Index 5 - xmax for Box 2
|
|
17
|
+
Index 6 - ymin for Box 2
|
|
18
|
+
Index 7 - ymax for Box 2
|
|
19
|
+
|
|
20
|
+
Returns data with shape (N, 1):
|
|
21
|
+
|
|
22
|
+
Index 0 - IoU
|
|
23
|
+
|
|
24
|
+
Parameters
|
|
25
|
+
----------
|
|
26
|
+
data : NDArray[np.floating]
|
|
27
|
+
A sorted array of classification pairs.
|
|
28
|
+
label_metadata : NDArray[np.int32]
|
|
29
|
+
An array containing metadata related to labels.
|
|
30
|
+
|
|
31
|
+
Returns
|
|
32
|
+
-------
|
|
33
|
+
NDArray[np.floating]
|
|
34
|
+
Compute IoU's.
|
|
35
|
+
"""
|
|
14
36
|
|
|
15
37
|
xmin1, xmax1, ymin1, ymax1 = (
|
|
16
38
|
data[:, 0],
|
|
@@ -48,9 +70,9 @@ def compute_iou(data: NDArray[np.floating]) -> NDArray[np.floating]:
|
|
|
48
70
|
|
|
49
71
|
|
|
50
72
|
def _compute_ranked_pairs_for_datum(
|
|
51
|
-
data: np.
|
|
52
|
-
label_metadata: np.
|
|
53
|
-
) -> np.
|
|
73
|
+
data: NDArray[np.floating],
|
|
74
|
+
label_metadata: NDArray[np.int32],
|
|
75
|
+
) -> NDArray[np.floating]:
|
|
54
76
|
"""
|
|
55
77
|
Computes ranked pairs for a datum.
|
|
56
78
|
"""
|
|
@@ -91,25 +113,51 @@ def _compute_ranked_pairs_for_datum(
|
|
|
91
113
|
|
|
92
114
|
def compute_ranked_pairs(
|
|
93
115
|
data: list[NDArray[np.floating]],
|
|
94
|
-
label_metadata: NDArray[np.
|
|
116
|
+
label_metadata: NDArray[np.int32],
|
|
95
117
|
) -> NDArray[np.floating]:
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
118
|
+
"""
|
|
119
|
+
Performs pair ranking on input data.
|
|
120
|
+
|
|
121
|
+
Takes data with shape (N, 7):
|
|
122
|
+
|
|
123
|
+
Index 0 - Datum Index
|
|
124
|
+
Index 1 - GroundTruth Index
|
|
125
|
+
Index 2 - Prediction Index
|
|
126
|
+
Index 3 - IoU
|
|
127
|
+
Index 4 - GroundTruth Label Index
|
|
128
|
+
Index 5 - Prediction Label Index
|
|
129
|
+
Index 6 - Score
|
|
130
|
+
|
|
131
|
+
Returns data with shape (N - M, 7)
|
|
132
|
+
|
|
133
|
+
Parameters
|
|
134
|
+
----------
|
|
135
|
+
data : NDArray[np.floating]
|
|
136
|
+
A sorted array of classification pairs.
|
|
137
|
+
label_metadata : NDArray[np.int32]
|
|
138
|
+
An array containing metadata related to labels.
|
|
139
|
+
|
|
140
|
+
Returns
|
|
141
|
+
-------
|
|
142
|
+
NDArray[np.floating]
|
|
143
|
+
A filtered array containing only ranked pairs.
|
|
144
|
+
"""
|
|
145
|
+
|
|
146
|
+
ranked_pairs_by_datum = [
|
|
147
|
+
_compute_ranked_pairs_for_datum(
|
|
148
|
+
data=datum,
|
|
149
|
+
label_metadata=label_metadata,
|
|
150
|
+
)
|
|
151
|
+
for datum in data
|
|
152
|
+
]
|
|
153
|
+
ranked_pairs = np.concatenate(ranked_pairs_by_datum, axis=0)
|
|
106
154
|
indices = np.lexsort(
|
|
107
155
|
(
|
|
108
|
-
-
|
|
109
|
-
-
|
|
156
|
+
-ranked_pairs[:, 3], # iou
|
|
157
|
+
-ranked_pairs[:, 6], # score
|
|
110
158
|
)
|
|
111
159
|
)
|
|
112
|
-
return
|
|
160
|
+
return ranked_pairs[indices]
|
|
113
161
|
|
|
114
162
|
|
|
115
163
|
def compute_metrics(
|
|
@@ -136,6 +184,27 @@ def compute_metrics(
|
|
|
136
184
|
"""
|
|
137
185
|
Computes Object Detection metrics.
|
|
138
186
|
|
|
187
|
+
Takes data with shape (N, 7):
|
|
188
|
+
|
|
189
|
+
Index 0 - Datum Index
|
|
190
|
+
Index 1 - GroundTruth Index
|
|
191
|
+
Index 2 - Prediction Index
|
|
192
|
+
Index 3 - IoU
|
|
193
|
+
Index 4 - GroundTruth Label Index
|
|
194
|
+
Index 5 - Prediction Label Index
|
|
195
|
+
Index 6 - Score
|
|
196
|
+
|
|
197
|
+
Parameters
|
|
198
|
+
----------
|
|
199
|
+
data : NDArray[np.floating]
|
|
200
|
+
A sorted array of classification pairs.
|
|
201
|
+
label_metadata : NDArray[np.int32]
|
|
202
|
+
An array containing metadata related to labels.
|
|
203
|
+
iou_thresholds : NDArray[np.floating]
|
|
204
|
+
A 1-D array containing IoU thresholds.
|
|
205
|
+
score_thresholds : NDArray[np.floating]
|
|
206
|
+
A 1-D array containing score thresholds.
|
|
207
|
+
|
|
139
208
|
Returns
|
|
140
209
|
-------
|
|
141
210
|
tuple[NDArray, NDArray, NDArray NDArray]
|
|
@@ -155,7 +224,7 @@ def compute_metrics(
|
|
|
155
224
|
|
|
156
225
|
average_precision = np.zeros((n_ious, n_labels))
|
|
157
226
|
average_recall = np.zeros((n_scores, n_labels))
|
|
158
|
-
|
|
227
|
+
counts = np.zeros((n_ious, n_scores, n_labels, 7))
|
|
159
228
|
|
|
160
229
|
pd_labels = data[:, 5].astype(int)
|
|
161
230
|
unique_pd_labels = np.unique(pd_labels)
|
|
@@ -245,7 +314,7 @@ def compute_metrics(
|
|
|
245
314
|
out=accuracy,
|
|
246
315
|
)
|
|
247
316
|
|
|
248
|
-
|
|
317
|
+
counts[iou_idx][score_idx] = np.concatenate(
|
|
249
318
|
(
|
|
250
319
|
tp_count[:, np.newaxis],
|
|
251
320
|
fp_count[:, np.newaxis],
|
|
@@ -353,42 +422,77 @@ def compute_metrics(
|
|
|
353
422
|
return (
|
|
354
423
|
ap_results,
|
|
355
424
|
ar_results,
|
|
356
|
-
|
|
425
|
+
counts,
|
|
357
426
|
pr_curve,
|
|
358
427
|
)
|
|
359
428
|
|
|
360
429
|
|
|
361
430
|
def compute_detailed_counts(
|
|
362
|
-
data: np.
|
|
363
|
-
label_metadata: np.
|
|
364
|
-
iou_thresholds: np.
|
|
365
|
-
score_thresholds: np.
|
|
431
|
+
data: NDArray[np.floating],
|
|
432
|
+
label_metadata: NDArray[np.int32],
|
|
433
|
+
iou_thresholds: NDArray[np.floating],
|
|
434
|
+
score_thresholds: NDArray[np.floating],
|
|
366
435
|
n_samples: int,
|
|
367
|
-
) -> np.
|
|
368
|
-
|
|
436
|
+
) -> NDArray[np.int32]:
|
|
369
437
|
"""
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
438
|
+
Compute detailed counts.
|
|
439
|
+
|
|
440
|
+
Takes data with shape (N, 7):
|
|
441
|
+
|
|
442
|
+
Index 0 - Datum Index
|
|
443
|
+
Index 1 - GroundTruth Index
|
|
444
|
+
Index 2 - Prediction Index
|
|
445
|
+
Index 3 - IoU
|
|
446
|
+
Index 4 - GroundTruth Label Index
|
|
447
|
+
Index 5 - Prediction Label Index
|
|
448
|
+
Index 6 - Score
|
|
449
|
+
|
|
450
|
+
Outputs an array with shape (N_IoUs, N_Score, N_Labels, 5 * n_samples + 5):
|
|
451
|
+
|
|
452
|
+
Index 0 - True Positive Count
|
|
453
|
+
... Datum ID Examples
|
|
454
|
+
Index 2 * n_samples + 1 - False Positive Misclassification Count
|
|
455
|
+
... Datum ID Examples
|
|
456
|
+
Index 4 * n_samples + 2 - False Positive Hallucination Count
|
|
457
|
+
... Datum ID Examples
|
|
458
|
+
Index 6 * n_samples + 3 - False Negative Misclassification Count
|
|
459
|
+
... Datum ID Examples
|
|
460
|
+
Index 8 * n_samples + 4 - False Negative Missing Prediction Count
|
|
461
|
+
... Datum ID Examples
|
|
462
|
+
|
|
463
|
+
Parameters
|
|
464
|
+
----------
|
|
465
|
+
data : NDArray[np.floating]
|
|
466
|
+
A sorted array of classification pairs.
|
|
467
|
+
label_metadata : NDArray[np.int32]
|
|
468
|
+
An array containing metadata related to labels.
|
|
469
|
+
iou_thresholds : NDArray[np.floating]
|
|
470
|
+
A 1-D array containing IoU thresholds.
|
|
471
|
+
score_thresholds : NDArray[np.floating]
|
|
472
|
+
A 1-D array containing score thresholds.
|
|
473
|
+
n_samples : int
|
|
474
|
+
The number of examples to return per count.
|
|
475
|
+
|
|
476
|
+
Returns
|
|
477
|
+
-------
|
|
478
|
+
NDArray[np.int32]
|
|
479
|
+
The detailed counts with optional examples.
|
|
378
480
|
"""
|
|
379
481
|
|
|
380
482
|
n_labels = label_metadata.shape[0]
|
|
381
483
|
n_ious = iou_thresholds.shape[0]
|
|
382
484
|
n_scores = score_thresholds.shape[0]
|
|
383
|
-
n_metrics = 5 * (n_samples + 1)
|
|
485
|
+
n_metrics = 5 * (2 * n_samples + 1)
|
|
384
486
|
|
|
385
487
|
tp_idx = 0
|
|
386
|
-
fp_misclf_idx =
|
|
387
|
-
fp_halluc_idx =
|
|
388
|
-
fn_misclf_idx =
|
|
389
|
-
fn_misprd_idx =
|
|
488
|
+
fp_misclf_idx = 2 * n_samples + 1
|
|
489
|
+
fp_halluc_idx = 4 * n_samples + 2
|
|
490
|
+
fn_misclf_idx = 6 * n_samples + 3
|
|
491
|
+
fn_misprd_idx = 8 * n_samples + 4
|
|
390
492
|
|
|
391
|
-
detailed_pr_curve = np.ones(
|
|
493
|
+
detailed_pr_curve = -1 * np.ones(
|
|
494
|
+
(n_ious, n_scores, n_labels, n_metrics), dtype=np.int32
|
|
495
|
+
)
|
|
392
496
|
|
|
393
497
|
mask_gt_exists = data[:, 1] > -0.5
|
|
394
498
|
mask_pd_exists = data[:, 2] > -0.5
|
|
@@ -406,13 +510,14 @@ def compute_detailed_counts(
|
|
|
406
510
|
mask_iou_threshold = data[:, 3] >= iou_thresholds[iou_idx]
|
|
407
511
|
mask_iou = mask_iou_nonzero & mask_iou_threshold
|
|
408
512
|
|
|
409
|
-
|
|
410
|
-
mask_groundtruths_with_passing_ious = (
|
|
411
|
-
groundtruths.reshape(-1, 1, 2)
|
|
412
|
-
== groundtruths_with_pairs.reshape(1, -1, 2)
|
|
413
|
-
).all(axis=2)
|
|
513
|
+
groundtruths_passing_ious = np.unique(groundtruths[mask_iou], axis=0)
|
|
414
514
|
mask_groundtruths_with_passing_ious = (
|
|
415
|
-
|
|
515
|
+
(
|
|
516
|
+
groundtruths.reshape(-1, 1, 2)
|
|
517
|
+
== groundtruths_passing_ious.reshape(1, -1, 2)
|
|
518
|
+
)
|
|
519
|
+
.all(axis=2)
|
|
520
|
+
.any(axis=1)
|
|
416
521
|
)
|
|
417
522
|
mask_groundtruths_without_passing_ious = (
|
|
418
523
|
~mask_groundtruths_with_passing_ious & mask_gt_exists
|
|
@@ -422,11 +527,12 @@ def compute_detailed_counts(
|
|
|
422
527
|
predictions[mask_iou], axis=0
|
|
423
528
|
)
|
|
424
529
|
mask_predictions_with_passing_ious = (
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
530
|
+
(
|
|
531
|
+
predictions.reshape(-1, 1, 2)
|
|
532
|
+
== predictions_with_passing_ious.reshape(1, -1, 2)
|
|
533
|
+
)
|
|
534
|
+
.all(axis=2)
|
|
535
|
+
.any(axis=1)
|
|
430
536
|
)
|
|
431
537
|
mask_predictions_without_passing_ious = (
|
|
432
538
|
~mask_predictions_with_passing_ious & mask_pd_exists
|
|
@@ -440,11 +546,12 @@ def compute_detailed_counts(
|
|
|
440
546
|
groundtruths[mask_iou & mask_score], axis=0
|
|
441
547
|
)
|
|
442
548
|
mask_groundtruths_with_passing_score = (
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
549
|
+
(
|
|
550
|
+
groundtruths.reshape(-1, 1, 2)
|
|
551
|
+
== groundtruths_with_passing_score.reshape(1, -1, 2)
|
|
552
|
+
)
|
|
553
|
+
.all(axis=2)
|
|
554
|
+
.any(axis=1)
|
|
448
555
|
)
|
|
449
556
|
mask_groundtruths_without_passing_score = (
|
|
450
557
|
~mask_groundtruths_with_passing_score & mask_gt_exists
|
|
@@ -466,12 +573,30 @@ def compute_detailed_counts(
|
|
|
466
573
|
| mask_groundtruths_without_passing_score
|
|
467
574
|
)
|
|
468
575
|
|
|
469
|
-
|
|
576
|
+
tp_pds = np.unique(data[mask_tp][:, [0, 2, 5]], axis=0)
|
|
577
|
+
tp_gts = np.unique(data[mask_tp][:, [0, 1, 4]], axis=0)
|
|
470
578
|
fp_misclf = np.unique(data[mask_fp_misclf][:, [0, 2, 5]], axis=0)
|
|
471
579
|
fp_halluc = np.unique(data[mask_fp_halluc][:, [0, 2, 5]], axis=0)
|
|
472
580
|
fn_misclf = np.unique(data[mask_fn_misclf][:, [0, 1, 4]], axis=0)
|
|
473
581
|
fn_misprd = np.unique(data[mask_fn_misprd][:, [0, 1, 4]], axis=0)
|
|
474
582
|
|
|
583
|
+
mask_fp_misclf_is_tp = (
|
|
584
|
+
(fp_misclf.reshape(-1, 1, 3) == tp_pds.reshape(1, -1, 3))
|
|
585
|
+
.all(axis=2)
|
|
586
|
+
.any(axis=1)
|
|
587
|
+
)
|
|
588
|
+
mask_fn_misclf_is_tp = (
|
|
589
|
+
(fn_misclf.reshape(-1, 1, 3) == tp_gts.reshape(1, -1, 3))
|
|
590
|
+
.all(axis=2)
|
|
591
|
+
.any(axis=1)
|
|
592
|
+
)
|
|
593
|
+
|
|
594
|
+
tp = tp_pds
|
|
595
|
+
fp_misclf = fp_misclf[~mask_fp_misclf_is_tp]
|
|
596
|
+
fp_halluc = fp_halluc
|
|
597
|
+
fn_misclf = fn_misclf[~mask_fn_misclf_is_tp]
|
|
598
|
+
fn_misprd = fn_misprd
|
|
599
|
+
|
|
475
600
|
tp_count = np.bincount(tp[:, 2].astype(int), minlength=n_labels)
|
|
476
601
|
fp_misclf_count = np.bincount(
|
|
477
602
|
fp_misclf[:, 2].astype(int), minlength=n_labels
|
|
@@ -502,21 +627,41 @@ def compute_detailed_counts(
|
|
|
502
627
|
|
|
503
628
|
if n_samples > 0:
|
|
504
629
|
for label_idx in range(n_labels):
|
|
505
|
-
tp_examples =
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
630
|
+
tp_examples = (
|
|
631
|
+
tp[tp[:, 2].astype(int) == label_idx][
|
|
632
|
+
:n_samples, [0, 1]
|
|
633
|
+
]
|
|
634
|
+
.astype(int)
|
|
635
|
+
.flatten()
|
|
636
|
+
)
|
|
637
|
+
fp_misclf_examples = (
|
|
638
|
+
fp_misclf[fp_misclf[:, 2].astype(int) == label_idx][
|
|
639
|
+
:n_samples, [0, 1]
|
|
640
|
+
]
|
|
641
|
+
.astype(int)
|
|
642
|
+
.flatten()
|
|
643
|
+
)
|
|
644
|
+
fp_halluc_examples = (
|
|
645
|
+
fp_halluc[fp_halluc[:, 2].astype(int) == label_idx][
|
|
646
|
+
:n_samples, [0, 1]
|
|
647
|
+
]
|
|
648
|
+
.astype(int)
|
|
649
|
+
.flatten()
|
|
650
|
+
)
|
|
651
|
+
fn_misclf_examples = (
|
|
652
|
+
fn_misclf[fn_misclf[:, 2].astype(int) == label_idx][
|
|
653
|
+
:n_samples, [0, 1]
|
|
654
|
+
]
|
|
655
|
+
.astype(int)
|
|
656
|
+
.flatten()
|
|
657
|
+
)
|
|
658
|
+
fn_misprd_examples = (
|
|
659
|
+
fn_misprd[fn_misprd[:, 2].astype(int) == label_idx][
|
|
660
|
+
:n_samples, [0, 1]
|
|
661
|
+
]
|
|
662
|
+
.astype(int)
|
|
663
|
+
.flatten()
|
|
664
|
+
)
|
|
520
665
|
|
|
521
666
|
detailed_pr_curve[
|
|
522
667
|
iou_idx,
|
valor_lite/detection/manager.py
CHANGED
|
@@ -58,6 +58,10 @@ class Filter:
|
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
class Evaluator:
|
|
61
|
+
"""
|
|
62
|
+
Object Detection Evaluator
|
|
63
|
+
"""
|
|
64
|
+
|
|
61
65
|
def __init__(self):
|
|
62
66
|
|
|
63
67
|
# metadata
|
|
@@ -70,6 +74,10 @@ class Evaluator:
|
|
|
70
74
|
self.uid_to_index: dict[str, int] = dict()
|
|
71
75
|
self.index_to_uid: dict[int, str] = dict()
|
|
72
76
|
|
|
77
|
+
# annotation reference
|
|
78
|
+
self.groundtruth_examples: dict[int, NDArray[np.float16]] = dict()
|
|
79
|
+
self.prediction_examples: dict[int, NDArray[np.float16]] = dict()
|
|
80
|
+
|
|
73
81
|
# label reference
|
|
74
82
|
self.label_to_index: dict[tuple[str, str], int] = dict()
|
|
75
83
|
self.index_to_label: dict[int, tuple[str, str]] = dict()
|
|
@@ -80,13 +88,16 @@ class Evaluator:
|
|
|
80
88
|
self.label_index_to_label_key_index: dict[int, int] = dict()
|
|
81
89
|
|
|
82
90
|
# computation caches
|
|
83
|
-
self._detailed_pairs = np.array([])
|
|
84
|
-
self._ranked_pairs = np.array([])
|
|
85
|
-
self._label_metadata = np.array([])
|
|
86
|
-
self._label_metadata_per_datum = np.array([])
|
|
91
|
+
self._detailed_pairs: NDArray[np.floating] = np.array([])
|
|
92
|
+
self._ranked_pairs: NDArray[np.floating] = np.array([])
|
|
93
|
+
self._label_metadata: NDArray[np.int32] = np.array([])
|
|
94
|
+
self._label_metadata_per_datum: NDArray[np.int32] = np.array([])
|
|
87
95
|
|
|
88
96
|
@property
|
|
89
97
|
def ignored_prediction_labels(self) -> list[tuple[str, str]]:
|
|
98
|
+
"""
|
|
99
|
+
Prediction labels that are not present in the ground truth set.
|
|
100
|
+
"""
|
|
90
101
|
glabels = set(np.where(self._label_metadata[:, 0] > 0)[0])
|
|
91
102
|
plabels = set(np.where(self._label_metadata[:, 1] > 0)[0])
|
|
92
103
|
return [
|
|
@@ -95,6 +106,9 @@ class Evaluator:
|
|
|
95
106
|
|
|
96
107
|
@property
|
|
97
108
|
def missing_prediction_labels(self) -> list[tuple[str, str]]:
|
|
109
|
+
"""
|
|
110
|
+
Ground truth labels that are not present in the prediction set.
|
|
111
|
+
"""
|
|
98
112
|
glabels = set(np.where(self._label_metadata[:, 0] > 0)[0])
|
|
99
113
|
plabels = set(np.where(self._label_metadata[:, 1] > 0)[0])
|
|
100
114
|
return [
|
|
@@ -103,6 +117,9 @@ class Evaluator:
|
|
|
103
117
|
|
|
104
118
|
@property
|
|
105
119
|
def metadata(self) -> dict:
|
|
120
|
+
"""
|
|
121
|
+
Evaluation metadata.
|
|
122
|
+
"""
|
|
106
123
|
return {
|
|
107
124
|
"n_datums": self.n_datums,
|
|
108
125
|
"n_groundtruths": self.n_groundtruths,
|
|
@@ -211,21 +228,32 @@ class Evaluator:
|
|
|
211
228
|
|
|
212
229
|
def evaluate(
|
|
213
230
|
self,
|
|
231
|
+
metrics_to_return: list[MetricType] = MetricType.base_metrics(),
|
|
214
232
|
iou_thresholds: list[float] = [0.5, 0.75, 0.9],
|
|
215
233
|
score_thresholds: list[float] = [0.5],
|
|
234
|
+
number_of_examples: int = 0,
|
|
216
235
|
filter_: Filter | None = None,
|
|
217
236
|
) -> dict[MetricType, list]:
|
|
218
237
|
"""
|
|
219
|
-
|
|
238
|
+
Performs an evaluation and returns metrics.
|
|
220
239
|
|
|
221
240
|
Parameters
|
|
222
241
|
----------
|
|
242
|
+
metrics_to_return : list[MetricType]
|
|
243
|
+
A list of metrics to return in the results.
|
|
223
244
|
iou_thresholds : list[float]
|
|
224
|
-
A list of
|
|
245
|
+
A list of IoU thresholds to compute metrics over.
|
|
225
246
|
score_thresholds : list[float]
|
|
226
|
-
A list of score thresholds to compute over.
|
|
227
|
-
|
|
228
|
-
|
|
247
|
+
A list of score thresholds to compute metrics over.
|
|
248
|
+
number_of_examples : int, default=0
|
|
249
|
+
Number of annotation examples to return in DetailedCounts.
|
|
250
|
+
filter_ : Filter, optional
|
|
251
|
+
An optional filter object.
|
|
252
|
+
|
|
253
|
+
Returns
|
|
254
|
+
-------
|
|
255
|
+
dict[MetricType, list]
|
|
256
|
+
A dictionary mapping MetricType enumerations to lists of computed metrics.
|
|
229
257
|
"""
|
|
230
258
|
|
|
231
259
|
data = self._ranked_pairs
|
|
@@ -360,6 +388,10 @@ class Evaluator:
|
|
|
360
388
|
for label_idx, label in self.index_to_label.items():
|
|
361
389
|
for score_idx, score_threshold in enumerate(score_thresholds):
|
|
362
390
|
for iou_idx, iou_threshold in enumerate(iou_thresholds):
|
|
391
|
+
|
|
392
|
+
if label_metadata[label_idx, 0] == 0:
|
|
393
|
+
continue
|
|
394
|
+
|
|
363
395
|
row = precision_recall[iou_idx][score_idx][label_idx]
|
|
364
396
|
kwargs = {
|
|
365
397
|
"label": label,
|
|
@@ -374,6 +406,7 @@ class Evaluator:
|
|
|
374
406
|
**kwargs,
|
|
375
407
|
)
|
|
376
408
|
)
|
|
409
|
+
|
|
377
410
|
metrics[MetricType.Precision].append(
|
|
378
411
|
Precision(
|
|
379
412
|
value=row[3],
|
|
@@ -399,16 +432,27 @@ class Evaluator:
|
|
|
399
432
|
)
|
|
400
433
|
)
|
|
401
434
|
|
|
435
|
+
if MetricType.DetailedCounts in metrics_to_return:
|
|
436
|
+
metrics[MetricType.DetailedCounts] = self._compute_detailed_counts(
|
|
437
|
+
iou_thresholds=iou_thresholds,
|
|
438
|
+
score_thresholds=score_thresholds,
|
|
439
|
+
n_samples=number_of_examples,
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
for metric in set(metrics.keys()):
|
|
443
|
+
if metric not in metrics_to_return:
|
|
444
|
+
del metrics[metric]
|
|
445
|
+
|
|
402
446
|
return metrics
|
|
403
447
|
|
|
404
|
-
def
|
|
448
|
+
def _compute_detailed_counts(
|
|
405
449
|
self,
|
|
406
450
|
iou_thresholds: list[float] = [0.5],
|
|
407
451
|
score_thresholds: list[float] = [
|
|
408
452
|
score / 10.0 for score in range(1, 11)
|
|
409
453
|
],
|
|
410
454
|
n_samples: int = 0,
|
|
411
|
-
) -> list[
|
|
455
|
+
) -> list[DetailedCounts]:
|
|
412
456
|
"""
|
|
413
457
|
Computes detailed counting metrics.
|
|
414
458
|
|
|
@@ -439,106 +483,139 @@ class Evaluator:
|
|
|
439
483
|
)
|
|
440
484
|
|
|
441
485
|
tp_idx = 0
|
|
442
|
-
fp_misclf_idx =
|
|
443
|
-
fp_halluc_idx =
|
|
444
|
-
fn_misclf_idx =
|
|
445
|
-
fn_misprd_idx =
|
|
486
|
+
fp_misclf_idx = 2 * n_samples + 1
|
|
487
|
+
fp_halluc_idx = 4 * n_samples + 2
|
|
488
|
+
fn_misclf_idx = 6 * n_samples + 3
|
|
489
|
+
fn_misprd_idx = 8 * n_samples + 4
|
|
490
|
+
|
|
491
|
+
def _unpack_examples(
|
|
492
|
+
iou_idx: int,
|
|
493
|
+
label_idx: int,
|
|
494
|
+
type_idx: int,
|
|
495
|
+
example_source: dict[int, NDArray[np.float16]],
|
|
496
|
+
) -> list[list[tuple[str, tuple[float, float, float, float]]]]:
|
|
497
|
+
"""
|
|
498
|
+
Unpacks metric examples from computation.
|
|
499
|
+
"""
|
|
500
|
+
type_idx += 1
|
|
501
|
+
|
|
502
|
+
results = list()
|
|
503
|
+
for score_idx in range(n_scores):
|
|
504
|
+
examples = list()
|
|
505
|
+
for example_idx in range(n_samples):
|
|
506
|
+
datum_idx = metrics[
|
|
507
|
+
iou_idx,
|
|
508
|
+
score_idx,
|
|
509
|
+
label_idx,
|
|
510
|
+
type_idx + example_idx * 2,
|
|
511
|
+
]
|
|
512
|
+
annotation_idx = metrics[
|
|
513
|
+
iou_idx,
|
|
514
|
+
score_idx,
|
|
515
|
+
label_idx,
|
|
516
|
+
type_idx + example_idx * 2 + 1,
|
|
517
|
+
]
|
|
518
|
+
if datum_idx >= 0:
|
|
519
|
+
examples.append(
|
|
520
|
+
(
|
|
521
|
+
self.index_to_uid[datum_idx],
|
|
522
|
+
tuple(
|
|
523
|
+
example_source[datum_idx][
|
|
524
|
+
annotation_idx
|
|
525
|
+
].tolist()
|
|
526
|
+
),
|
|
527
|
+
)
|
|
528
|
+
)
|
|
529
|
+
results.append(examples)
|
|
530
|
+
|
|
531
|
+
return results
|
|
446
532
|
|
|
447
533
|
n_ious, n_scores, n_labels, _ = metrics.shape
|
|
448
534
|
return [
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
.
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
[
|
|
504
|
-
self.index_to_uid[int(datum_idx)]
|
|
505
|
-
for datum_idx in metrics[iou_idx][score_idx][
|
|
506
|
-
label_idx
|
|
507
|
-
][fn_misclf_idx + 1 : fn_misprd_idx]
|
|
508
|
-
if int(datum_idx) >= 0
|
|
509
|
-
]
|
|
510
|
-
for score_idx in range(n_scores)
|
|
511
|
-
],
|
|
512
|
-
fn_missing_prediction=metrics[
|
|
513
|
-
iou_idx, :, label_idx, fn_misprd_idx
|
|
514
|
-
]
|
|
515
|
-
.astype(int)
|
|
516
|
-
.tolist(),
|
|
517
|
-
fn_missing_prediction_examples=[
|
|
518
|
-
[
|
|
519
|
-
self.index_to_uid[int(datum_idx)]
|
|
520
|
-
for datum_idx in metrics[iou_idx][score_idx][
|
|
521
|
-
label_idx
|
|
522
|
-
][fn_misprd_idx + 1 :]
|
|
523
|
-
if int(datum_idx) >= 0
|
|
524
|
-
]
|
|
525
|
-
for score_idx in range(n_scores)
|
|
526
|
-
],
|
|
527
|
-
)
|
|
528
|
-
for iou_idx in range(n_ious)
|
|
529
|
-
]
|
|
535
|
+
DetailedCounts(
|
|
536
|
+
iou_threshold=iou_thresholds[iou_idx],
|
|
537
|
+
label=self.index_to_label[label_idx],
|
|
538
|
+
score_thresholds=score_thresholds,
|
|
539
|
+
tp=metrics[iou_idx, :, label_idx, tp_idx].astype(int).tolist(),
|
|
540
|
+
fp_misclassification=metrics[
|
|
541
|
+
iou_idx, :, label_idx, fp_misclf_idx
|
|
542
|
+
]
|
|
543
|
+
.astype(int)
|
|
544
|
+
.tolist(),
|
|
545
|
+
fp_hallucination=metrics[iou_idx, :, label_idx, fp_halluc_idx]
|
|
546
|
+
.astype(int)
|
|
547
|
+
.tolist(),
|
|
548
|
+
fn_misclassification=metrics[
|
|
549
|
+
iou_idx, :, label_idx, fn_misclf_idx
|
|
550
|
+
]
|
|
551
|
+
.astype(int)
|
|
552
|
+
.tolist(),
|
|
553
|
+
fn_missing_prediction=metrics[
|
|
554
|
+
iou_idx, :, label_idx, fn_misprd_idx
|
|
555
|
+
]
|
|
556
|
+
.astype(int)
|
|
557
|
+
.tolist(),
|
|
558
|
+
tp_examples=_unpack_examples(
|
|
559
|
+
iou_idx=iou_idx,
|
|
560
|
+
label_idx=label_idx,
|
|
561
|
+
type_idx=tp_idx,
|
|
562
|
+
example_source=self.prediction_examples,
|
|
563
|
+
),
|
|
564
|
+
fp_misclassification_examples=_unpack_examples(
|
|
565
|
+
iou_idx=iou_idx,
|
|
566
|
+
label_idx=label_idx,
|
|
567
|
+
type_idx=fp_misclf_idx,
|
|
568
|
+
example_source=self.prediction_examples,
|
|
569
|
+
),
|
|
570
|
+
fp_hallucination_examples=_unpack_examples(
|
|
571
|
+
iou_idx=iou_idx,
|
|
572
|
+
label_idx=label_idx,
|
|
573
|
+
type_idx=fp_halluc_idx,
|
|
574
|
+
example_source=self.prediction_examples,
|
|
575
|
+
),
|
|
576
|
+
fn_misclassification_examples=_unpack_examples(
|
|
577
|
+
iou_idx=iou_idx,
|
|
578
|
+
label_idx=label_idx,
|
|
579
|
+
type_idx=fn_misclf_idx,
|
|
580
|
+
example_source=self.groundtruth_examples,
|
|
581
|
+
),
|
|
582
|
+
fn_missing_prediction_examples=_unpack_examples(
|
|
583
|
+
iou_idx=iou_idx,
|
|
584
|
+
label_idx=label_idx,
|
|
585
|
+
type_idx=fn_misprd_idx,
|
|
586
|
+
example_source=self.groundtruth_examples,
|
|
587
|
+
),
|
|
588
|
+
)
|
|
530
589
|
for label_idx in range(n_labels)
|
|
590
|
+
for iou_idx in range(n_ious)
|
|
531
591
|
]
|
|
532
592
|
|
|
533
593
|
|
|
534
594
|
class DataLoader:
|
|
595
|
+
"""
|
|
596
|
+
Object Detection DataLoader
|
|
597
|
+
"""
|
|
598
|
+
|
|
535
599
|
def __init__(self):
|
|
536
600
|
self._evaluator = Evaluator()
|
|
537
|
-
self.pairs = list()
|
|
601
|
+
self.pairs: list[NDArray[np.floating]] = list()
|
|
538
602
|
self.groundtruth_count = defaultdict(lambda: defaultdict(int))
|
|
539
603
|
self.prediction_count = defaultdict(lambda: defaultdict(int))
|
|
540
604
|
|
|
541
605
|
def _add_datum(self, uid: str) -> int:
|
|
606
|
+
"""
|
|
607
|
+
Helper function for adding a datum to the cache.
|
|
608
|
+
|
|
609
|
+
Parameters
|
|
610
|
+
----------
|
|
611
|
+
uid : str
|
|
612
|
+
The datum uid.
|
|
613
|
+
|
|
614
|
+
Returns
|
|
615
|
+
-------
|
|
616
|
+
int
|
|
617
|
+
The datum index.
|
|
618
|
+
"""
|
|
542
619
|
if uid not in self._evaluator.uid_to_index:
|
|
543
620
|
index = len(self._evaluator.uid_to_index)
|
|
544
621
|
self._evaluator.uid_to_index[uid] = index
|
|
@@ -546,6 +623,22 @@ class DataLoader:
|
|
|
546
623
|
return self._evaluator.uid_to_index[uid]
|
|
547
624
|
|
|
548
625
|
def _add_label(self, label: tuple[str, str]) -> tuple[int, int]:
|
|
626
|
+
"""
|
|
627
|
+
Helper function for adding a label to the cache.
|
|
628
|
+
|
|
629
|
+
Parameters
|
|
630
|
+
----------
|
|
631
|
+
label : tuple[str, str]
|
|
632
|
+
The label as a tuple in format (key, value).
|
|
633
|
+
|
|
634
|
+
Returns
|
|
635
|
+
-------
|
|
636
|
+
int
|
|
637
|
+
Label index.
|
|
638
|
+
int
|
|
639
|
+
Label key index.
|
|
640
|
+
"""
|
|
641
|
+
|
|
549
642
|
label_id = len(self._evaluator.index_to_label)
|
|
550
643
|
label_key_id = len(self._evaluator.index_to_label_key)
|
|
551
644
|
if label not in self._evaluator.label_to_index:
|
|
@@ -568,11 +661,140 @@ class DataLoader:
|
|
|
568
661
|
self._evaluator.label_key_to_index[label[0]],
|
|
569
662
|
)
|
|
570
663
|
|
|
664
|
+
def _add_data(
|
|
665
|
+
self,
|
|
666
|
+
uid_index: int,
|
|
667
|
+
keyed_groundtruths: dict,
|
|
668
|
+
keyed_predictions: dict,
|
|
669
|
+
):
|
|
670
|
+
gt_keys = set(keyed_groundtruths.keys())
|
|
671
|
+
pd_keys = set(keyed_predictions.keys())
|
|
672
|
+
joint_keys = gt_keys.intersection(pd_keys)
|
|
673
|
+
gt_unique_keys = gt_keys - pd_keys
|
|
674
|
+
pd_unique_keys = pd_keys - gt_keys
|
|
675
|
+
|
|
676
|
+
pairs = list()
|
|
677
|
+
for key in joint_keys:
|
|
678
|
+
n_predictions = len(keyed_predictions[key])
|
|
679
|
+
n_groundtruths = len(keyed_groundtruths[key])
|
|
680
|
+
boxes = np.array(
|
|
681
|
+
[
|
|
682
|
+
np.array([*gextrema, *pextrema])
|
|
683
|
+
for _, _, _, pextrema in keyed_predictions[key]
|
|
684
|
+
for _, _, gextrema in keyed_groundtruths[key]
|
|
685
|
+
]
|
|
686
|
+
)
|
|
687
|
+
ious = compute_iou(boxes)
|
|
688
|
+
mask_nonzero_iou = (ious > 1e-9).reshape(
|
|
689
|
+
(n_predictions, n_groundtruths)
|
|
690
|
+
)
|
|
691
|
+
mask_ious_halluc = ~(mask_nonzero_iou.any(axis=1))
|
|
692
|
+
mask_ious_misprd = ~(mask_nonzero_iou.any(axis=0))
|
|
693
|
+
|
|
694
|
+
pairs.extend(
|
|
695
|
+
[
|
|
696
|
+
np.array(
|
|
697
|
+
[
|
|
698
|
+
float(uid_index),
|
|
699
|
+
float(gidx),
|
|
700
|
+
float(pidx),
|
|
701
|
+
ious[pidx * len(keyed_groundtruths[key]) + gidx],
|
|
702
|
+
float(glabel),
|
|
703
|
+
float(plabel),
|
|
704
|
+
float(score),
|
|
705
|
+
]
|
|
706
|
+
)
|
|
707
|
+
for pidx, plabel, score, _ in keyed_predictions[key]
|
|
708
|
+
for gidx, glabel, _ in keyed_groundtruths[key]
|
|
709
|
+
if ious[pidx * len(keyed_groundtruths[key]) + gidx] > 1e-9
|
|
710
|
+
]
|
|
711
|
+
)
|
|
712
|
+
pairs.extend(
|
|
713
|
+
[
|
|
714
|
+
np.array(
|
|
715
|
+
[
|
|
716
|
+
float(uid_index),
|
|
717
|
+
-1.0,
|
|
718
|
+
float(pidx),
|
|
719
|
+
0.0,
|
|
720
|
+
-1.0,
|
|
721
|
+
float(plabel),
|
|
722
|
+
float(score),
|
|
723
|
+
]
|
|
724
|
+
)
|
|
725
|
+
for pidx, plabel, score, _ in keyed_predictions[key]
|
|
726
|
+
if mask_ious_halluc[pidx]
|
|
727
|
+
]
|
|
728
|
+
)
|
|
729
|
+
pairs.extend(
|
|
730
|
+
[
|
|
731
|
+
np.array(
|
|
732
|
+
[
|
|
733
|
+
float(uid_index),
|
|
734
|
+
float(gidx),
|
|
735
|
+
-1.0,
|
|
736
|
+
0.0,
|
|
737
|
+
float(glabel),
|
|
738
|
+
-1.0,
|
|
739
|
+
-1.0,
|
|
740
|
+
]
|
|
741
|
+
)
|
|
742
|
+
for gidx, glabel, _ in keyed_groundtruths[key]
|
|
743
|
+
if mask_ious_misprd[gidx]
|
|
744
|
+
]
|
|
745
|
+
)
|
|
746
|
+
for key in gt_unique_keys:
|
|
747
|
+
pairs.extend(
|
|
748
|
+
[
|
|
749
|
+
np.array(
|
|
750
|
+
[
|
|
751
|
+
float(uid_index),
|
|
752
|
+
float(gidx),
|
|
753
|
+
-1.0,
|
|
754
|
+
0.0,
|
|
755
|
+
float(glabel),
|
|
756
|
+
-1.0,
|
|
757
|
+
-1.0,
|
|
758
|
+
]
|
|
759
|
+
)
|
|
760
|
+
for gidx, glabel, _ in keyed_groundtruths[key]
|
|
761
|
+
]
|
|
762
|
+
)
|
|
763
|
+
for key in pd_unique_keys:
|
|
764
|
+
pairs.extend(
|
|
765
|
+
[
|
|
766
|
+
np.array(
|
|
767
|
+
[
|
|
768
|
+
float(uid_index),
|
|
769
|
+
-1.0,
|
|
770
|
+
float(pidx),
|
|
771
|
+
0.0,
|
|
772
|
+
-1.0,
|
|
773
|
+
float(plabel),
|
|
774
|
+
float(score),
|
|
775
|
+
]
|
|
776
|
+
)
|
|
777
|
+
for pidx, plabel, score, _ in keyed_predictions[key]
|
|
778
|
+
]
|
|
779
|
+
)
|
|
780
|
+
|
|
781
|
+
self.pairs.append(np.array(pairs))
|
|
782
|
+
|
|
571
783
|
def add_data(
|
|
572
784
|
self,
|
|
573
785
|
detections: list[Detection],
|
|
574
786
|
show_progress: bool = False,
|
|
575
787
|
):
|
|
788
|
+
"""
|
|
789
|
+
Adds detections to the cache.
|
|
790
|
+
|
|
791
|
+
Parameters
|
|
792
|
+
----------
|
|
793
|
+
detections : list[Detection]
|
|
794
|
+
A list of Detection objects.
|
|
795
|
+
show_progress : bool, default=False
|
|
796
|
+
Toggle for tqdm progress bar.
|
|
797
|
+
"""
|
|
576
798
|
disable_tqdm = not show_progress
|
|
577
799
|
for detection in tqdm(detections, disable=disable_tqdm):
|
|
578
800
|
|
|
@@ -584,10 +806,21 @@ class DataLoader:
|
|
|
584
806
|
# update datum uid index
|
|
585
807
|
uid_index = self._add_datum(uid=detection.uid)
|
|
586
808
|
|
|
809
|
+
# initialize bounding box examples
|
|
810
|
+
self._evaluator.groundtruth_examples[uid_index] = np.zeros(
|
|
811
|
+
(len(detection.groundtruths), 4), dtype=np.float16
|
|
812
|
+
)
|
|
813
|
+
self._evaluator.prediction_examples[uid_index] = np.zeros(
|
|
814
|
+
(len(detection.predictions), 4), dtype=np.float16
|
|
815
|
+
)
|
|
816
|
+
|
|
587
817
|
# cache labels and annotations
|
|
588
818
|
keyed_groundtruths = defaultdict(list)
|
|
589
819
|
keyed_predictions = defaultdict(list)
|
|
590
820
|
for gidx, gann in enumerate(detection.groundtruths):
|
|
821
|
+
self._evaluator.groundtruth_examples[uid_index][
|
|
822
|
+
gidx
|
|
823
|
+
] = np.array(gann.extrema)
|
|
591
824
|
for glabel in gann.labels:
|
|
592
825
|
label_idx, label_key_idx = self._add_label(glabel)
|
|
593
826
|
self.groundtruth_count[label_idx][uid_index] += 1
|
|
@@ -599,6 +832,9 @@ class DataLoader:
|
|
|
599
832
|
)
|
|
600
833
|
)
|
|
601
834
|
for pidx, pann in enumerate(detection.predictions):
|
|
835
|
+
self._evaluator.prediction_examples[uid_index][
|
|
836
|
+
pidx
|
|
837
|
+
] = np.array(pann.extrema)
|
|
602
838
|
for plabel, pscore in zip(pann.labels, pann.scores):
|
|
603
839
|
label_idx, label_key_idx = self._add_label(plabel)
|
|
604
840
|
self.prediction_count[label_idx][uid_index] += 1
|
|
@@ -611,83 +847,28 @@ class DataLoader:
|
|
|
611
847
|
)
|
|
612
848
|
)
|
|
613
849
|
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
pairs = list()
|
|
621
|
-
for key in joint_keys:
|
|
622
|
-
boxes = np.array(
|
|
623
|
-
[
|
|
624
|
-
np.array([*gextrema, *pextrema])
|
|
625
|
-
for _, _, _, pextrema in keyed_predictions[key]
|
|
626
|
-
for _, _, gextrema in keyed_groundtruths[key]
|
|
627
|
-
]
|
|
628
|
-
)
|
|
629
|
-
ious = compute_iou(boxes)
|
|
630
|
-
pairs.extend(
|
|
631
|
-
[
|
|
632
|
-
np.array(
|
|
633
|
-
[
|
|
634
|
-
float(uid_index),
|
|
635
|
-
float(gidx),
|
|
636
|
-
float(pidx),
|
|
637
|
-
ious[
|
|
638
|
-
pidx * len(keyed_groundtruths[key]) + gidx
|
|
639
|
-
],
|
|
640
|
-
float(glabel),
|
|
641
|
-
float(plabel),
|
|
642
|
-
float(score),
|
|
643
|
-
]
|
|
644
|
-
)
|
|
645
|
-
for pidx, plabel, score, _ in keyed_predictions[key]
|
|
646
|
-
for gidx, glabel, _ in keyed_groundtruths[key]
|
|
647
|
-
]
|
|
648
|
-
)
|
|
649
|
-
for key in gt_unique_keys:
|
|
650
|
-
pairs.extend(
|
|
651
|
-
[
|
|
652
|
-
np.array(
|
|
653
|
-
[
|
|
654
|
-
float(uid_index),
|
|
655
|
-
float(gidx),
|
|
656
|
-
-1.0,
|
|
657
|
-
0.0,
|
|
658
|
-
float(glabel),
|
|
659
|
-
-1.0,
|
|
660
|
-
-1.0,
|
|
661
|
-
]
|
|
662
|
-
)
|
|
663
|
-
for gidx, glabel, _ in keyed_groundtruths[key]
|
|
664
|
-
]
|
|
665
|
-
)
|
|
666
|
-
for key in pd_unique_keys:
|
|
667
|
-
pairs.extend(
|
|
668
|
-
[
|
|
669
|
-
np.array(
|
|
670
|
-
[
|
|
671
|
-
float(uid_index),
|
|
672
|
-
-1.0,
|
|
673
|
-
float(pidx),
|
|
674
|
-
0.0,
|
|
675
|
-
-1.0,
|
|
676
|
-
float(plabel),
|
|
677
|
-
float(score),
|
|
678
|
-
]
|
|
679
|
-
)
|
|
680
|
-
for pidx, plabel, score, _ in keyed_predictions[key]
|
|
681
|
-
]
|
|
682
|
-
)
|
|
683
|
-
|
|
684
|
-
self.pairs.append(np.array(pairs))
|
|
850
|
+
self._add_data(
|
|
851
|
+
uid_index=uid_index,
|
|
852
|
+
keyed_groundtruths=keyed_groundtruths,
|
|
853
|
+
keyed_predictions=keyed_predictions,
|
|
854
|
+
)
|
|
685
855
|
|
|
686
856
|
def add_data_from_valor_dict(
|
|
687
857
|
self,
|
|
688
858
|
detections: list[tuple[dict, dict]],
|
|
689
859
|
show_progress: bool = False,
|
|
690
860
|
):
|
|
861
|
+
"""
|
|
862
|
+
Adds Valor-format detections to the cache.
|
|
863
|
+
|
|
864
|
+
Parameters
|
|
865
|
+
----------
|
|
866
|
+
detections : list[tuple[dict, dict]]
|
|
867
|
+
A list of groundtruth, prediction pairs in Valor-format dictionaries.
|
|
868
|
+
show_progress : bool, default=False
|
|
869
|
+
Toggle for tqdm progress bar.
|
|
870
|
+
"""
|
|
871
|
+
|
|
691
872
|
def _get_bbox_extrema(
|
|
692
873
|
data: list[list[list[float]]],
|
|
693
874
|
) -> tuple[float, float, float, float]:
|
|
@@ -706,10 +887,21 @@ class DataLoader:
|
|
|
706
887
|
# update datum uid index
|
|
707
888
|
uid_index = self._add_datum(uid=groundtruth["datum"]["uid"])
|
|
708
889
|
|
|
890
|
+
# initialize bounding box examples
|
|
891
|
+
self._evaluator.groundtruth_examples[uid_index] = np.zeros(
|
|
892
|
+
(len(groundtruth["annotations"]), 4), dtype=np.float16
|
|
893
|
+
)
|
|
894
|
+
self._evaluator.prediction_examples[uid_index] = np.zeros(
|
|
895
|
+
(len(prediction["annotations"]), 4), dtype=np.float16
|
|
896
|
+
)
|
|
897
|
+
|
|
709
898
|
# cache labels and annotations
|
|
710
899
|
keyed_groundtruths = defaultdict(list)
|
|
711
900
|
keyed_predictions = defaultdict(list)
|
|
712
901
|
for gidx, gann in enumerate(groundtruth["annotations"]):
|
|
902
|
+
self._evaluator.groundtruth_examples[uid_index][
|
|
903
|
+
gidx
|
|
904
|
+
] = np.array(_get_bbox_extrema(gann["bounding_box"]))
|
|
713
905
|
for valor_label in gann["labels"]:
|
|
714
906
|
glabel = (valor_label["key"], valor_label["value"])
|
|
715
907
|
label_idx, label_key_idx = self._add_label(glabel)
|
|
@@ -722,6 +914,9 @@ class DataLoader:
|
|
|
722
914
|
)
|
|
723
915
|
)
|
|
724
916
|
for pidx, pann in enumerate(prediction["annotations"]):
|
|
917
|
+
self._evaluator.prediction_examples[uid_index][
|
|
918
|
+
pidx
|
|
919
|
+
] = np.array(_get_bbox_extrema(pann["bounding_box"]))
|
|
725
920
|
for valor_label in pann["labels"]:
|
|
726
921
|
plabel = (valor_label["key"], valor_label["value"])
|
|
727
922
|
pscore = valor_label["score"]
|
|
@@ -736,79 +931,21 @@ class DataLoader:
|
|
|
736
931
|
)
|
|
737
932
|
)
|
|
738
933
|
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
pairs = list()
|
|
746
|
-
for key in joint_keys:
|
|
747
|
-
boxes = np.array(
|
|
748
|
-
[
|
|
749
|
-
np.array([*gextrema, *pextrema])
|
|
750
|
-
for _, _, _, pextrema in keyed_predictions[key]
|
|
751
|
-
for _, _, gextrema in keyed_groundtruths[key]
|
|
752
|
-
]
|
|
753
|
-
)
|
|
754
|
-
ious = compute_iou(boxes)
|
|
755
|
-
pairs.extend(
|
|
756
|
-
[
|
|
757
|
-
np.array(
|
|
758
|
-
[
|
|
759
|
-
float(uid_index),
|
|
760
|
-
float(gidx),
|
|
761
|
-
float(pidx),
|
|
762
|
-
ious[
|
|
763
|
-
pidx * len(keyed_groundtruths[key]) + gidx
|
|
764
|
-
],
|
|
765
|
-
float(glabel),
|
|
766
|
-
float(plabel),
|
|
767
|
-
float(score),
|
|
768
|
-
]
|
|
769
|
-
)
|
|
770
|
-
for pidx, plabel, score, _ in keyed_predictions[key]
|
|
771
|
-
for gidx, glabel, _ in keyed_groundtruths[key]
|
|
772
|
-
]
|
|
773
|
-
)
|
|
774
|
-
for key in gt_unique_keys:
|
|
775
|
-
pairs.extend(
|
|
776
|
-
[
|
|
777
|
-
np.array(
|
|
778
|
-
[
|
|
779
|
-
float(uid_index),
|
|
780
|
-
float(gidx),
|
|
781
|
-
-1.0,
|
|
782
|
-
0.0,
|
|
783
|
-
float(glabel),
|
|
784
|
-
-1.0,
|
|
785
|
-
-1.0,
|
|
786
|
-
]
|
|
787
|
-
)
|
|
788
|
-
for gidx, glabel, _ in keyed_groundtruths[key]
|
|
789
|
-
]
|
|
790
|
-
)
|
|
791
|
-
for key in pd_unique_keys:
|
|
792
|
-
pairs.extend(
|
|
793
|
-
[
|
|
794
|
-
np.array(
|
|
795
|
-
[
|
|
796
|
-
float(uid_index),
|
|
797
|
-
-1.0,
|
|
798
|
-
float(pidx),
|
|
799
|
-
0.0,
|
|
800
|
-
-1.0,
|
|
801
|
-
float(plabel),
|
|
802
|
-
float(score),
|
|
803
|
-
]
|
|
804
|
-
)
|
|
805
|
-
for pidx, plabel, score, _ in keyed_predictions[key]
|
|
806
|
-
]
|
|
807
|
-
)
|
|
808
|
-
|
|
809
|
-
self.pairs.append(np.array(pairs))
|
|
934
|
+
self._add_data(
|
|
935
|
+
uid_index=uid_index,
|
|
936
|
+
keyed_groundtruths=keyed_groundtruths,
|
|
937
|
+
keyed_predictions=keyed_predictions,
|
|
938
|
+
)
|
|
810
939
|
|
|
811
940
|
def finalize(self) -> Evaluator:
|
|
941
|
+
"""
|
|
942
|
+
Performs data finalization and some preprocessing steps.
|
|
943
|
+
|
|
944
|
+
Returns
|
|
945
|
+
-------
|
|
946
|
+
Evaluator
|
|
947
|
+
A ready-to-use evaluator object.
|
|
948
|
+
"""
|
|
812
949
|
|
|
813
950
|
self.pairs = [pair for pair in self.pairs if pair.size > 0]
|
|
814
951
|
if len(self.pairs) == 0:
|
valor_lite/detection/metric.py
CHANGED
|
@@ -21,6 +21,25 @@ class MetricType(str, Enum):
|
|
|
21
21
|
PrecisionRecallCurve = "PrecisionRecallCurve"
|
|
22
22
|
DetailedCounts = "DetailedCounts"
|
|
23
23
|
|
|
24
|
+
@classmethod
|
|
25
|
+
def base_metrics(cls):
|
|
26
|
+
return [
|
|
27
|
+
cls.Counts,
|
|
28
|
+
cls.Accuracy,
|
|
29
|
+
cls.Precision,
|
|
30
|
+
cls.Recall,
|
|
31
|
+
cls.F1,
|
|
32
|
+
cls.AP,
|
|
33
|
+
cls.AR,
|
|
34
|
+
cls.mAP,
|
|
35
|
+
cls.mAR,
|
|
36
|
+
cls.APAveragedOverIOUs,
|
|
37
|
+
cls.mAPAveragedOverIOUs,
|
|
38
|
+
cls.ARAveragedOverScores,
|
|
39
|
+
cls.mARAveragedOverScores,
|
|
40
|
+
cls.PrecisionRecallCurve,
|
|
41
|
+
]
|
|
42
|
+
|
|
24
43
|
|
|
25
44
|
@dataclass
|
|
26
45
|
class Counts:
|
|
@@ -316,11 +335,19 @@ class DetailedCounts:
|
|
|
316
335
|
fp_hallucination: list[int]
|
|
317
336
|
fn_misclassification: list[int]
|
|
318
337
|
fn_missing_prediction: list[int]
|
|
319
|
-
tp_examples: list[list[str]]
|
|
320
|
-
fp_misclassification_examples: list[
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
338
|
+
tp_examples: list[list[tuple[str, tuple[float, float, float, float]]]]
|
|
339
|
+
fp_misclassification_examples: list[
|
|
340
|
+
list[tuple[str, tuple[float, float, float, float]]]
|
|
341
|
+
]
|
|
342
|
+
fp_hallucination_examples: list[
|
|
343
|
+
list[tuple[str, tuple[float, float, float, float]]]
|
|
344
|
+
]
|
|
345
|
+
fn_misclassification_examples: list[
|
|
346
|
+
list[tuple[str, tuple[float, float, float, float]]]
|
|
347
|
+
]
|
|
348
|
+
fn_missing_prediction_examples: list[
|
|
349
|
+
list[tuple[str, tuple[float, float, float, float]]]
|
|
350
|
+
]
|
|
324
351
|
score_thresholds: list[float]
|
|
325
352
|
iou_threshold: float
|
|
326
353
|
label: tuple[str, str]
|
|
@@ -335,13 +362,11 @@ class DetailedCounts:
|
|
|
335
362
|
"fp_hallucination": self.fp_hallucination,
|
|
336
363
|
"fn_misclassification": self.fn_misclassification,
|
|
337
364
|
"fn_missing_prediction": self.fn_missing_prediction,
|
|
338
|
-
"tn": None,
|
|
339
365
|
"tp_examples": self.tp_examples,
|
|
340
366
|
"fp_misclassification_examples": self.fp_misclassification_examples,
|
|
341
367
|
"fp_hallucination_examples": self.fp_hallucination_examples,
|
|
342
368
|
"fn_misclassification_examples": self.fn_misclassification_examples,
|
|
343
369
|
"fn_missing_prediction_examples": self.fn_missing_prediction_examples,
|
|
344
|
-
"tn_examples": None,
|
|
345
370
|
},
|
|
346
371
|
parameters={
|
|
347
372
|
"score_thresholds": self.score_thresholds,
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
valor_lite/schemas.py,sha256=r4cC10w1xYsA785KmGE4ePeOX3wzEs846vT7QAiVg_I,293
|
|
3
|
+
valor_lite/detection/__init__.py,sha256=WHLHwHoKzXTBjkjC6E1_lhqB7gRWkiGWVWPqkKn-yK8,997
|
|
4
|
+
valor_lite/detection/annotation.py,sha256=c45pZD1Pp2vf5GeyW_6Kl9JCx5FoaaktCaaa4q3QDUo,1758
|
|
5
|
+
valor_lite/detection/computation.py,sha256=7PttK0VuOWlhRN92wpLVrGzB7RAdfdZyT3b1aTm_WaI,23214
|
|
6
|
+
valor_lite/detection/manager.py,sha256=ziVnukGs-WrkyBEBBO3LVSv4LTbaWFaWqLWarVosj2c,35807
|
|
7
|
+
valor_lite/detection/metric.py,sha256=DLqpODJZOG7SCqt7TCgR4am68PQORRCIQW_SXiTb1IA,9473
|
|
8
|
+
valor_lite-0.33.3.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
|
|
9
|
+
valor_lite-0.33.3.dist-info/METADATA,sha256=FBpd-wMWv-m37EK5BfFuiVmnJXg4GNzCaJrTDHv4-gE,1842
|
|
10
|
+
valor_lite-0.33.3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
11
|
+
valor_lite-0.33.3.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
|
|
12
|
+
valor_lite-0.33.3.dist-info/RECORD,,
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
valor_lite/schemas.py,sha256=r4cC10w1xYsA785KmGE4ePeOX3wzEs846vT7QAiVg_I,293
|
|
3
|
-
valor_lite/detection/__init__.py,sha256=WHLHwHoKzXTBjkjC6E1_lhqB7gRWkiGWVWPqkKn-yK8,997
|
|
4
|
-
valor_lite/detection/annotation.py,sha256=ON9iVa33pxysUmZVTCb0wNz-eFX6MDOqDhGDz-ouymc,1466
|
|
5
|
-
valor_lite/detection/computation.py,sha256=2FnVw6_dcAOvwCqpU9bIkeD7gPqDzfW48WSARnvKeOg,18873
|
|
6
|
-
valor_lite/detection/manager.py,sha256=HyODoIkmj92Kfspnpojp1pUY7noAw3FuCgQ36r6vMa4,32356
|
|
7
|
-
valor_lite/detection/metric.py,sha256=hHqClS7c71ztoUnfoaW3T7RmGYaVNU1SlM6vUs1P08I,8809
|
|
8
|
-
valor_lite-0.33.1.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
|
|
9
|
-
valor_lite-0.33.1.dist-info/METADATA,sha256=W36vWkCaas8e0H5RqfGwwlh5FritdeNO7bBj8r-lf6s,1842
|
|
10
|
-
valor_lite-0.33.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
11
|
-
valor_lite-0.33.1.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
|
|
12
|
-
valor_lite-0.33.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|