valor-lite 0.33.4__py3-none-any.whl → 0.33.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valor-lite might be problematic. Click here for more details.
- valor_lite/classification/__init__.py +30 -0
- valor_lite/classification/annotation.py +13 -0
- valor_lite/classification/computation.py +411 -0
- valor_lite/classification/manager.py +842 -0
- valor_lite/classification/metric.py +191 -0
- valor_lite/detection/__init__.py +11 -6
- valor_lite/detection/computation.py +208 -152
- valor_lite/detection/manager.py +354 -133
- valor_lite/detection/metric.py +60 -34
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.6.dist-info}/METADATA +1 -1
- valor_lite-0.33.6.dist-info/RECORD +17 -0
- valor_lite-0.33.4.dist-info/RECORD +0 -12
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.6.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.6.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.6.dist-info}/top_level.txt +0 -0
valor_lite/detection/manager.py
CHANGED
|
@@ -14,7 +14,7 @@ from valor_lite.detection.annotation import (
|
|
|
14
14
|
from valor_lite.detection.computation import (
|
|
15
15
|
compute_bbox_iou,
|
|
16
16
|
compute_bitmask_iou,
|
|
17
|
-
|
|
17
|
+
compute_confusion_matrix,
|
|
18
18
|
compute_metrics,
|
|
19
19
|
compute_polygon_iou,
|
|
20
20
|
compute_ranked_pairs,
|
|
@@ -26,8 +26,8 @@ from valor_lite.detection.metric import (
|
|
|
26
26
|
Accuracy,
|
|
27
27
|
APAveragedOverIOUs,
|
|
28
28
|
ARAveragedOverScores,
|
|
29
|
+
ConfusionMatrix,
|
|
29
30
|
Counts,
|
|
30
|
-
DetailedCounts,
|
|
31
31
|
MetricType,
|
|
32
32
|
Precision,
|
|
33
33
|
PrecisionRecallCurve,
|
|
@@ -158,7 +158,8 @@ def compute_iou(
|
|
|
158
158
|
|
|
159
159
|
@dataclass
|
|
160
160
|
class Filter:
|
|
161
|
-
|
|
161
|
+
ranked_indices: NDArray[np.int32]
|
|
162
|
+
detailed_indices: NDArray[np.int32]
|
|
162
163
|
label_metadata: NDArray[np.int32]
|
|
163
164
|
|
|
164
165
|
|
|
@@ -257,12 +258,14 @@ class Evaluator:
|
|
|
257
258
|
Filter
|
|
258
259
|
A filter object that can be passed to the `evaluate` method.
|
|
259
260
|
"""
|
|
260
|
-
n_rows = self._ranked_pairs.shape[0]
|
|
261
261
|
|
|
262
262
|
n_datums = self._label_metadata_per_datum.shape[1]
|
|
263
263
|
n_labels = self._label_metadata_per_datum.shape[2]
|
|
264
264
|
|
|
265
|
-
|
|
265
|
+
mask_ranked = np.ones((self._ranked_pairs.shape[0], 1), dtype=np.bool_)
|
|
266
|
+
mask_detailed = np.ones(
|
|
267
|
+
(self._detailed_pairs.shape[0], 1), dtype=np.bool_
|
|
268
|
+
)
|
|
266
269
|
mask_datums = np.ones(n_datums, dtype=np.bool_)
|
|
267
270
|
mask_labels = np.ones(n_labels, dtype=np.bool_)
|
|
268
271
|
|
|
@@ -272,9 +275,12 @@ class Evaluator:
|
|
|
272
275
|
[self.uid_to_index[uid] for uid in datum_uids],
|
|
273
276
|
dtype=np.int32,
|
|
274
277
|
)
|
|
275
|
-
|
|
278
|
+
mask_ranked[
|
|
276
279
|
~np.isin(self._ranked_pairs[:, 0].astype(int), datum_uids)
|
|
277
280
|
] = False
|
|
281
|
+
mask_detailed[
|
|
282
|
+
~np.isin(self._detailed_pairs[:, 0].astype(int), datum_uids)
|
|
283
|
+
] = False
|
|
278
284
|
mask_datums[~np.isin(np.arange(n_datums), datum_uids)] = False
|
|
279
285
|
|
|
280
286
|
if labels is not None:
|
|
@@ -282,9 +288,12 @@ class Evaluator:
|
|
|
282
288
|
labels = np.array(
|
|
283
289
|
[self.label_to_index[label] for label in labels]
|
|
284
290
|
)
|
|
285
|
-
|
|
291
|
+
mask_ranked[
|
|
286
292
|
~np.isin(self._ranked_pairs[:, 4].astype(int), labels)
|
|
287
293
|
] = False
|
|
294
|
+
mask_detailed[
|
|
295
|
+
~np.isin(self._detailed_pairs[:, 4].astype(int), labels)
|
|
296
|
+
] = False
|
|
288
297
|
mask_labels[~np.isin(np.arange(n_labels), labels)] = False
|
|
289
298
|
|
|
290
299
|
if label_keys is not None:
|
|
@@ -297,14 +306,19 @@ class Evaluator:
|
|
|
297
306
|
if label_keys.size > 0
|
|
298
307
|
else np.array([])
|
|
299
308
|
)
|
|
300
|
-
|
|
309
|
+
mask_ranked[
|
|
301
310
|
~np.isin(self._ranked_pairs[:, 4].astype(int), label_indices)
|
|
302
311
|
] = False
|
|
312
|
+
mask_detailed[
|
|
313
|
+
~np.isin(self._detailed_pairs[:, 4].astype(int), label_indices)
|
|
314
|
+
] = False
|
|
303
315
|
mask_labels[~np.isin(np.arange(n_labels), label_indices)] = False
|
|
304
316
|
|
|
305
|
-
|
|
317
|
+
mask_label_metadata = (
|
|
318
|
+
mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
|
|
319
|
+
)
|
|
306
320
|
label_metadata_per_datum = self._label_metadata_per_datum.copy()
|
|
307
|
-
label_metadata_per_datum[:, ~
|
|
321
|
+
label_metadata_per_datum[:, ~mask_label_metadata] = 0
|
|
308
322
|
|
|
309
323
|
label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
|
|
310
324
|
label_metadata[:, :2] = np.transpose(
|
|
@@ -316,7 +330,8 @@ class Evaluator:
|
|
|
316
330
|
label_metadata[:, 2] = self._label_metadata[:, 2]
|
|
317
331
|
|
|
318
332
|
return Filter(
|
|
319
|
-
|
|
333
|
+
ranked_indices=np.where(mask_ranked)[0],
|
|
334
|
+
detailed_indices=np.where(mask_detailed)[0],
|
|
320
335
|
label_metadata=label_metadata,
|
|
321
336
|
)
|
|
322
337
|
|
|
@@ -340,7 +355,7 @@ class Evaluator:
|
|
|
340
355
|
score_thresholds : list[float]
|
|
341
356
|
A list of score thresholds to compute metrics over.
|
|
342
357
|
number_of_examples : int, default=0
|
|
343
|
-
|
|
358
|
+
Maximum number of annotation examples to return in ConfusionMatrix.
|
|
344
359
|
filter_ : Filter, optional
|
|
345
360
|
An optional filter object.
|
|
346
361
|
|
|
@@ -350,10 +365,12 @@ class Evaluator:
|
|
|
350
365
|
A dictionary mapping MetricType enumerations to lists of computed metrics.
|
|
351
366
|
"""
|
|
352
367
|
|
|
353
|
-
|
|
368
|
+
ranked_pairs = self._ranked_pairs
|
|
369
|
+
detailed_pairs = self._detailed_pairs
|
|
354
370
|
label_metadata = self._label_metadata
|
|
355
371
|
if filter_ is not None:
|
|
356
|
-
|
|
372
|
+
ranked_pairs = ranked_pairs[filter_.ranked_indices]
|
|
373
|
+
detailed_pairs = detailed_pairs[filter_.detailed_indices]
|
|
357
374
|
label_metadata = filter_.label_metadata
|
|
358
375
|
|
|
359
376
|
(
|
|
@@ -372,7 +389,7 @@ class Evaluator:
|
|
|
372
389
|
precision_recall,
|
|
373
390
|
pr_curves,
|
|
374
391
|
) = compute_metrics(
|
|
375
|
-
data=
|
|
392
|
+
data=ranked_pairs,
|
|
376
393
|
label_metadata=label_metadata,
|
|
377
394
|
iou_thresholds=np.array(iou_thresholds),
|
|
378
395
|
score_thresholds=np.array(score_thresholds),
|
|
@@ -527,11 +544,15 @@ class Evaluator:
|
|
|
527
544
|
)
|
|
528
545
|
)
|
|
529
546
|
|
|
530
|
-
if MetricType.
|
|
531
|
-
metrics[
|
|
547
|
+
if MetricType.ConfusionMatrix in metrics_to_return:
|
|
548
|
+
metrics[
|
|
549
|
+
MetricType.ConfusionMatrix
|
|
550
|
+
] = self._compute_confusion_matrix(
|
|
551
|
+
data=detailed_pairs,
|
|
552
|
+
label_metadata=label_metadata,
|
|
532
553
|
iou_thresholds=iou_thresholds,
|
|
533
554
|
score_thresholds=score_thresholds,
|
|
534
|
-
|
|
555
|
+
number_of_examples=number_of_examples,
|
|
535
556
|
)
|
|
536
557
|
|
|
537
558
|
for metric in set(metrics.keys()):
|
|
@@ -540,149 +561,349 @@ class Evaluator:
|
|
|
540
561
|
|
|
541
562
|
return metrics
|
|
542
563
|
|
|
543
|
-
def
|
|
564
|
+
def _unpack_confusion_matrix(
|
|
565
|
+
self,
|
|
566
|
+
confusion_matrix: NDArray[np.floating],
|
|
567
|
+
label_key_idx: int,
|
|
568
|
+
number_of_labels: int,
|
|
569
|
+
number_of_examples: int,
|
|
570
|
+
) -> dict[
|
|
571
|
+
str,
|
|
572
|
+
dict[
|
|
573
|
+
str,
|
|
574
|
+
dict[
|
|
575
|
+
str,
|
|
576
|
+
int
|
|
577
|
+
| list[
|
|
578
|
+
dict[
|
|
579
|
+
str,
|
|
580
|
+
str | float | tuple[float, float, float, float],
|
|
581
|
+
]
|
|
582
|
+
],
|
|
583
|
+
],
|
|
584
|
+
],
|
|
585
|
+
]:
|
|
586
|
+
"""
|
|
587
|
+
Unpacks a numpy array of confusion matrix counts and examples.
|
|
588
|
+
"""
|
|
589
|
+
|
|
590
|
+
datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
591
|
+
confusion_matrix[
|
|
592
|
+
gt_label_idx,
|
|
593
|
+
pd_label_idx,
|
|
594
|
+
example_idx * 4 + 1,
|
|
595
|
+
]
|
|
596
|
+
)
|
|
597
|
+
|
|
598
|
+
groundtruth_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
599
|
+
confusion_matrix[
|
|
600
|
+
gt_label_idx,
|
|
601
|
+
pd_label_idx,
|
|
602
|
+
example_idx * 4 + 2,
|
|
603
|
+
]
|
|
604
|
+
)
|
|
605
|
+
|
|
606
|
+
prediction_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
607
|
+
confusion_matrix[
|
|
608
|
+
gt_label_idx,
|
|
609
|
+
pd_label_idx,
|
|
610
|
+
example_idx * 4 + 3,
|
|
611
|
+
]
|
|
612
|
+
)
|
|
613
|
+
|
|
614
|
+
score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
|
|
615
|
+
confusion_matrix[
|
|
616
|
+
gt_label_idx,
|
|
617
|
+
pd_label_idx,
|
|
618
|
+
example_idx * 4 + 4,
|
|
619
|
+
]
|
|
620
|
+
)
|
|
621
|
+
|
|
622
|
+
return {
|
|
623
|
+
self.index_to_label[gt_label_idx][1]: {
|
|
624
|
+
self.index_to_label[pd_label_idx][1]: {
|
|
625
|
+
"count": max(
|
|
626
|
+
int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
|
|
627
|
+
0,
|
|
628
|
+
),
|
|
629
|
+
"examples": [
|
|
630
|
+
{
|
|
631
|
+
"datum": self.index_to_uid[
|
|
632
|
+
datum_idx(
|
|
633
|
+
gt_label_idx, pd_label_idx, example_idx
|
|
634
|
+
)
|
|
635
|
+
],
|
|
636
|
+
"groundtruth": tuple(
|
|
637
|
+
self.groundtruth_examples[
|
|
638
|
+
datum_idx(
|
|
639
|
+
gt_label_idx,
|
|
640
|
+
pd_label_idx,
|
|
641
|
+
example_idx,
|
|
642
|
+
)
|
|
643
|
+
][
|
|
644
|
+
groundtruth_idx(
|
|
645
|
+
gt_label_idx,
|
|
646
|
+
pd_label_idx,
|
|
647
|
+
example_idx,
|
|
648
|
+
)
|
|
649
|
+
].tolist()
|
|
650
|
+
),
|
|
651
|
+
"prediction": tuple(
|
|
652
|
+
self.prediction_examples[
|
|
653
|
+
datum_idx(
|
|
654
|
+
gt_label_idx,
|
|
655
|
+
pd_label_idx,
|
|
656
|
+
example_idx,
|
|
657
|
+
)
|
|
658
|
+
][
|
|
659
|
+
prediction_idx(
|
|
660
|
+
gt_label_idx,
|
|
661
|
+
pd_label_idx,
|
|
662
|
+
example_idx,
|
|
663
|
+
)
|
|
664
|
+
].tolist()
|
|
665
|
+
),
|
|
666
|
+
"score": score_idx(
|
|
667
|
+
gt_label_idx, pd_label_idx, example_idx
|
|
668
|
+
),
|
|
669
|
+
}
|
|
670
|
+
for example_idx in range(number_of_examples)
|
|
671
|
+
if datum_idx(gt_label_idx, pd_label_idx, example_idx)
|
|
672
|
+
>= 0
|
|
673
|
+
],
|
|
674
|
+
}
|
|
675
|
+
for pd_label_idx in range(number_of_labels)
|
|
676
|
+
if (
|
|
677
|
+
self.label_index_to_label_key_index[pd_label_idx]
|
|
678
|
+
== label_key_idx
|
|
679
|
+
)
|
|
680
|
+
}
|
|
681
|
+
for gt_label_idx in range(number_of_labels)
|
|
682
|
+
if (
|
|
683
|
+
self.label_index_to_label_key_index[gt_label_idx]
|
|
684
|
+
== label_key_idx
|
|
685
|
+
)
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
def _unpack_hallucinations(
|
|
689
|
+
self,
|
|
690
|
+
hallucinations: NDArray[np.floating],
|
|
691
|
+
label_key_idx: int,
|
|
692
|
+
number_of_labels: int,
|
|
693
|
+
number_of_examples: int,
|
|
694
|
+
) -> dict[
|
|
695
|
+
str,
|
|
696
|
+
dict[
|
|
697
|
+
str,
|
|
698
|
+
int
|
|
699
|
+
| list[dict[str, str | float | tuple[float, float, float, float]]],
|
|
700
|
+
],
|
|
701
|
+
]:
|
|
702
|
+
"""
|
|
703
|
+
Unpacks a numpy array of hallucination counts and examples.
|
|
704
|
+
"""
|
|
705
|
+
|
|
706
|
+
datum_idx = (
|
|
707
|
+
lambda pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
708
|
+
hallucinations[
|
|
709
|
+
pd_label_idx,
|
|
710
|
+
example_idx * 3 + 1,
|
|
711
|
+
]
|
|
712
|
+
)
|
|
713
|
+
)
|
|
714
|
+
|
|
715
|
+
prediction_idx = (
|
|
716
|
+
lambda pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
717
|
+
hallucinations[
|
|
718
|
+
pd_label_idx,
|
|
719
|
+
example_idx * 3 + 2,
|
|
720
|
+
]
|
|
721
|
+
)
|
|
722
|
+
)
|
|
723
|
+
|
|
724
|
+
score_idx = (
|
|
725
|
+
lambda pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
|
|
726
|
+
hallucinations[
|
|
727
|
+
pd_label_idx,
|
|
728
|
+
example_idx * 3 + 3,
|
|
729
|
+
]
|
|
730
|
+
)
|
|
731
|
+
)
|
|
732
|
+
|
|
733
|
+
return {
|
|
734
|
+
self.index_to_label[pd_label_idx][1]: {
|
|
735
|
+
"count": max(
|
|
736
|
+
int(hallucinations[pd_label_idx, 0]),
|
|
737
|
+
0,
|
|
738
|
+
),
|
|
739
|
+
"examples": [
|
|
740
|
+
{
|
|
741
|
+
"datum": self.index_to_uid[
|
|
742
|
+
datum_idx(pd_label_idx, example_idx)
|
|
743
|
+
],
|
|
744
|
+
"prediction": tuple(
|
|
745
|
+
self.prediction_examples[
|
|
746
|
+
datum_idx(pd_label_idx, example_idx)
|
|
747
|
+
][
|
|
748
|
+
prediction_idx(pd_label_idx, example_idx)
|
|
749
|
+
].tolist()
|
|
750
|
+
),
|
|
751
|
+
"score": score_idx(pd_label_idx, example_idx),
|
|
752
|
+
}
|
|
753
|
+
for example_idx in range(number_of_examples)
|
|
754
|
+
if datum_idx(pd_label_idx, example_idx) >= 0
|
|
755
|
+
],
|
|
756
|
+
}
|
|
757
|
+
for pd_label_idx in range(number_of_labels)
|
|
758
|
+
if (
|
|
759
|
+
self.label_index_to_label_key_index[pd_label_idx]
|
|
760
|
+
== label_key_idx
|
|
761
|
+
)
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
def _unpack_missing_predictions(
|
|
544
765
|
self,
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
766
|
+
missing_predictions: NDArray[np.int32],
|
|
767
|
+
label_key_idx: int,
|
|
768
|
+
number_of_labels: int,
|
|
769
|
+
number_of_examples: int,
|
|
770
|
+
) -> dict[
|
|
771
|
+
str,
|
|
772
|
+
dict[
|
|
773
|
+
str,
|
|
774
|
+
int | list[dict[str, str | tuple[float, float, float, float]]],
|
|
548
775
|
],
|
|
549
|
-
|
|
550
|
-
|
|
776
|
+
]:
|
|
777
|
+
"""
|
|
778
|
+
Unpacks a numpy array of missing prediction counts and examples.
|
|
779
|
+
"""
|
|
780
|
+
|
|
781
|
+
datum_idx = (
|
|
782
|
+
lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
783
|
+
missing_predictions[
|
|
784
|
+
gt_label_idx,
|
|
785
|
+
example_idx * 2 + 1,
|
|
786
|
+
]
|
|
787
|
+
)
|
|
788
|
+
)
|
|
789
|
+
|
|
790
|
+
groundtruth_idx = (
|
|
791
|
+
lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
792
|
+
missing_predictions[
|
|
793
|
+
gt_label_idx,
|
|
794
|
+
example_idx * 2 + 2,
|
|
795
|
+
]
|
|
796
|
+
)
|
|
797
|
+
)
|
|
798
|
+
|
|
799
|
+
return {
|
|
800
|
+
self.index_to_label[gt_label_idx][1]: {
|
|
801
|
+
"count": max(
|
|
802
|
+
int(missing_predictions[gt_label_idx, 0]),
|
|
803
|
+
0,
|
|
804
|
+
),
|
|
805
|
+
"examples": [
|
|
806
|
+
{
|
|
807
|
+
"datum": self.index_to_uid[
|
|
808
|
+
datum_idx(gt_label_idx, example_idx)
|
|
809
|
+
],
|
|
810
|
+
"groundtruth": tuple(
|
|
811
|
+
self.groundtruth_examples[
|
|
812
|
+
datum_idx(gt_label_idx, example_idx)
|
|
813
|
+
][
|
|
814
|
+
groundtruth_idx(gt_label_idx, example_idx)
|
|
815
|
+
].tolist()
|
|
816
|
+
),
|
|
817
|
+
}
|
|
818
|
+
for example_idx in range(number_of_examples)
|
|
819
|
+
if datum_idx(gt_label_idx, example_idx) >= 0
|
|
820
|
+
],
|
|
821
|
+
}
|
|
822
|
+
for gt_label_idx in range(number_of_labels)
|
|
823
|
+
if (
|
|
824
|
+
self.label_index_to_label_key_index[gt_label_idx]
|
|
825
|
+
== label_key_idx
|
|
826
|
+
)
|
|
827
|
+
}
|
|
828
|
+
|
|
829
|
+
def _compute_confusion_matrix(
|
|
830
|
+
self,
|
|
831
|
+
data: NDArray[np.floating],
|
|
832
|
+
label_metadata: NDArray[np.int32],
|
|
833
|
+
iou_thresholds: list[float],
|
|
834
|
+
score_thresholds: list[float],
|
|
835
|
+
number_of_examples: int,
|
|
836
|
+
) -> list[ConfusionMatrix]:
|
|
551
837
|
"""
|
|
552
838
|
Computes detailed counting metrics.
|
|
553
839
|
|
|
554
840
|
Parameters
|
|
555
841
|
----------
|
|
556
|
-
|
|
842
|
+
data : NDArray[np.floating]
|
|
843
|
+
An array containing detailed pairs of detections.
|
|
844
|
+
label_metadata : NDArray[np.int32]
|
|
845
|
+
An array containing label metadata.
|
|
846
|
+
iou_thresholds : list[float]
|
|
557
847
|
List of IoU thresholds to compute metrics for.
|
|
558
|
-
score_thresholds : list[float]
|
|
848
|
+
score_thresholds : list[float]
|
|
559
849
|
List of confidence thresholds to compute metrics for.
|
|
560
|
-
|
|
561
|
-
|
|
850
|
+
number_of_examples : int
|
|
851
|
+
Maximum number of annotation examples to return per metric.
|
|
562
852
|
|
|
563
853
|
Returns
|
|
564
854
|
-------
|
|
565
|
-
list[list[
|
|
855
|
+
list[list[ConfusionMatrix]]
|
|
566
856
|
Outer list is indexed by label, inner list is by IoU.
|
|
567
857
|
"""
|
|
568
858
|
|
|
569
|
-
if
|
|
859
|
+
if data.size == 0:
|
|
570
860
|
return list()
|
|
571
861
|
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
862
|
+
(
|
|
863
|
+
confusion_matrix,
|
|
864
|
+
hallucinations,
|
|
865
|
+
missing_predictions,
|
|
866
|
+
) = compute_confusion_matrix(
|
|
867
|
+
data=data,
|
|
868
|
+
label_metadata=label_metadata,
|
|
575
869
|
iou_thresholds=np.array(iou_thresholds),
|
|
576
870
|
score_thresholds=np.array(score_thresholds),
|
|
577
|
-
|
|
871
|
+
n_examples=number_of_examples,
|
|
578
872
|
)
|
|
579
873
|
|
|
580
|
-
|
|
581
|
-
fp_misclf_idx = 2 * n_samples + 1
|
|
582
|
-
fp_halluc_idx = 4 * n_samples + 2
|
|
583
|
-
fn_misclf_idx = 6 * n_samples + 3
|
|
584
|
-
fn_misprd_idx = 8 * n_samples + 4
|
|
585
|
-
|
|
586
|
-
def _unpack_examples(
|
|
587
|
-
iou_idx: int,
|
|
588
|
-
label_idx: int,
|
|
589
|
-
type_idx: int,
|
|
590
|
-
example_source: dict[int, NDArray[np.float16]],
|
|
591
|
-
) -> list[list[tuple[str, tuple[float, float, float, float]]]]:
|
|
592
|
-
"""
|
|
593
|
-
Unpacks metric examples from computation.
|
|
594
|
-
"""
|
|
595
|
-
type_idx += 1
|
|
596
|
-
|
|
597
|
-
results = list()
|
|
598
|
-
for score_idx in range(n_scores):
|
|
599
|
-
examples = list()
|
|
600
|
-
for example_idx in range(n_samples):
|
|
601
|
-
datum_idx = metrics[
|
|
602
|
-
iou_idx,
|
|
603
|
-
score_idx,
|
|
604
|
-
label_idx,
|
|
605
|
-
type_idx + example_idx * 2,
|
|
606
|
-
]
|
|
607
|
-
annotation_idx = metrics[
|
|
608
|
-
iou_idx,
|
|
609
|
-
score_idx,
|
|
610
|
-
label_idx,
|
|
611
|
-
type_idx + example_idx * 2 + 1,
|
|
612
|
-
]
|
|
613
|
-
if datum_idx >= 0:
|
|
614
|
-
examples.append(
|
|
615
|
-
(
|
|
616
|
-
self.index_to_uid[datum_idx],
|
|
617
|
-
tuple(
|
|
618
|
-
example_source[datum_idx][
|
|
619
|
-
annotation_idx
|
|
620
|
-
].tolist()
|
|
621
|
-
),
|
|
622
|
-
)
|
|
623
|
-
)
|
|
624
|
-
results.append(examples)
|
|
625
|
-
|
|
626
|
-
return results
|
|
627
|
-
|
|
628
|
-
n_ious, n_scores, n_labels, _ = metrics.shape
|
|
874
|
+
n_ious, n_scores, n_labels, _, _ = confusion_matrix.shape
|
|
629
875
|
return [
|
|
630
|
-
|
|
876
|
+
ConfusionMatrix(
|
|
631
877
|
iou_threshold=iou_thresholds[iou_idx],
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
.tolist(),
|
|
643
|
-
fn_misclassification=metrics[
|
|
644
|
-
iou_idx, :, label_idx, fn_misclf_idx
|
|
645
|
-
]
|
|
646
|
-
.astype(int)
|
|
647
|
-
.tolist(),
|
|
648
|
-
fn_missing_prediction=metrics[
|
|
649
|
-
iou_idx, :, label_idx, fn_misprd_idx
|
|
650
|
-
]
|
|
651
|
-
.astype(int)
|
|
652
|
-
.tolist(),
|
|
653
|
-
tp_examples=_unpack_examples(
|
|
654
|
-
iou_idx=iou_idx,
|
|
655
|
-
label_idx=label_idx,
|
|
656
|
-
type_idx=tp_idx,
|
|
657
|
-
example_source=self.prediction_examples,
|
|
658
|
-
),
|
|
659
|
-
fp_misclassification_examples=_unpack_examples(
|
|
660
|
-
iou_idx=iou_idx,
|
|
661
|
-
label_idx=label_idx,
|
|
662
|
-
type_idx=fp_misclf_idx,
|
|
663
|
-
example_source=self.prediction_examples,
|
|
664
|
-
),
|
|
665
|
-
fp_hallucination_examples=_unpack_examples(
|
|
666
|
-
iou_idx=iou_idx,
|
|
667
|
-
label_idx=label_idx,
|
|
668
|
-
type_idx=fp_halluc_idx,
|
|
669
|
-
example_source=self.prediction_examples,
|
|
878
|
+
score_threshold=score_thresholds[score_idx],
|
|
879
|
+
label_key=label_key,
|
|
880
|
+
number_of_examples=number_of_examples,
|
|
881
|
+
confusion_matrix=self._unpack_confusion_matrix(
|
|
882
|
+
confusion_matrix=confusion_matrix[
|
|
883
|
+
iou_idx, score_idx, :, :, :
|
|
884
|
+
],
|
|
885
|
+
label_key_idx=label_key_idx,
|
|
886
|
+
number_of_labels=n_labels,
|
|
887
|
+
number_of_examples=number_of_examples,
|
|
670
888
|
),
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
889
|
+
hallucinations=self._unpack_hallucinations(
|
|
890
|
+
hallucinations=hallucinations[iou_idx, score_idx, :, :],
|
|
891
|
+
label_key_idx=label_key_idx,
|
|
892
|
+
number_of_labels=n_labels,
|
|
893
|
+
number_of_examples=number_of_examples,
|
|
676
894
|
),
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
895
|
+
missing_predictions=self._unpack_missing_predictions(
|
|
896
|
+
missing_predictions=missing_predictions[
|
|
897
|
+
iou_idx, score_idx, :, :
|
|
898
|
+
],
|
|
899
|
+
label_key_idx=label_key_idx,
|
|
900
|
+
number_of_labels=n_labels,
|
|
901
|
+
number_of_examples=number_of_examples,
|
|
682
902
|
),
|
|
683
903
|
)
|
|
684
|
-
for
|
|
904
|
+
for label_key_idx, label_key in self.index_to_label_key.items()
|
|
685
905
|
for iou_idx in range(n_ious)
|
|
906
|
+
for score_idx in range(n_scores)
|
|
686
907
|
]
|
|
687
908
|
|
|
688
909
|
|