valor-lite 0.33.4__py3-none-any.whl → 0.33.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- valor_lite/detection/__init__.py +11 -6
- valor_lite/detection/computation.py +208 -152
- valor_lite/detection/manager.py +347 -128
- valor_lite/detection/metric.py +60 -34
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/METADATA +1 -1
- valor_lite-0.33.5.dist-info/RECORD +12 -0
- valor_lite-0.33.4.dist-info/RECORD +0 -12
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/top_level.txt +0 -0
valor_lite/detection/__init__.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
from .annotation import Bitmask, BoundingBox, Detection, Polygon
|
|
2
2
|
from .computation import (
|
|
3
|
-
|
|
3
|
+
compute_bbox_iou,
|
|
4
|
+
compute_bitmask_iou,
|
|
5
|
+
compute_confusion_matrix,
|
|
4
6
|
compute_metrics,
|
|
7
|
+
compute_polygon_iou,
|
|
5
8
|
compute_ranked_pairs,
|
|
6
9
|
)
|
|
7
|
-
from .manager import DataLoader, Evaluator
|
|
10
|
+
from .manager import DataLoader, Evaluator
|
|
8
11
|
from .metric import (
|
|
9
12
|
AP,
|
|
10
13
|
AR,
|
|
@@ -12,8 +15,8 @@ from .metric import (
|
|
|
12
15
|
Accuracy,
|
|
13
16
|
APAveragedOverIOUs,
|
|
14
17
|
ARAveragedOverScores,
|
|
18
|
+
ConfusionMatrix,
|
|
15
19
|
Counts,
|
|
16
|
-
DetailedCounts,
|
|
17
20
|
MetricType,
|
|
18
21
|
Precision,
|
|
19
22
|
PrecisionRecallCurve,
|
|
@@ -44,11 +47,13 @@ __all__ = [
|
|
|
44
47
|
"ARAveragedOverScores",
|
|
45
48
|
"mARAveragedOverScores",
|
|
46
49
|
"PrecisionRecallCurve",
|
|
47
|
-
"
|
|
48
|
-
"
|
|
50
|
+
"ConfusionMatrix",
|
|
51
|
+
"compute_bbox_iou",
|
|
52
|
+
"compute_bitmask_iou",
|
|
53
|
+
"compute_polygon_iou",
|
|
49
54
|
"compute_ranked_pairs",
|
|
50
55
|
"compute_metrics",
|
|
51
|
-
"
|
|
56
|
+
"compute_confusion_matrix",
|
|
52
57
|
"DataLoader",
|
|
53
58
|
"Evaluator",
|
|
54
59
|
]
|
|
@@ -492,13 +492,52 @@ def compute_metrics(
|
|
|
492
492
|
)
|
|
493
493
|
|
|
494
494
|
|
|
495
|
-
def
|
|
495
|
+
def _count_with_examples(
|
|
496
|
+
data: NDArray[np.floating],
|
|
497
|
+
unique_idx: int | list[int],
|
|
498
|
+
label_idx: int | list[int],
|
|
499
|
+
) -> tuple[NDArray[np.floating], NDArray[np.int32], NDArray[np.int32]]:
|
|
500
|
+
"""
|
|
501
|
+
Helper function for counting occurrences of unique detailed pairs.
|
|
502
|
+
|
|
503
|
+
Parameters
|
|
504
|
+
----------
|
|
505
|
+
data : NDArray[np.floating]
|
|
506
|
+
A masked portion of a detailed pairs array.
|
|
507
|
+
unique_idx : int | list[int]
|
|
508
|
+
The index or indices upon which uniqueness is constrained.
|
|
509
|
+
label_idx : int | list[int]
|
|
510
|
+
The index or indices within the unique index or indices that encode labels.
|
|
511
|
+
|
|
512
|
+
Returns
|
|
513
|
+
-------
|
|
514
|
+
NDArray[np.floating]
|
|
515
|
+
Examples drawn from the data input.
|
|
516
|
+
NDArray[np.int32]
|
|
517
|
+
Unique label indices.
|
|
518
|
+
NDArray[np.int32]
|
|
519
|
+
Counts for each unique label index.
|
|
520
|
+
"""
|
|
521
|
+
unique_rows, indices = np.unique(
|
|
522
|
+
data.astype(int)[:, unique_idx],
|
|
523
|
+
return_index=True,
|
|
524
|
+
axis=0,
|
|
525
|
+
)
|
|
526
|
+
examples = data[indices]
|
|
527
|
+
labels, counts = np.unique(
|
|
528
|
+
unique_rows[:, label_idx], return_counts=True, axis=0
|
|
529
|
+
)
|
|
530
|
+
return examples, labels, counts
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def compute_confusion_matrix(
|
|
496
534
|
data: NDArray[np.floating],
|
|
497
535
|
label_metadata: NDArray[np.int32],
|
|
498
536
|
iou_thresholds: NDArray[np.floating],
|
|
499
537
|
score_thresholds: NDArray[np.floating],
|
|
500
|
-
|
|
501
|
-
) -> NDArray[np.int32]:
|
|
538
|
+
n_examples: int,
|
|
539
|
+
) -> tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.int32]]:
|
|
540
|
+
|
|
502
541
|
"""
|
|
503
542
|
Compute the confusion matrix, hallucinations, and missing predictions.
|
|
504
543
|
|
|
@@ -512,19 +551,6 @@ def compute_detailed_counts(
|
|
|
512
551
|
Index 5 - Prediction Label Index
|
|
513
552
|
Index 6 - Score
|
|
514
553
|
|
|
515
|
-
Outputs an array with shape (N_IoUs, N_Score, N_Labels, 5 * n_samples + 5):
|
|
516
|
-
|
|
517
|
-
Index 0 - True Positive Count
|
|
518
|
-
... Datum ID Examples
|
|
519
|
-
Index 2 * n_samples + 1 - False Positive Misclassification Count
|
|
520
|
-
... Datum ID Examples
|
|
521
|
-
Index 4 * n_samples + 2 - False Positive Hallucination Count
|
|
522
|
-
... Datum ID Examples
|
|
523
|
-
Index 6 * n_samples + 3 - False Negative Misclassification Count
|
|
524
|
-
... Datum ID Examples
|
|
525
|
-
Index 8 * n_samples + 4 - False Negative Missing Prediction Count
|
|
526
|
-
... Datum ID Examples
|
|
527
|
-
|
|
528
554
|
Parameters
|
|
529
555
|
----------
|
|
530
556
|
data : NDArray[np.floating]
|
|
@@ -535,28 +561,37 @@ def compute_detailed_counts(
|
|
|
535
561
|
A 1-D array containing IoU thresholds.
|
|
536
562
|
score_thresholds : NDArray[np.floating]
|
|
537
563
|
A 1-D array containing score thresholds.
|
|
538
|
-
|
|
539
|
-
The number of examples to return per count.
|
|
564
|
+
n_examples : int
|
|
565
|
+
The maximum number of examples to return per count.
|
|
540
566
|
|
|
541
567
|
Returns
|
|
542
568
|
-------
|
|
569
|
+
NDArray[np.floating]
|
|
570
|
+
Confusion matrix.
|
|
571
|
+
NDArray[np.floating]
|
|
572
|
+
Hallucinations.
|
|
543
573
|
NDArray[np.int32]
|
|
544
|
-
|
|
574
|
+
Missing Predictions.
|
|
545
575
|
"""
|
|
546
576
|
|
|
547
577
|
n_labels = label_metadata.shape[0]
|
|
548
578
|
n_ious = iou_thresholds.shape[0]
|
|
549
579
|
n_scores = score_thresholds.shape[0]
|
|
550
|
-
n_metrics = 5 * (2 * n_samples + 1)
|
|
551
|
-
|
|
552
|
-
tp_idx = 0
|
|
553
|
-
fp_misclf_idx = 2 * n_samples + 1
|
|
554
|
-
fp_halluc_idx = 4 * n_samples + 2
|
|
555
|
-
fn_misclf_idx = 6 * n_samples + 3
|
|
556
|
-
fn_misprd_idx = 8 * n_samples + 4
|
|
557
580
|
|
|
558
|
-
|
|
559
|
-
(
|
|
581
|
+
confusion_matrix = -1 * np.ones(
|
|
582
|
+
# (datum idx, gt idx, pd idx, pd score) * n_examples + count
|
|
583
|
+
(n_ious, n_scores, n_labels, n_labels, 4 * n_examples + 1),
|
|
584
|
+
dtype=np.float32,
|
|
585
|
+
)
|
|
586
|
+
hallucinations = -1 * np.ones(
|
|
587
|
+
# (datum idx, pd idx, pd score) * n_examples + count
|
|
588
|
+
(n_ious, n_scores, n_labels, 3 * n_examples + 1),
|
|
589
|
+
dtype=np.float32,
|
|
590
|
+
)
|
|
591
|
+
missing_predictions = -1 * np.ones(
|
|
592
|
+
# (datum idx, gt idx) * n_examples + count
|
|
593
|
+
(n_ious, n_scores, n_labels, 2 * n_examples + 1),
|
|
594
|
+
dtype=np.int32,
|
|
560
595
|
)
|
|
561
596
|
|
|
562
597
|
mask_gt_exists = data[:, 1] > -0.5
|
|
@@ -622,9 +657,9 @@ def compute_detailed_counts(
|
|
|
622
657
|
~mask_groundtruths_with_passing_score & mask_gt_exists
|
|
623
658
|
)
|
|
624
659
|
|
|
660
|
+
# create category masks
|
|
625
661
|
mask_tp = mask_score & mask_iou & mask_gt_pd_match
|
|
626
|
-
|
|
627
|
-
mask_fn_misclf = mask_iou & (
|
|
662
|
+
mask_misclf = mask_iou & (
|
|
628
663
|
(
|
|
629
664
|
~mask_score
|
|
630
665
|
& mask_gt_pd_match
|
|
@@ -632,143 +667,164 @@ def compute_detailed_counts(
|
|
|
632
667
|
)
|
|
633
668
|
| (mask_score & mask_gt_pd_mismatch)
|
|
634
669
|
)
|
|
635
|
-
|
|
636
|
-
|
|
670
|
+
mask_halluc = mask_score & mask_predictions_without_passing_ious
|
|
671
|
+
mask_misprd = (
|
|
637
672
|
mask_groundtruths_without_passing_ious
|
|
638
673
|
| mask_groundtruths_without_passing_score
|
|
639
674
|
)
|
|
640
675
|
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
mask_fp_misclf_is_tp = (
|
|
649
|
-
(fp_misclf.reshape(-1, 1, 3) == tp_pds.reshape(1, -1, 3))
|
|
676
|
+
# filter out true-positives from misclf and misprd
|
|
677
|
+
mask_gts_with_tp_override = (
|
|
678
|
+
(
|
|
679
|
+
data[mask_misclf][:, [0, 1]].reshape(-1, 1, 2)
|
|
680
|
+
== data[mask_tp][:, [0, 1]].reshape(1, -1, 2)
|
|
681
|
+
)
|
|
650
682
|
.all(axis=2)
|
|
651
683
|
.any(axis=1)
|
|
652
684
|
)
|
|
653
|
-
|
|
654
|
-
(
|
|
685
|
+
mask_pds_with_tp_override = (
|
|
686
|
+
(
|
|
687
|
+
data[mask_misclf][:, [0, 2]].reshape(-1, 1, 2)
|
|
688
|
+
== data[mask_tp][:, [0, 2]].reshape(1, -1, 2)
|
|
689
|
+
)
|
|
655
690
|
.all(axis=2)
|
|
656
691
|
.any(axis=1)
|
|
657
692
|
)
|
|
693
|
+
mask_misprd[mask_misclf] |= (
|
|
694
|
+
~mask_gts_with_tp_override & mask_pds_with_tp_override
|
|
695
|
+
)
|
|
696
|
+
mask_misclf[mask_misclf] &= (
|
|
697
|
+
~mask_gts_with_tp_override & ~mask_pds_with_tp_override
|
|
698
|
+
)
|
|
658
699
|
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
tp_count = np.bincount(tp[:, 2].astype(int), minlength=n_labels)
|
|
666
|
-
fp_misclf_count = np.bincount(
|
|
667
|
-
fp_misclf[:, 2].astype(int), minlength=n_labels
|
|
700
|
+
# count true positives
|
|
701
|
+
tp_examples, tp_labels, tp_counts = _count_with_examples(
|
|
702
|
+
data[mask_tp],
|
|
703
|
+
unique_idx=[0, 2, 5],
|
|
704
|
+
label_idx=2,
|
|
668
705
|
)
|
|
669
|
-
|
|
670
|
-
|
|
706
|
+
|
|
707
|
+
# count misclassifications
|
|
708
|
+
(
|
|
709
|
+
misclf_examples,
|
|
710
|
+
misclf_labels,
|
|
711
|
+
misclf_counts,
|
|
712
|
+
) = _count_with_examples(
|
|
713
|
+
data[mask_misclf], unique_idx=[0, 1, 2, 4, 5], label_idx=[3, 4]
|
|
671
714
|
)
|
|
672
|
-
|
|
673
|
-
|
|
715
|
+
|
|
716
|
+
# count hallucinations
|
|
717
|
+
(
|
|
718
|
+
halluc_examples,
|
|
719
|
+
halluc_labels,
|
|
720
|
+
halluc_counts,
|
|
721
|
+
) = _count_with_examples(
|
|
722
|
+
data[mask_halluc], unique_idx=[0, 2, 5], label_idx=2
|
|
674
723
|
)
|
|
675
|
-
|
|
676
|
-
|
|
724
|
+
|
|
725
|
+
# count missing predictions
|
|
726
|
+
(
|
|
727
|
+
misprd_examples,
|
|
728
|
+
misprd_labels,
|
|
729
|
+
misprd_counts,
|
|
730
|
+
) = _count_with_examples(
|
|
731
|
+
data[mask_misprd], unique_idx=[0, 1, 4], label_idx=2
|
|
677
732
|
)
|
|
678
733
|
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
iou_idx, score_idx,
|
|
682
|
-
] =
|
|
683
|
-
|
|
684
|
-
iou_idx,
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
734
|
+
# store the counts
|
|
735
|
+
confusion_matrix[
|
|
736
|
+
iou_idx, score_idx, tp_labels, tp_labels, 0
|
|
737
|
+
] = tp_counts
|
|
738
|
+
confusion_matrix[
|
|
739
|
+
iou_idx,
|
|
740
|
+
score_idx,
|
|
741
|
+
misclf_labels[:, 0],
|
|
742
|
+
misclf_labels[:, 1],
|
|
743
|
+
0,
|
|
744
|
+
] = misclf_counts
|
|
745
|
+
hallucinations[
|
|
746
|
+
iou_idx,
|
|
747
|
+
score_idx,
|
|
748
|
+
halluc_labels,
|
|
749
|
+
0,
|
|
750
|
+
] = halluc_counts
|
|
751
|
+
missing_predictions[
|
|
752
|
+
iou_idx,
|
|
753
|
+
score_idx,
|
|
754
|
+
misprd_labels,
|
|
755
|
+
0,
|
|
756
|
+
] = misprd_counts
|
|
757
|
+
|
|
758
|
+
# store examples
|
|
759
|
+
if n_examples > 0:
|
|
694
760
|
for label_idx in range(n_labels):
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
)
|
|
702
|
-
fp_misclf_examples = (
|
|
703
|
-
fp_misclf[fp_misclf[:, 2].astype(int) == label_idx][
|
|
704
|
-
:n_samples, [0, 1]
|
|
705
|
-
]
|
|
706
|
-
.astype(int)
|
|
707
|
-
.flatten()
|
|
708
|
-
)
|
|
709
|
-
fp_halluc_examples = (
|
|
710
|
-
fp_halluc[fp_halluc[:, 2].astype(int) == label_idx][
|
|
711
|
-
:n_samples, [0, 1]
|
|
712
|
-
]
|
|
713
|
-
.astype(int)
|
|
714
|
-
.flatten()
|
|
715
|
-
)
|
|
716
|
-
fn_misclf_examples = (
|
|
717
|
-
fn_misclf[fn_misclf[:, 2].astype(int) == label_idx][
|
|
718
|
-
:n_samples, [0, 1]
|
|
719
|
-
]
|
|
720
|
-
.astype(int)
|
|
721
|
-
.flatten()
|
|
722
|
-
)
|
|
723
|
-
fn_misprd_examples = (
|
|
724
|
-
fn_misprd[fn_misprd[:, 2].astype(int) == label_idx][
|
|
725
|
-
:n_samples, [0, 1]
|
|
761
|
+
|
|
762
|
+
# true-positive examples
|
|
763
|
+
mask_tp_label = tp_examples[:, 5] == label_idx
|
|
764
|
+
if mask_tp_label.sum() > 0:
|
|
765
|
+
tp_label_examples = tp_examples[mask_tp_label][
|
|
766
|
+
:n_examples
|
|
726
767
|
]
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
768
|
+
confusion_matrix[
|
|
769
|
+
iou_idx,
|
|
770
|
+
score_idx,
|
|
771
|
+
label_idx,
|
|
772
|
+
label_idx,
|
|
773
|
+
1 : 4 * tp_label_examples.shape[0] + 1,
|
|
774
|
+
] = tp_label_examples[:, [0, 1, 2, 6]].flatten()
|
|
775
|
+
|
|
776
|
+
# misclassification examples
|
|
777
|
+
mask_misclf_gt_label = misclf_examples[:, 4] == label_idx
|
|
778
|
+
if mask_misclf_gt_label.sum() > 0:
|
|
779
|
+
for pd_label_idx in range(n_labels):
|
|
780
|
+
mask_misclf_pd_label = (
|
|
781
|
+
misclf_examples[:, 5] == pd_label_idx
|
|
782
|
+
)
|
|
783
|
+
mask_misclf_label_combo = (
|
|
784
|
+
mask_misclf_gt_label & mask_misclf_pd_label
|
|
785
|
+
)
|
|
786
|
+
if mask_misclf_label_combo.sum() > 0:
|
|
787
|
+
misclf_label_examples = misclf_examples[
|
|
788
|
+
mask_misclf_label_combo
|
|
789
|
+
][:n_examples]
|
|
790
|
+
confusion_matrix[
|
|
791
|
+
iou_idx,
|
|
792
|
+
score_idx,
|
|
793
|
+
label_idx,
|
|
794
|
+
pd_label_idx,
|
|
795
|
+
1 : 4 * misclf_label_examples.shape[0] + 1,
|
|
796
|
+
] = misclf_label_examples[
|
|
797
|
+
:, [0, 1, 2, 6]
|
|
798
|
+
].flatten()
|
|
799
|
+
|
|
800
|
+
# hallucination examples
|
|
801
|
+
mask_halluc_label = halluc_examples[:, 5] == label_idx
|
|
802
|
+
if mask_halluc_label.sum() > 0:
|
|
803
|
+
halluc_label_examples = halluc_examples[
|
|
804
|
+
mask_halluc_label
|
|
805
|
+
][:n_examples]
|
|
806
|
+
hallucinations[
|
|
807
|
+
iou_idx,
|
|
808
|
+
score_idx,
|
|
809
|
+
label_idx,
|
|
810
|
+
1 : 3 * halluc_label_examples.shape[0] + 1,
|
|
811
|
+
] = halluc_label_examples[:, [0, 2, 6]].flatten()
|
|
812
|
+
|
|
813
|
+
# missing prediction examples
|
|
814
|
+
mask_misprd_label = misprd_examples[:, 4] == label_idx
|
|
815
|
+
if misprd_examples.size > 0:
|
|
816
|
+
misprd_label_examples = misprd_examples[
|
|
817
|
+
mask_misprd_label
|
|
818
|
+
][:n_examples]
|
|
819
|
+
missing_predictions[
|
|
820
|
+
iou_idx,
|
|
821
|
+
score_idx,
|
|
822
|
+
label_idx,
|
|
823
|
+
1 : 2 * misprd_label_examples.shape[0] + 1,
|
|
824
|
+
] = misprd_label_examples[:, [0, 1]].flatten()
|
|
825
|
+
|
|
826
|
+
return (
|
|
827
|
+
confusion_matrix,
|
|
828
|
+
hallucinations,
|
|
829
|
+
missing_predictions,
|
|
830
|
+
)
|
valor_lite/detection/manager.py
CHANGED
|
@@ -14,7 +14,7 @@ from valor_lite.detection.annotation import (
|
|
|
14
14
|
from valor_lite.detection.computation import (
|
|
15
15
|
compute_bbox_iou,
|
|
16
16
|
compute_bitmask_iou,
|
|
17
|
-
|
|
17
|
+
compute_confusion_matrix,
|
|
18
18
|
compute_metrics,
|
|
19
19
|
compute_polygon_iou,
|
|
20
20
|
compute_ranked_pairs,
|
|
@@ -26,8 +26,8 @@ from valor_lite.detection.metric import (
|
|
|
26
26
|
Accuracy,
|
|
27
27
|
APAveragedOverIOUs,
|
|
28
28
|
ARAveragedOverScores,
|
|
29
|
+
ConfusionMatrix,
|
|
29
30
|
Counts,
|
|
30
|
-
DetailedCounts,
|
|
31
31
|
MetricType,
|
|
32
32
|
Precision,
|
|
33
33
|
PrecisionRecallCurve,
|
|
@@ -158,7 +158,8 @@ def compute_iou(
|
|
|
158
158
|
|
|
159
159
|
@dataclass
|
|
160
160
|
class Filter:
|
|
161
|
-
|
|
161
|
+
ranked_indices: NDArray[np.int32]
|
|
162
|
+
detailed_indices: NDArray[np.int32]
|
|
162
163
|
label_metadata: NDArray[np.int32]
|
|
163
164
|
|
|
164
165
|
|
|
@@ -257,12 +258,14 @@ class Evaluator:
|
|
|
257
258
|
Filter
|
|
258
259
|
A filter object that can be passed to the `evaluate` method.
|
|
259
260
|
"""
|
|
260
|
-
n_rows = self._ranked_pairs.shape[0]
|
|
261
261
|
|
|
262
262
|
n_datums = self._label_metadata_per_datum.shape[1]
|
|
263
263
|
n_labels = self._label_metadata_per_datum.shape[2]
|
|
264
264
|
|
|
265
|
-
|
|
265
|
+
mask_ranked = np.ones((self._ranked_pairs.shape[0], 1), dtype=np.bool_)
|
|
266
|
+
mask_detailed = np.ones(
|
|
267
|
+
(self._detailed_pairs.shape[0], 1), dtype=np.bool_
|
|
268
|
+
)
|
|
266
269
|
mask_datums = np.ones(n_datums, dtype=np.bool_)
|
|
267
270
|
mask_labels = np.ones(n_labels, dtype=np.bool_)
|
|
268
271
|
|
|
@@ -272,9 +275,12 @@ class Evaluator:
|
|
|
272
275
|
[self.uid_to_index[uid] for uid in datum_uids],
|
|
273
276
|
dtype=np.int32,
|
|
274
277
|
)
|
|
275
|
-
|
|
278
|
+
mask_ranked[
|
|
276
279
|
~np.isin(self._ranked_pairs[:, 0].astype(int), datum_uids)
|
|
277
280
|
] = False
|
|
281
|
+
mask_detailed[
|
|
282
|
+
~np.isin(self._detailed_pairs[:, 0].astype(int), datum_uids)
|
|
283
|
+
] = False
|
|
278
284
|
mask_datums[~np.isin(np.arange(n_datums), datum_uids)] = False
|
|
279
285
|
|
|
280
286
|
if labels is not None:
|
|
@@ -282,9 +288,12 @@ class Evaluator:
|
|
|
282
288
|
labels = np.array(
|
|
283
289
|
[self.label_to_index[label] for label in labels]
|
|
284
290
|
)
|
|
285
|
-
|
|
291
|
+
mask_ranked[
|
|
286
292
|
~np.isin(self._ranked_pairs[:, 4].astype(int), labels)
|
|
287
293
|
] = False
|
|
294
|
+
mask_detailed[
|
|
295
|
+
~np.isin(self._detailed_pairs[:, 4].astype(int), labels)
|
|
296
|
+
] = False
|
|
288
297
|
mask_labels[~np.isin(np.arange(n_labels), labels)] = False
|
|
289
298
|
|
|
290
299
|
if label_keys is not None:
|
|
@@ -297,14 +306,19 @@ class Evaluator:
|
|
|
297
306
|
if label_keys.size > 0
|
|
298
307
|
else np.array([])
|
|
299
308
|
)
|
|
300
|
-
|
|
309
|
+
mask_ranked[
|
|
301
310
|
~np.isin(self._ranked_pairs[:, 4].astype(int), label_indices)
|
|
302
311
|
] = False
|
|
312
|
+
mask_detailed[
|
|
313
|
+
~np.isin(self._detailed_pairs[:, 4].astype(int), label_indices)
|
|
314
|
+
] = False
|
|
303
315
|
mask_labels[~np.isin(np.arange(n_labels), label_indices)] = False
|
|
304
316
|
|
|
305
|
-
|
|
317
|
+
mask_label_metadata = (
|
|
318
|
+
mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
|
|
319
|
+
)
|
|
306
320
|
label_metadata_per_datum = self._label_metadata_per_datum.copy()
|
|
307
|
-
label_metadata_per_datum[:, ~
|
|
321
|
+
label_metadata_per_datum[:, ~mask_label_metadata] = 0
|
|
308
322
|
|
|
309
323
|
label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
|
|
310
324
|
label_metadata[:, :2] = np.transpose(
|
|
@@ -316,7 +330,8 @@ class Evaluator:
|
|
|
316
330
|
label_metadata[:, 2] = self._label_metadata[:, 2]
|
|
317
331
|
|
|
318
332
|
return Filter(
|
|
319
|
-
|
|
333
|
+
ranked_indices=np.where(mask_ranked)[0],
|
|
334
|
+
detailed_indices=np.where(mask_detailed)[0],
|
|
320
335
|
label_metadata=label_metadata,
|
|
321
336
|
)
|
|
322
337
|
|
|
@@ -340,7 +355,7 @@ class Evaluator:
|
|
|
340
355
|
score_thresholds : list[float]
|
|
341
356
|
A list of score thresholds to compute metrics over.
|
|
342
357
|
number_of_examples : int, default=0
|
|
343
|
-
|
|
358
|
+
Maximum number of annotation examples to return in ConfusionMatrix.
|
|
344
359
|
filter_ : Filter, optional
|
|
345
360
|
An optional filter object.
|
|
346
361
|
|
|
@@ -350,10 +365,12 @@ class Evaluator:
|
|
|
350
365
|
A dictionary mapping MetricType enumerations to lists of computed metrics.
|
|
351
366
|
"""
|
|
352
367
|
|
|
353
|
-
|
|
368
|
+
ranked_pairs = self._ranked_pairs
|
|
369
|
+
detailed_pairs = self._detailed_pairs
|
|
354
370
|
label_metadata = self._label_metadata
|
|
355
371
|
if filter_ is not None:
|
|
356
|
-
|
|
372
|
+
ranked_pairs = ranked_pairs[filter_.ranked_indices]
|
|
373
|
+
detailed_pairs = detailed_pairs[filter_.detailed_indices]
|
|
357
374
|
label_metadata = filter_.label_metadata
|
|
358
375
|
|
|
359
376
|
(
|
|
@@ -372,7 +389,7 @@ class Evaluator:
|
|
|
372
389
|
precision_recall,
|
|
373
390
|
pr_curves,
|
|
374
391
|
) = compute_metrics(
|
|
375
|
-
data=
|
|
392
|
+
data=ranked_pairs,
|
|
376
393
|
label_metadata=label_metadata,
|
|
377
394
|
iou_thresholds=np.array(iou_thresholds),
|
|
378
395
|
score_thresholds=np.array(score_thresholds),
|
|
@@ -527,11 +544,15 @@ class Evaluator:
|
|
|
527
544
|
)
|
|
528
545
|
)
|
|
529
546
|
|
|
530
|
-
if MetricType.
|
|
531
|
-
metrics[
|
|
547
|
+
if MetricType.ConfusionMatrix in metrics_to_return:
|
|
548
|
+
metrics[
|
|
549
|
+
MetricType.ConfusionMatrix
|
|
550
|
+
] = self._compute_confusion_matrix(
|
|
551
|
+
data=detailed_pairs,
|
|
552
|
+
label_metadata=label_metadata,
|
|
532
553
|
iou_thresholds=iou_thresholds,
|
|
533
554
|
score_thresholds=score_thresholds,
|
|
534
|
-
|
|
555
|
+
number_of_examples=number_of_examples,
|
|
535
556
|
)
|
|
536
557
|
|
|
537
558
|
for metric in set(metrics.keys()):
|
|
@@ -540,14 +561,281 @@ class Evaluator:
|
|
|
540
561
|
|
|
541
562
|
return metrics
|
|
542
563
|
|
|
543
|
-
def
|
|
564
|
+
def _unpack_confusion_matrix(
|
|
565
|
+
self,
|
|
566
|
+
confusion_matrix: NDArray[np.floating],
|
|
567
|
+
label_key_idx: int,
|
|
568
|
+
number_of_labels: int,
|
|
569
|
+
number_of_examples: int,
|
|
570
|
+
) -> dict[
|
|
571
|
+
str,
|
|
572
|
+
dict[
|
|
573
|
+
str,
|
|
574
|
+
dict[
|
|
575
|
+
str,
|
|
576
|
+
int
|
|
577
|
+
| list[
|
|
578
|
+
dict[
|
|
579
|
+
str,
|
|
580
|
+
str | float | tuple[float, float, float, float],
|
|
581
|
+
]
|
|
582
|
+
],
|
|
583
|
+
],
|
|
584
|
+
],
|
|
585
|
+
]:
|
|
586
|
+
"""
|
|
587
|
+
Unpacks a numpy array of confusion matrix counts and examples.
|
|
588
|
+
"""
|
|
589
|
+
|
|
590
|
+
datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
591
|
+
confusion_matrix[
|
|
592
|
+
gt_label_idx,
|
|
593
|
+
pd_label_idx,
|
|
594
|
+
example_idx * 4 + 1,
|
|
595
|
+
]
|
|
596
|
+
)
|
|
597
|
+
|
|
598
|
+
groundtruth_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
599
|
+
confusion_matrix[
|
|
600
|
+
gt_label_idx,
|
|
601
|
+
pd_label_idx,
|
|
602
|
+
example_idx * 4 + 2,
|
|
603
|
+
]
|
|
604
|
+
)
|
|
605
|
+
|
|
606
|
+
prediction_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
607
|
+
confusion_matrix[
|
|
608
|
+
gt_label_idx,
|
|
609
|
+
pd_label_idx,
|
|
610
|
+
example_idx * 4 + 3,
|
|
611
|
+
]
|
|
612
|
+
)
|
|
613
|
+
|
|
614
|
+
score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
|
|
615
|
+
confusion_matrix[
|
|
616
|
+
gt_label_idx,
|
|
617
|
+
pd_label_idx,
|
|
618
|
+
example_idx * 4 + 4,
|
|
619
|
+
]
|
|
620
|
+
)
|
|
621
|
+
|
|
622
|
+
return {
|
|
623
|
+
self.index_to_label[gt_label_idx][1]: {
|
|
624
|
+
self.index_to_label[pd_label_idx][1]: {
|
|
625
|
+
"count": max(
|
|
626
|
+
int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
|
|
627
|
+
0,
|
|
628
|
+
),
|
|
629
|
+
"examples": [
|
|
630
|
+
{
|
|
631
|
+
"datum": self.index_to_uid[
|
|
632
|
+
datum_idx(
|
|
633
|
+
gt_label_idx, pd_label_idx, example_idx
|
|
634
|
+
)
|
|
635
|
+
],
|
|
636
|
+
"groundtruth": tuple(
|
|
637
|
+
self.groundtruth_examples[
|
|
638
|
+
datum_idx(
|
|
639
|
+
gt_label_idx,
|
|
640
|
+
pd_label_idx,
|
|
641
|
+
example_idx,
|
|
642
|
+
)
|
|
643
|
+
][
|
|
644
|
+
groundtruth_idx(
|
|
645
|
+
gt_label_idx,
|
|
646
|
+
pd_label_idx,
|
|
647
|
+
example_idx,
|
|
648
|
+
)
|
|
649
|
+
].tolist()
|
|
650
|
+
),
|
|
651
|
+
"prediction": tuple(
|
|
652
|
+
self.prediction_examples[
|
|
653
|
+
datum_idx(
|
|
654
|
+
gt_label_idx,
|
|
655
|
+
pd_label_idx,
|
|
656
|
+
example_idx,
|
|
657
|
+
)
|
|
658
|
+
][
|
|
659
|
+
prediction_idx(
|
|
660
|
+
gt_label_idx,
|
|
661
|
+
pd_label_idx,
|
|
662
|
+
example_idx,
|
|
663
|
+
)
|
|
664
|
+
].tolist()
|
|
665
|
+
),
|
|
666
|
+
"score": score_idx(
|
|
667
|
+
gt_label_idx, pd_label_idx, example_idx
|
|
668
|
+
),
|
|
669
|
+
}
|
|
670
|
+
for example_idx in range(number_of_examples)
|
|
671
|
+
if datum_idx(gt_label_idx, pd_label_idx, example_idx)
|
|
672
|
+
>= 0
|
|
673
|
+
],
|
|
674
|
+
}
|
|
675
|
+
for pd_label_idx in range(number_of_labels)
|
|
676
|
+
if (
|
|
677
|
+
self.label_index_to_label_key_index[pd_label_idx]
|
|
678
|
+
== label_key_idx
|
|
679
|
+
)
|
|
680
|
+
}
|
|
681
|
+
for gt_label_idx in range(number_of_labels)
|
|
682
|
+
if (
|
|
683
|
+
self.label_index_to_label_key_index[gt_label_idx]
|
|
684
|
+
== label_key_idx
|
|
685
|
+
)
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
def _unpack_hallucinations(
|
|
544
689
|
self,
|
|
690
|
+
hallucinations: NDArray[np.floating],
|
|
691
|
+
label_key_idx: int,
|
|
692
|
+
number_of_labels: int,
|
|
693
|
+
number_of_examples: int,
|
|
694
|
+
) -> dict[
|
|
695
|
+
str,
|
|
696
|
+
dict[
|
|
697
|
+
str,
|
|
698
|
+
int
|
|
699
|
+
| list[dict[str, str | float | tuple[float, float, float, float]]],
|
|
700
|
+
],
|
|
701
|
+
]:
|
|
702
|
+
"""
|
|
703
|
+
Unpacks a numpy array of hallucination counts and examples.
|
|
704
|
+
"""
|
|
705
|
+
|
|
706
|
+
datum_idx = (
|
|
707
|
+
lambda pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
708
|
+
hallucinations[
|
|
709
|
+
pd_label_idx,
|
|
710
|
+
example_idx * 3 + 1,
|
|
711
|
+
]
|
|
712
|
+
)
|
|
713
|
+
)
|
|
714
|
+
|
|
715
|
+
prediction_idx = (
|
|
716
|
+
lambda pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
717
|
+
hallucinations[
|
|
718
|
+
pd_label_idx,
|
|
719
|
+
example_idx * 3 + 2,
|
|
720
|
+
]
|
|
721
|
+
)
|
|
722
|
+
)
|
|
723
|
+
|
|
724
|
+
score_idx = (
|
|
725
|
+
lambda pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
|
|
726
|
+
hallucinations[
|
|
727
|
+
pd_label_idx,
|
|
728
|
+
example_idx * 3 + 3,
|
|
729
|
+
]
|
|
730
|
+
)
|
|
731
|
+
)
|
|
732
|
+
|
|
733
|
+
return {
|
|
734
|
+
self.index_to_label[pd_label_idx][1]: {
|
|
735
|
+
"count": max(
|
|
736
|
+
int(hallucinations[pd_label_idx, 0]),
|
|
737
|
+
0,
|
|
738
|
+
),
|
|
739
|
+
"examples": [
|
|
740
|
+
{
|
|
741
|
+
"datum": self.index_to_uid[
|
|
742
|
+
datum_idx(pd_label_idx, example_idx)
|
|
743
|
+
],
|
|
744
|
+
"prediction": tuple(
|
|
745
|
+
self.prediction_examples[
|
|
746
|
+
datum_idx(pd_label_idx, example_idx)
|
|
747
|
+
][
|
|
748
|
+
prediction_idx(pd_label_idx, example_idx)
|
|
749
|
+
].tolist()
|
|
750
|
+
),
|
|
751
|
+
"score": score_idx(pd_label_idx, example_idx),
|
|
752
|
+
}
|
|
753
|
+
for example_idx in range(number_of_examples)
|
|
754
|
+
if datum_idx(pd_label_idx, example_idx) >= 0
|
|
755
|
+
],
|
|
756
|
+
}
|
|
757
|
+
for pd_label_idx in range(number_of_labels)
|
|
758
|
+
if (
|
|
759
|
+
self.label_index_to_label_key_index[pd_label_idx]
|
|
760
|
+
== label_key_idx
|
|
761
|
+
)
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
def _unpack_missing_predictions(
|
|
765
|
+
self,
|
|
766
|
+
missing_predictions: NDArray[np.int32],
|
|
767
|
+
label_key_idx: int,
|
|
768
|
+
number_of_labels: int,
|
|
769
|
+
number_of_examples: int,
|
|
770
|
+
) -> dict[
|
|
771
|
+
str,
|
|
772
|
+
dict[
|
|
773
|
+
str,
|
|
774
|
+
int | list[dict[str, str | tuple[float, float, float, float]]],
|
|
775
|
+
],
|
|
776
|
+
]:
|
|
777
|
+
"""
|
|
778
|
+
Unpacks a numpy array of missing prediction counts and examples.
|
|
779
|
+
"""
|
|
780
|
+
|
|
781
|
+
datum_idx = (
|
|
782
|
+
lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
783
|
+
missing_predictions[
|
|
784
|
+
gt_label_idx,
|
|
785
|
+
example_idx * 2 + 1,
|
|
786
|
+
]
|
|
787
|
+
)
|
|
788
|
+
)
|
|
789
|
+
|
|
790
|
+
groundtruth_idx = (
|
|
791
|
+
lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
792
|
+
missing_predictions[
|
|
793
|
+
gt_label_idx,
|
|
794
|
+
example_idx * 2 + 2,
|
|
795
|
+
]
|
|
796
|
+
)
|
|
797
|
+
)
|
|
798
|
+
|
|
799
|
+
return {
|
|
800
|
+
self.index_to_label[gt_label_idx][1]: {
|
|
801
|
+
"count": max(
|
|
802
|
+
int(missing_predictions[gt_label_idx, 0]),
|
|
803
|
+
0,
|
|
804
|
+
),
|
|
805
|
+
"examples": [
|
|
806
|
+
{
|
|
807
|
+
"datum": self.index_to_uid[
|
|
808
|
+
datum_idx(gt_label_idx, example_idx)
|
|
809
|
+
],
|
|
810
|
+
"groundtruth": tuple(
|
|
811
|
+
self.groundtruth_examples[
|
|
812
|
+
datum_idx(gt_label_idx, example_idx)
|
|
813
|
+
][
|
|
814
|
+
groundtruth_idx(gt_label_idx, example_idx)
|
|
815
|
+
].tolist()
|
|
816
|
+
),
|
|
817
|
+
}
|
|
818
|
+
for example_idx in range(number_of_examples)
|
|
819
|
+
if datum_idx(gt_label_idx, example_idx) >= 0
|
|
820
|
+
],
|
|
821
|
+
}
|
|
822
|
+
for gt_label_idx in range(number_of_labels)
|
|
823
|
+
if (
|
|
824
|
+
self.label_index_to_label_key_index[gt_label_idx]
|
|
825
|
+
== label_key_idx
|
|
826
|
+
)
|
|
827
|
+
}
|
|
828
|
+
|
|
829
|
+
def _compute_confusion_matrix(
|
|
830
|
+
self,
|
|
831
|
+
data: NDArray[np.floating],
|
|
832
|
+
label_metadata: NDArray[np.int32],
|
|
545
833
|
iou_thresholds: list[float] = [0.5],
|
|
546
834
|
score_thresholds: list[float] = [
|
|
547
835
|
score / 10.0 for score in range(1, 11)
|
|
548
836
|
],
|
|
549
|
-
|
|
550
|
-
) -> list[
|
|
837
|
+
number_of_examples: int = 0,
|
|
838
|
+
) -> list[ConfusionMatrix]:
|
|
551
839
|
"""
|
|
552
840
|
Computes detailed counting metrics.
|
|
553
841
|
|
|
@@ -557,132 +845,63 @@ class Evaluator:
|
|
|
557
845
|
List of IoU thresholds to compute metrics for.
|
|
558
846
|
score_thresholds : list[float], default=[0.1,0.2,...,1.0]
|
|
559
847
|
List of confidence thresholds to compute metrics for.
|
|
560
|
-
|
|
561
|
-
|
|
848
|
+
number_of_examples : int, default=0
|
|
849
|
+
Maximum number of annotation examples to return per metric.
|
|
562
850
|
|
|
563
851
|
Returns
|
|
564
852
|
-------
|
|
565
|
-
list[list[
|
|
853
|
+
list[ConfusionMatrix]
|
|
566
854
|
A flat list of ConfusionMatrix metrics.
|
|
567
855
|
"""
|
|
568
856
|
|
|
569
|
-
if
|
|
857
|
+
if data.size == 0:
|
|
570
858
|
return list()
|
|
571
859
|
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
860
|
+
(
|
|
861
|
+
confusion_matrix,
|
|
862
|
+
hallucinations,
|
|
863
|
+
missing_predictions,
|
|
864
|
+
) = compute_confusion_matrix(
|
|
865
|
+
data=data,
|
|
866
|
+
label_metadata=label_metadata,
|
|
575
867
|
iou_thresholds=np.array(iou_thresholds),
|
|
576
868
|
score_thresholds=np.array(score_thresholds),
|
|
577
|
-
|
|
869
|
+
n_examples=number_of_examples,
|
|
578
870
|
)
|
|
579
871
|
|
|
580
|
-
|
|
581
|
-
fp_misclf_idx = 2 * n_samples + 1
|
|
582
|
-
fp_halluc_idx = 4 * n_samples + 2
|
|
583
|
-
fn_misclf_idx = 6 * n_samples + 3
|
|
584
|
-
fn_misprd_idx = 8 * n_samples + 4
|
|
585
|
-
|
|
586
|
-
def _unpack_examples(
|
|
587
|
-
iou_idx: int,
|
|
588
|
-
label_idx: int,
|
|
589
|
-
type_idx: int,
|
|
590
|
-
example_source: dict[int, NDArray[np.float16]],
|
|
591
|
-
) -> list[list[tuple[str, tuple[float, float, float, float]]]]:
|
|
592
|
-
"""
|
|
593
|
-
Unpacks metric examples from computation.
|
|
594
|
-
"""
|
|
595
|
-
type_idx += 1
|
|
596
|
-
|
|
597
|
-
results = list()
|
|
598
|
-
for score_idx in range(n_scores):
|
|
599
|
-
examples = list()
|
|
600
|
-
for example_idx in range(n_samples):
|
|
601
|
-
datum_idx = metrics[
|
|
602
|
-
iou_idx,
|
|
603
|
-
score_idx,
|
|
604
|
-
label_idx,
|
|
605
|
-
type_idx + example_idx * 2,
|
|
606
|
-
]
|
|
607
|
-
annotation_idx = metrics[
|
|
608
|
-
iou_idx,
|
|
609
|
-
score_idx,
|
|
610
|
-
label_idx,
|
|
611
|
-
type_idx + example_idx * 2 + 1,
|
|
612
|
-
]
|
|
613
|
-
if datum_idx >= 0:
|
|
614
|
-
examples.append(
|
|
615
|
-
(
|
|
616
|
-
self.index_to_uid[datum_idx],
|
|
617
|
-
tuple(
|
|
618
|
-
example_source[datum_idx][
|
|
619
|
-
annotation_idx
|
|
620
|
-
].tolist()
|
|
621
|
-
),
|
|
622
|
-
)
|
|
623
|
-
)
|
|
624
|
-
results.append(examples)
|
|
625
|
-
|
|
626
|
-
return results
|
|
627
|
-
|
|
628
|
-
n_ious, n_scores, n_labels, _ = metrics.shape
|
|
872
|
+
n_ious, n_scores, n_labels, _, _ = confusion_matrix.shape
|
|
629
873
|
return [
|
|
630
|
-
|
|
874
|
+
ConfusionMatrix(
|
|
631
875
|
iou_threshold=iou_thresholds[iou_idx],
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
.tolist(),
|
|
643
|
-
fn_misclassification=metrics[
|
|
644
|
-
iou_idx, :, label_idx, fn_misclf_idx
|
|
645
|
-
]
|
|
646
|
-
.astype(int)
|
|
647
|
-
.tolist(),
|
|
648
|
-
fn_missing_prediction=metrics[
|
|
649
|
-
iou_idx, :, label_idx, fn_misprd_idx
|
|
650
|
-
]
|
|
651
|
-
.astype(int)
|
|
652
|
-
.tolist(),
|
|
653
|
-
tp_examples=_unpack_examples(
|
|
654
|
-
iou_idx=iou_idx,
|
|
655
|
-
label_idx=label_idx,
|
|
656
|
-
type_idx=tp_idx,
|
|
657
|
-
example_source=self.prediction_examples,
|
|
658
|
-
),
|
|
659
|
-
fp_misclassification_examples=_unpack_examples(
|
|
660
|
-
iou_idx=iou_idx,
|
|
661
|
-
label_idx=label_idx,
|
|
662
|
-
type_idx=fp_misclf_idx,
|
|
663
|
-
example_source=self.prediction_examples,
|
|
664
|
-
),
|
|
665
|
-
fp_hallucination_examples=_unpack_examples(
|
|
666
|
-
iou_idx=iou_idx,
|
|
667
|
-
label_idx=label_idx,
|
|
668
|
-
type_idx=fp_halluc_idx,
|
|
669
|
-
example_source=self.prediction_examples,
|
|
876
|
+
score_threshold=score_thresholds[score_idx],
|
|
877
|
+
label_key=label_key,
|
|
878
|
+
number_of_examples=number_of_examples,
|
|
879
|
+
confusion_matrix=self._unpack_confusion_matrix(
|
|
880
|
+
confusion_matrix=confusion_matrix[
|
|
881
|
+
iou_idx, score_idx, :, :, :
|
|
882
|
+
],
|
|
883
|
+
label_key_idx=label_key_idx,
|
|
884
|
+
number_of_labels=n_labels,
|
|
885
|
+
number_of_examples=number_of_examples,
|
|
670
886
|
),
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
887
|
+
hallucinations=self._unpack_hallucinations(
|
|
888
|
+
hallucinations=hallucinations[iou_idx, score_idx, :, :],
|
|
889
|
+
label_key_idx=label_key_idx,
|
|
890
|
+
number_of_labels=n_labels,
|
|
891
|
+
number_of_examples=number_of_examples,
|
|
676
892
|
),
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
893
|
+
missing_predictions=self._unpack_missing_predictions(
|
|
894
|
+
missing_predictions=missing_predictions[
|
|
895
|
+
iou_idx, score_idx, :, :
|
|
896
|
+
],
|
|
897
|
+
label_key_idx=label_key_idx,
|
|
898
|
+
number_of_labels=n_labels,
|
|
899
|
+
number_of_examples=number_of_examples,
|
|
682
900
|
),
|
|
683
901
|
)
|
|
684
|
-
for
|
|
902
|
+
for label_key_idx, label_key in self.index_to_label_key.items()
|
|
685
903
|
for iou_idx in range(n_ious)
|
|
904
|
+
for score_idx in range(n_scores)
|
|
686
905
|
]
|
|
687
906
|
|
|
688
907
|
|
valor_lite/detection/metric.py
CHANGED
|
@@ -19,7 +19,7 @@ class MetricType(str, Enum):
|
|
|
19
19
|
ARAveragedOverScores = "ARAveragedOverScores"
|
|
20
20
|
mARAveragedOverScores = "mARAveragedOverScores"
|
|
21
21
|
PrecisionRecallCurve = "PrecisionRecallCurve"
|
|
22
|
-
|
|
22
|
+
ConfusionMatrix = "ConfusionMatrix"
|
|
23
23
|
|
|
24
24
|
@classmethod
|
|
25
25
|
def base_metrics(cls):
|
|
@@ -329,52 +329,78 @@ class PrecisionRecallCurve:
|
|
|
329
329
|
|
|
330
330
|
|
|
331
331
|
@dataclass
|
|
332
|
-
class
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
332
|
+
class ConfusionMatrix:
|
|
333
|
+
confusion_matrix: dict[
|
|
334
|
+
str, # ground truth label value
|
|
335
|
+
dict[
|
|
336
|
+
str, # prediction label value
|
|
337
|
+
dict[
|
|
338
|
+
str, # either `count` or `examples`
|
|
339
|
+
int
|
|
340
|
+
| list[
|
|
341
|
+
dict[
|
|
342
|
+
str, # either `datum`, `groundtruth`, `prediction` or score
|
|
343
|
+
str # datum uid
|
|
344
|
+
| tuple[
|
|
345
|
+
float, float, float, float
|
|
346
|
+
] # bounding box (xmin, xmax, ymin, ymax)
|
|
347
|
+
| float, # prediction score
|
|
348
|
+
]
|
|
349
|
+
],
|
|
350
|
+
],
|
|
351
|
+
],
|
|
341
352
|
]
|
|
342
|
-
|
|
343
|
-
|
|
353
|
+
hallucinations: dict[
|
|
354
|
+
str, # prediction label value
|
|
355
|
+
dict[
|
|
356
|
+
str, # either `count` or `examples`
|
|
357
|
+
int
|
|
358
|
+
| list[
|
|
359
|
+
dict[
|
|
360
|
+
str, # either `datum`, `prediction` or score
|
|
361
|
+
str # datum uid
|
|
362
|
+
| float # prediction score
|
|
363
|
+
| tuple[
|
|
364
|
+
float, float, float, float
|
|
365
|
+
], # bounding box (xmin, xmax, ymin, ymax)
|
|
366
|
+
]
|
|
367
|
+
],
|
|
368
|
+
],
|
|
344
369
|
]
|
|
345
|
-
|
|
346
|
-
|
|
370
|
+
missing_predictions: dict[
|
|
371
|
+
str, # ground truth label value
|
|
372
|
+
dict[
|
|
373
|
+
str, # either `count` or `examples`
|
|
374
|
+
int
|
|
375
|
+
| list[
|
|
376
|
+
dict[
|
|
377
|
+
str, # either `datum` or `groundtruth`
|
|
378
|
+
str # datum uid
|
|
379
|
+
| tuple[
|
|
380
|
+
float, float, float, float
|
|
381
|
+
], # bounding box (xmin, xmax, ymin, ymax)
|
|
382
|
+
]
|
|
383
|
+
],
|
|
384
|
+
],
|
|
347
385
|
]
|
|
348
|
-
|
|
349
|
-
list[tuple[str, tuple[float, float, float, float]]]
|
|
350
|
-
]
|
|
351
|
-
score_thresholds: list[float]
|
|
386
|
+
score_threshold: float
|
|
352
387
|
iou_threshold: float
|
|
353
|
-
|
|
388
|
+
label_key: str
|
|
389
|
+
number_of_examples: int
|
|
354
390
|
|
|
355
391
|
@property
|
|
356
392
|
def metric(self) -> Metric:
|
|
357
393
|
return Metric(
|
|
358
394
|
type=type(self).__name__,
|
|
359
395
|
value={
|
|
360
|
-
"
|
|
361
|
-
"
|
|
362
|
-
"
|
|
363
|
-
"fn_misclassification": self.fn_misclassification,
|
|
364
|
-
"fn_missing_prediction": self.fn_missing_prediction,
|
|
365
|
-
"tp_examples": self.tp_examples,
|
|
366
|
-
"fp_misclassification_examples": self.fp_misclassification_examples,
|
|
367
|
-
"fp_hallucination_examples": self.fp_hallucination_examples,
|
|
368
|
-
"fn_misclassification_examples": self.fn_misclassification_examples,
|
|
369
|
-
"fn_missing_prediction_examples": self.fn_missing_prediction_examples,
|
|
396
|
+
"confusion_matrix": self.confusion_matrix,
|
|
397
|
+
"hallucinations": self.hallucinations,
|
|
398
|
+
"missing_predictions": self.missing_predictions,
|
|
370
399
|
},
|
|
371
400
|
parameters={
|
|
372
|
-
"
|
|
401
|
+
"score_threshold": self.score_threshold,
|
|
373
402
|
"iou_threshold": self.iou_threshold,
|
|
374
|
-
"
|
|
375
|
-
"key": self.label[0],
|
|
376
|
-
"value": self.label[1],
|
|
377
|
-
},
|
|
403
|
+
"label_key": self.label_key,
|
|
378
404
|
},
|
|
379
405
|
)
|
|
380
406
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
valor_lite/schemas.py,sha256=r4cC10w1xYsA785KmGE4ePeOX3wzEs846vT7QAiVg_I,293
|
|
3
|
+
valor_lite/detection/__init__.py,sha256=PiKfemo8FkZRzBhPSjhil8ahGURLy0Vk_iV25CB4UBU,1139
|
|
4
|
+
valor_lite/detection/annotation.py,sha256=BspLc3SjWXj6qYlGGpzDPHEZ8j7CiFzIL5cNlk0WCAM,2732
|
|
5
|
+
valor_lite/detection/computation.py,sha256=HDFfPTFQN2obm-g570KKDf7SP9V-h09OyMtFEJXsoTA,26323
|
|
6
|
+
valor_lite/detection/manager.py,sha256=ld2ytAw96UOO25iTwnfvAI1D2UY2Z1AGmP7cyCrT-V4,52801
|
|
7
|
+
valor_lite/detection/metric.py,sha256=RYKN17nEFRIZIqmotQa6OyNnU0nkjXyfFIclX_5hGpY,9933
|
|
8
|
+
valor_lite-0.33.5.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
|
|
9
|
+
valor_lite-0.33.5.dist-info/METADATA,sha256=WL0LQR2fT4CO4MuV0aXIkLPt3zQW2SsBS4MwcA_kHJY,1865
|
|
10
|
+
valor_lite-0.33.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
11
|
+
valor_lite-0.33.5.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
|
|
12
|
+
valor_lite-0.33.5.dist-info/RECORD,,
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
valor_lite/schemas.py,sha256=r4cC10w1xYsA785KmGE4ePeOX3wzEs846vT7QAiVg_I,293
|
|
3
|
-
valor_lite/detection/__init__.py,sha256=taEB7NQBsyCSsMtvDA7E_FhDxMfJB1rax-Rl1ZtRMoE,1017
|
|
4
|
-
valor_lite/detection/annotation.py,sha256=BspLc3SjWXj6qYlGGpzDPHEZ8j7CiFzIL5cNlk0WCAM,2732
|
|
5
|
-
valor_lite/detection/computation.py,sha256=AsF9zb_c7XQ7z3LfOAtMPZDkmuCZmB8HeAMZJlCaO6U,24696
|
|
6
|
-
valor_lite/detection/manager.py,sha256=vnouYdx_Ul9jz_pOYt8xfvdPrNy0S4SB838KXvtS1Bw,45301
|
|
7
|
-
valor_lite/detection/metric.py,sha256=DLqpODJZOG7SCqt7TCgR4am68PQORRCIQW_SXiTb1IA,9473
|
|
8
|
-
valor_lite-0.33.4.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
|
|
9
|
-
valor_lite-0.33.4.dist-info/METADATA,sha256=Eqb7KlTizDcjIV7eWM67zgdbbbVICGURdGrbben2NrI,1865
|
|
10
|
-
valor_lite-0.33.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
11
|
-
valor_lite-0.33.4.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
|
|
12
|
-
valor_lite-0.33.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|