valor-lite 0.33.4__py3-none-any.whl → 0.33.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valor-lite might be problematic. Click here for more details.
- valor_lite/classification/__init__.py +30 -0
- valor_lite/classification/annotation.py +13 -0
- valor_lite/classification/computation.py +411 -0
- valor_lite/classification/manager.py +842 -0
- valor_lite/classification/metric.py +191 -0
- valor_lite/detection/__init__.py +11 -6
- valor_lite/detection/computation.py +208 -152
- valor_lite/detection/manager.py +354 -133
- valor_lite/detection/metric.py +60 -34
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.6.dist-info}/METADATA +1 -1
- valor_lite-0.33.6.dist-info/RECORD +17 -0
- valor_lite-0.33.4.dist-info/RECORD +0 -12
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.6.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.6.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.4.dist-info → valor_lite-0.33.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from enum import Enum
|
|
3
|
+
|
|
4
|
+
from valor_lite.schemas import Metric
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MetricType(Enum):
    """
    Names of the classification metrics defined in this module.

    Each member's value is identical to its name.
    """

    Counts = "Counts"
    ROCAUC = "ROCAUC"
    mROCAUC = "mROCAUC"
    Precision = "Precision"
    Recall = "Recall"
    Accuracy = "Accuracy"
    F1 = "F1"
    ConfusionMatrix = "ConfusionMatrix"

    @classmethod
    def base(cls):
        """
        Return the base metric types as a list.

        Every member except `ConfusionMatrix` is included, in declaration
        order.
        """
        # Spelled out by name so that adding a new member never silently
        # changes the base set.
        base_names = (
            "Counts",
            "ROCAUC",
            "mROCAUC",
            "Precision",
            "Recall",
            "Accuracy",
            "F1",
        )
        return [cls[name] for name in base_names]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Counts:
    """
    Confusion counts for a single label.

    Attributes
    ----------
    tp : list[int]
        True-positive counts (presumably one entry per score threshold).
    fp : list[int]
        False-positive counts (presumably one entry per score threshold).
    fn : list[int]
        False-negative counts (presumably one entry per score threshold).
    tn : list[int]
        True-negative counts (presumably one entry per score threshold).
    score_thresholds : list[float]
        Score thresholds reported alongside the counts.
    hardmax : bool
        Flag forwarded unchanged into the metric parameters.
    label : tuple[str, str]
        The label as a (key, value) pair.
    """

    tp: list[int]
    fp: list[int]
    fn: list[int]
    tn: list[int]
    score_thresholds: list[float]
    hardmax: bool
    label: tuple[str, str]

    @property
    def metric(self) -> Metric:
        """Package the counts and their parameters as a generic `Metric`."""
        counts = {
            "tp": self.tp,
            "fp": self.fp,
            "fn": self.fn,
            "tn": self.tn,
        }
        label = {
            "key": self.label[0],
            "value": self.label[1],
        }
        parameters = {
            "score_thresholds": self.score_thresholds,
            "hardmax": self.hardmax,
            "label": label,
        }
        # The metric type is the class name, i.e. "Counts".
        return Metric(
            type=type(self).__name__,
            value=counts,
            parameters=parameters,
        )

    def to_dict(self) -> dict:
        """Return the dictionary form of `self.metric`."""
        metric = self.metric
        return metric.to_dict()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class _ThresholdValue:
    """
    Shared base for metrics that report a list of values for a single label.

    The emitted metric type is the name of the concrete class, so subclasses
    differ only by name.

    Attributes
    ----------
    value : list[float]
        Metric values (presumably one entry per score threshold).
    score_thresholds : list[float]
        Score thresholds reported alongside the values.
    hardmax : bool
        Flag forwarded unchanged into the metric parameters.
    label : tuple[str, str]
        The label as a (key, value) pair.
    """

    value: list[float]
    score_thresholds: list[float]
    hardmax: bool
    label: tuple[str, str]

    @property
    def metric(self) -> Metric:
        """Package the values and their parameters as a generic `Metric`."""
        label = {
            "key": self.label[0],
            "value": self.label[1],
        }
        parameters = {
            "score_thresholds": self.score_thresholds,
            "hardmax": self.hardmax,
            "label": label,
        }
        # `type(self)` resolves to the subclass, so each subclass reports
        # its own name as the metric type.
        return Metric(
            type=type(self).__name__,
            value=self.value,
            parameters=parameters,
        )

    def to_dict(self) -> dict:
        """Return the dictionary form of `self.metric`."""
        metric = self.metric
        return metric.to_dict()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class Precision(_ThresholdValue):
    """
    Precision metric for a single label.

    Adds nothing to `_ThresholdValue`; the class name supplies the metric
    type, "Precision".
    """
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class Recall(_ThresholdValue):
    """
    Recall metric for a single label.

    Adds nothing to `_ThresholdValue`; the class name supplies the metric
    type, "Recall".
    """
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class Accuracy(_ThresholdValue):
    """
    Accuracy metric for a single label.

    Adds nothing to `_ThresholdValue`; the class name supplies the metric
    type, "Accuracy".
    """
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class F1(_ThresholdValue):
    """
    F1 metric for a single label.

    Adds nothing to `_ThresholdValue`; the class name supplies the metric
    type, "F1".
    """
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass
class ROCAUC:
    """
    ROC AUC value for a single label.

    Attributes
    ----------
    value : float
        The metric value.
    label : tuple[str, str]
        The label as a (key, value) pair.
    """

    value: float
    label: tuple[str, str]

    @property
    def metric(self) -> Metric:
        """Package the value and its label as a generic `Metric`."""
        label = {
            "key": self.label[0],
            "value": self.label[1],
        }
        # The metric type is the class name, i.e. "ROCAUC".
        return Metric(
            type=type(self).__name__,
            value=self.value,
            parameters={"label": label},
        )

    def to_dict(self) -> dict:
        """Return the dictionary form of `self.metric`."""
        metric = self.metric
        return metric.to_dict()
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@dataclass
class mROCAUC:
    """
    ROC AUC value reported for a label key rather than a single label.

    NOTE(review): the `m` prefix suggests a mean over the labels sharing
    `label_key` - confirm against the computation module.

    Attributes
    ----------
    value : float
        The metric value.
    label_key : str
        The label key the value is reported for.
    """

    value: float
    label_key: str

    @property
    def metric(self) -> Metric:
        """Package the value and its label key as a generic `Metric`."""
        parameters = {"label_key": self.label_key}
        # The metric type is the class name, i.e. "mROCAUC".
        return Metric(
            type=type(self).__name__,
            value=self.value,
            parameters=parameters,
        )

    def to_dict(self) -> dict:
        """Return the dictionary form of `self.metric`."""
        metric = self.metric
        return metric.to_dict()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@dataclass
class ConfusionMatrix:
    """
    Confusion matrix and missing predictions for one label key.

    Attributes
    ----------
    confusion_matrix : dict
        Nested mapping of ground truth label value -> prediction label
        value -> an entry holding a `count` and a list of `examples`; each
        example maps `datum` / `score` to a datum uid / prediction score.
    missing_predictions : dict
        Mapping of ground truth label value -> an entry holding a `count`
        and a list of datum `examples`.
    score_threshold : float
        Score threshold reported in the metric parameters.
    label_key : str
        Label key reported in the metric parameters.
    number_of_examples : int
        Stored on the instance only.
        NOTE(review): this field is not included in the emitted metric -
        confirm whether that is intentional.
    """

    confusion_matrix: dict[
        str,  # ground truth label value
        dict[
            str,  # prediction label value
            dict[
                str,  # either `count` or `examples`
                int
                | list[
                    dict[
                        str,  # either `datum` or `score`
                        str | float,  # datum uid  # prediction score
                    ]
                ],
            ],
        ],
    ]
    missing_predictions: dict[
        str,  # ground truth label value
        dict[
            str,  # either `count` or `examples`
            int | list[dict[str, str]],  # count or datum examples
        ],
    ]
    score_threshold: float
    label_key: str
    number_of_examples: int

    @property
    def metric(self) -> Metric:
        """Package both mappings and their parameters as a generic `Metric`."""
        payload = {
            "confusion_matrix": self.confusion_matrix,
            "missing_predictions": self.missing_predictions,
        }
        parameters = {
            "score_threshold": self.score_threshold,
            "label_key": self.label_key,
        }
        # The metric type is the class name, i.e. "ConfusionMatrix".
        return Metric(
            type=type(self).__name__,
            value=payload,
            parameters=parameters,
        )

    def to_dict(self) -> dict:
        """Return the dictionary form of `self.metric`."""
        metric = self.metric
        return metric.to_dict()
|
valor_lite/detection/__init__.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
from .annotation import Bitmask, BoundingBox, Detection, Polygon
|
|
2
2
|
from .computation import (
|
|
3
|
-
|
|
3
|
+
compute_bbox_iou,
|
|
4
|
+
compute_bitmask_iou,
|
|
5
|
+
compute_confusion_matrix,
|
|
4
6
|
compute_metrics,
|
|
7
|
+
compute_polygon_iou,
|
|
5
8
|
compute_ranked_pairs,
|
|
6
9
|
)
|
|
7
|
-
from .manager import DataLoader, Evaluator
|
|
10
|
+
from .manager import DataLoader, Evaluator
|
|
8
11
|
from .metric import (
|
|
9
12
|
AP,
|
|
10
13
|
AR,
|
|
@@ -12,8 +15,8 @@ from .metric import (
|
|
|
12
15
|
Accuracy,
|
|
13
16
|
APAveragedOverIOUs,
|
|
14
17
|
ARAveragedOverScores,
|
|
18
|
+
ConfusionMatrix,
|
|
15
19
|
Counts,
|
|
16
|
-
DetailedCounts,
|
|
17
20
|
MetricType,
|
|
18
21
|
Precision,
|
|
19
22
|
PrecisionRecallCurve,
|
|
@@ -44,11 +47,13 @@ __all__ = [
|
|
|
44
47
|
"ARAveragedOverScores",
|
|
45
48
|
"mARAveragedOverScores",
|
|
46
49
|
"PrecisionRecallCurve",
|
|
47
|
-
"
|
|
48
|
-
"
|
|
50
|
+
"ConfusionMatrix",
|
|
51
|
+
"compute_bbox_iou",
|
|
52
|
+
"compute_bitmask_iou",
|
|
53
|
+
"compute_polygon_iou",
|
|
49
54
|
"compute_ranked_pairs",
|
|
50
55
|
"compute_metrics",
|
|
51
|
-
"
|
|
56
|
+
"compute_confusion_matrix",
|
|
52
57
|
"DataLoader",
|
|
53
58
|
"Evaluator",
|
|
54
59
|
]
|
|
@@ -492,13 +492,52 @@ def compute_metrics(
|
|
|
492
492
|
)
|
|
493
493
|
|
|
494
494
|
|
|
495
|
-
def
|
|
495
|
+
def _count_with_examples(
|
|
496
|
+
data: NDArray[np.floating],
|
|
497
|
+
unique_idx: int | list[int],
|
|
498
|
+
label_idx: int | list[int],
|
|
499
|
+
) -> tuple[NDArray[np.floating], NDArray[np.int32], NDArray[np.int32]]:
|
|
500
|
+
"""
|
|
501
|
+
Helper function for counting occurences of unique detailed pairs.
|
|
502
|
+
|
|
503
|
+
Parameters
|
|
504
|
+
----------
|
|
505
|
+
data : NDArray[np.floating]
|
|
506
|
+
A masked portion of a detailed pairs array.
|
|
507
|
+
unique_idx : int | list[int]
|
|
508
|
+
The index or indices upon which uniqueness is constrained.
|
|
509
|
+
label_idx : int | list[int]
|
|
510
|
+
The index or indices within the unique index or indices that encode labels.
|
|
511
|
+
|
|
512
|
+
Returns
|
|
513
|
+
-------
|
|
514
|
+
NDArray[np.floating]
|
|
515
|
+
Examples drawn from the data input.
|
|
516
|
+
NDArray[np.int32]
|
|
517
|
+
Unique label indices.
|
|
518
|
+
NDArray[np.int32]
|
|
519
|
+
Counts for each unique label index.
|
|
520
|
+
"""
|
|
521
|
+
unique_rows, indices = np.unique(
|
|
522
|
+
data.astype(int)[:, unique_idx],
|
|
523
|
+
return_index=True,
|
|
524
|
+
axis=0,
|
|
525
|
+
)
|
|
526
|
+
examples = data[indices]
|
|
527
|
+
labels, counts = np.unique(
|
|
528
|
+
unique_rows[:, label_idx], return_counts=True, axis=0
|
|
529
|
+
)
|
|
530
|
+
return examples, labels, counts
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def compute_confusion_matrix(
|
|
496
534
|
data: NDArray[np.floating],
|
|
497
535
|
label_metadata: NDArray[np.int32],
|
|
498
536
|
iou_thresholds: NDArray[np.floating],
|
|
499
537
|
score_thresholds: NDArray[np.floating],
|
|
500
|
-
|
|
501
|
-
) -> NDArray[np.int32]:
|
|
538
|
+
n_examples: int,
|
|
539
|
+
) -> tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.int32]]:
|
|
540
|
+
|
|
502
541
|
"""
|
|
503
542
|
Compute detailed counts.
|
|
504
543
|
|
|
@@ -512,19 +551,6 @@ def compute_detailed_counts(
|
|
|
512
551
|
Index 5 - Prediction Label Index
|
|
513
552
|
Index 6 - Score
|
|
514
553
|
|
|
515
|
-
Outputs an array with shape (N_IoUs, N_Score, N_Labels, 5 * n_samples + 5):
|
|
516
|
-
|
|
517
|
-
Index 0 - True Positive Count
|
|
518
|
-
... Datum ID Examples
|
|
519
|
-
Index 2 * n_samples + 1 - False Positive Misclassification Count
|
|
520
|
-
... Datum ID Examples
|
|
521
|
-
Index 4 * n_samples + 2 - False Positive Hallucination Count
|
|
522
|
-
... Datum ID Examples
|
|
523
|
-
Index 6 * n_samples + 3 - False Negative Misclassification Count
|
|
524
|
-
... Datum ID Examples
|
|
525
|
-
Index 8 * n_samples + 4 - False Negative Missing Prediction Count
|
|
526
|
-
... Datum ID Examples
|
|
527
|
-
|
|
528
554
|
Parameters
|
|
529
555
|
----------
|
|
530
556
|
data : NDArray[np.floating]
|
|
@@ -535,28 +561,37 @@ def compute_detailed_counts(
|
|
|
535
561
|
A 1-D array containing IoU thresholds.
|
|
536
562
|
score_thresholds : NDArray[np.floating]
|
|
537
563
|
A 1-D array containing score thresholds.
|
|
538
|
-
|
|
539
|
-
The number of examples to return per count.
|
|
564
|
+
n_examples : int
|
|
565
|
+
The maximum number of examples to return per count.
|
|
540
566
|
|
|
541
567
|
Returns
|
|
542
568
|
-------
|
|
569
|
+
NDArray[np.floating]
|
|
570
|
+
Confusion matrix.
|
|
571
|
+
NDArray[np.floating]
|
|
572
|
+
Hallucinations.
|
|
543
573
|
NDArray[np.int32]
|
|
544
|
-
|
|
574
|
+
Missing Predictions.
|
|
545
575
|
"""
|
|
546
576
|
|
|
547
577
|
n_labels = label_metadata.shape[0]
|
|
548
578
|
n_ious = iou_thresholds.shape[0]
|
|
549
579
|
n_scores = score_thresholds.shape[0]
|
|
550
|
-
n_metrics = 5 * (2 * n_samples + 1)
|
|
551
|
-
|
|
552
|
-
tp_idx = 0
|
|
553
|
-
fp_misclf_idx = 2 * n_samples + 1
|
|
554
|
-
fp_halluc_idx = 4 * n_samples + 2
|
|
555
|
-
fn_misclf_idx = 6 * n_samples + 3
|
|
556
|
-
fn_misprd_idx = 8 * n_samples + 4
|
|
557
580
|
|
|
558
|
-
|
|
559
|
-
(
|
|
581
|
+
confusion_matrix = -1 * np.ones(
|
|
582
|
+
# (datum idx, gt idx, pd idx, pd score) * n_examples + count
|
|
583
|
+
(n_ious, n_scores, n_labels, n_labels, 4 * n_examples + 1),
|
|
584
|
+
dtype=np.float32,
|
|
585
|
+
)
|
|
586
|
+
hallucinations = -1 * np.ones(
|
|
587
|
+
# (datum idx, pd idx, pd score) * n_examples + count
|
|
588
|
+
(n_ious, n_scores, n_labels, 3 * n_examples + 1),
|
|
589
|
+
dtype=np.float32,
|
|
590
|
+
)
|
|
591
|
+
missing_predictions = -1 * np.ones(
|
|
592
|
+
# (datum idx, gt idx) * n_examples + count
|
|
593
|
+
(n_ious, n_scores, n_labels, 2 * n_examples + 1),
|
|
594
|
+
dtype=np.int32,
|
|
560
595
|
)
|
|
561
596
|
|
|
562
597
|
mask_gt_exists = data[:, 1] > -0.5
|
|
@@ -622,9 +657,9 @@ def compute_detailed_counts(
|
|
|
622
657
|
~mask_groundtruths_with_passing_score & mask_gt_exists
|
|
623
658
|
)
|
|
624
659
|
|
|
660
|
+
# create category masks
|
|
625
661
|
mask_tp = mask_score & mask_iou & mask_gt_pd_match
|
|
626
|
-
|
|
627
|
-
mask_fn_misclf = mask_iou & (
|
|
662
|
+
mask_misclf = mask_iou & (
|
|
628
663
|
(
|
|
629
664
|
~mask_score
|
|
630
665
|
& mask_gt_pd_match
|
|
@@ -632,143 +667,164 @@ def compute_detailed_counts(
|
|
|
632
667
|
)
|
|
633
668
|
| (mask_score & mask_gt_pd_mismatch)
|
|
634
669
|
)
|
|
635
|
-
|
|
636
|
-
|
|
670
|
+
mask_halluc = mask_score & mask_predictions_without_passing_ious
|
|
671
|
+
mask_misprd = (
|
|
637
672
|
mask_groundtruths_without_passing_ious
|
|
638
673
|
| mask_groundtruths_without_passing_score
|
|
639
674
|
)
|
|
640
675
|
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
mask_fp_misclf_is_tp = (
|
|
649
|
-
(fp_misclf.reshape(-1, 1, 3) == tp_pds.reshape(1, -1, 3))
|
|
676
|
+
# filter out true-positives from misclf and misprd
|
|
677
|
+
mask_gts_with_tp_override = (
|
|
678
|
+
(
|
|
679
|
+
data[mask_misclf][:, [0, 1]].reshape(-1, 1, 2)
|
|
680
|
+
== data[mask_tp][:, [0, 1]].reshape(1, -1, 2)
|
|
681
|
+
)
|
|
650
682
|
.all(axis=2)
|
|
651
683
|
.any(axis=1)
|
|
652
684
|
)
|
|
653
|
-
|
|
654
|
-
(
|
|
685
|
+
mask_pds_with_tp_override = (
|
|
686
|
+
(
|
|
687
|
+
data[mask_misclf][:, [0, 2]].reshape(-1, 1, 2)
|
|
688
|
+
== data[mask_tp][:, [0, 2]].reshape(1, -1, 2)
|
|
689
|
+
)
|
|
655
690
|
.all(axis=2)
|
|
656
691
|
.any(axis=1)
|
|
657
692
|
)
|
|
693
|
+
mask_misprd[mask_misclf] |= (
|
|
694
|
+
~mask_gts_with_tp_override & mask_pds_with_tp_override
|
|
695
|
+
)
|
|
696
|
+
mask_misclf[mask_misclf] &= (
|
|
697
|
+
~mask_gts_with_tp_override & ~mask_pds_with_tp_override
|
|
698
|
+
)
|
|
658
699
|
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
tp_count = np.bincount(tp[:, 2].astype(int), minlength=n_labels)
|
|
666
|
-
fp_misclf_count = np.bincount(
|
|
667
|
-
fp_misclf[:, 2].astype(int), minlength=n_labels
|
|
700
|
+
# count true positives
|
|
701
|
+
tp_examples, tp_labels, tp_counts = _count_with_examples(
|
|
702
|
+
data[mask_tp],
|
|
703
|
+
unique_idx=[0, 2, 5],
|
|
704
|
+
label_idx=2,
|
|
668
705
|
)
|
|
669
|
-
|
|
670
|
-
|
|
706
|
+
|
|
707
|
+
# count misclassifications
|
|
708
|
+
(
|
|
709
|
+
misclf_examples,
|
|
710
|
+
misclf_labels,
|
|
711
|
+
misclf_counts,
|
|
712
|
+
) = _count_with_examples(
|
|
713
|
+
data[mask_misclf], unique_idx=[0, 1, 2, 4, 5], label_idx=[3, 4]
|
|
671
714
|
)
|
|
672
|
-
|
|
673
|
-
|
|
715
|
+
|
|
716
|
+
# count hallucinations
|
|
717
|
+
(
|
|
718
|
+
halluc_examples,
|
|
719
|
+
halluc_labels,
|
|
720
|
+
halluc_counts,
|
|
721
|
+
) = _count_with_examples(
|
|
722
|
+
data[mask_halluc], unique_idx=[0, 2, 5], label_idx=2
|
|
674
723
|
)
|
|
675
|
-
|
|
676
|
-
|
|
724
|
+
|
|
725
|
+
# count missing predictions
|
|
726
|
+
(
|
|
727
|
+
misprd_examples,
|
|
728
|
+
misprd_labels,
|
|
729
|
+
misprd_counts,
|
|
730
|
+
) = _count_with_examples(
|
|
731
|
+
data[mask_misprd], unique_idx=[0, 1, 4], label_idx=2
|
|
677
732
|
)
|
|
678
733
|
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
iou_idx, score_idx,
|
|
682
|
-
] =
|
|
683
|
-
|
|
684
|
-
iou_idx,
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
734
|
+
# store the counts
|
|
735
|
+
confusion_matrix[
|
|
736
|
+
iou_idx, score_idx, tp_labels, tp_labels, 0
|
|
737
|
+
] = tp_counts
|
|
738
|
+
confusion_matrix[
|
|
739
|
+
iou_idx,
|
|
740
|
+
score_idx,
|
|
741
|
+
misclf_labels[:, 0],
|
|
742
|
+
misclf_labels[:, 1],
|
|
743
|
+
0,
|
|
744
|
+
] = misclf_counts
|
|
745
|
+
hallucinations[
|
|
746
|
+
iou_idx,
|
|
747
|
+
score_idx,
|
|
748
|
+
halluc_labels,
|
|
749
|
+
0,
|
|
750
|
+
] = halluc_counts
|
|
751
|
+
missing_predictions[
|
|
752
|
+
iou_idx,
|
|
753
|
+
score_idx,
|
|
754
|
+
misprd_labels,
|
|
755
|
+
0,
|
|
756
|
+
] = misprd_counts
|
|
757
|
+
|
|
758
|
+
# store examples
|
|
759
|
+
if n_examples > 0:
|
|
694
760
|
for label_idx in range(n_labels):
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
)
|
|
702
|
-
fp_misclf_examples = (
|
|
703
|
-
fp_misclf[fp_misclf[:, 2].astype(int) == label_idx][
|
|
704
|
-
:n_samples, [0, 1]
|
|
705
|
-
]
|
|
706
|
-
.astype(int)
|
|
707
|
-
.flatten()
|
|
708
|
-
)
|
|
709
|
-
fp_halluc_examples = (
|
|
710
|
-
fp_halluc[fp_halluc[:, 2].astype(int) == label_idx][
|
|
711
|
-
:n_samples, [0, 1]
|
|
712
|
-
]
|
|
713
|
-
.astype(int)
|
|
714
|
-
.flatten()
|
|
715
|
-
)
|
|
716
|
-
fn_misclf_examples = (
|
|
717
|
-
fn_misclf[fn_misclf[:, 2].astype(int) == label_idx][
|
|
718
|
-
:n_samples, [0, 1]
|
|
719
|
-
]
|
|
720
|
-
.astype(int)
|
|
721
|
-
.flatten()
|
|
722
|
-
)
|
|
723
|
-
fn_misprd_examples = (
|
|
724
|
-
fn_misprd[fn_misprd[:, 2].astype(int) == label_idx][
|
|
725
|
-
:n_samples, [0, 1]
|
|
761
|
+
|
|
762
|
+
# true-positive examples
|
|
763
|
+
mask_tp_label = tp_examples[:, 5] == label_idx
|
|
764
|
+
if mask_tp_label.sum() > 0:
|
|
765
|
+
tp_label_examples = tp_examples[mask_tp_label][
|
|
766
|
+
:n_examples
|
|
726
767
|
]
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
768
|
+
confusion_matrix[
|
|
769
|
+
iou_idx,
|
|
770
|
+
score_idx,
|
|
771
|
+
label_idx,
|
|
772
|
+
label_idx,
|
|
773
|
+
1 : 4 * tp_label_examples.shape[0] + 1,
|
|
774
|
+
] = tp_label_examples[:, [0, 1, 2, 6]].flatten()
|
|
775
|
+
|
|
776
|
+
# misclassification examples
|
|
777
|
+
mask_misclf_gt_label = misclf_examples[:, 4] == label_idx
|
|
778
|
+
if mask_misclf_gt_label.sum() > 0:
|
|
779
|
+
for pd_label_idx in range(n_labels):
|
|
780
|
+
mask_misclf_pd_label = (
|
|
781
|
+
misclf_examples[:, 5] == pd_label_idx
|
|
782
|
+
)
|
|
783
|
+
mask_misclf_label_combo = (
|
|
784
|
+
mask_misclf_gt_label & mask_misclf_pd_label
|
|
785
|
+
)
|
|
786
|
+
if mask_misclf_label_combo.sum() > 0:
|
|
787
|
+
misclf_label_examples = misclf_examples[
|
|
788
|
+
mask_misclf_label_combo
|
|
789
|
+
][:n_examples]
|
|
790
|
+
confusion_matrix[
|
|
791
|
+
iou_idx,
|
|
792
|
+
score_idx,
|
|
793
|
+
label_idx,
|
|
794
|
+
pd_label_idx,
|
|
795
|
+
1 : 4 * misclf_label_examples.shape[0] + 1,
|
|
796
|
+
] = misclf_label_examples[
|
|
797
|
+
:, [0, 1, 2, 6]
|
|
798
|
+
].flatten()
|
|
799
|
+
|
|
800
|
+
# hallucination examples
|
|
801
|
+
mask_halluc_label = halluc_examples[:, 5] == label_idx
|
|
802
|
+
if mask_halluc_label.sum() > 0:
|
|
803
|
+
halluc_label_examples = halluc_examples[
|
|
804
|
+
mask_halluc_label
|
|
805
|
+
][:n_examples]
|
|
806
|
+
hallucinations[
|
|
807
|
+
iou_idx,
|
|
808
|
+
score_idx,
|
|
809
|
+
label_idx,
|
|
810
|
+
1 : 3 * halluc_label_examples.shape[0] + 1,
|
|
811
|
+
] = halluc_label_examples[:, [0, 2, 6]].flatten()
|
|
812
|
+
|
|
813
|
+
# missing prediction examples
|
|
814
|
+
mask_misprd_label = misprd_examples[:, 4] == label_idx
|
|
815
|
+
if misprd_examples.size > 0:
|
|
816
|
+
misprd_label_examples = misprd_examples[
|
|
817
|
+
mask_misprd_label
|
|
818
|
+
][:n_examples]
|
|
819
|
+
missing_predictions[
|
|
820
|
+
iou_idx,
|
|
821
|
+
score_idx,
|
|
822
|
+
label_idx,
|
|
823
|
+
1 : 2 * misprd_label_examples.shape[0] + 1,
|
|
824
|
+
] = misprd_label_examples[:, [0, 1]].flatten()
|
|
825
|
+
|
|
826
|
+
return (
|
|
827
|
+
confusion_matrix,
|
|
828
|
+
hallucinations,
|
|
829
|
+
missing_predictions,
|
|
830
|
+
)
|