valor-lite 0.33.3__py3-none-any.whl → 0.33.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- valor_lite/detection/__init__.py +12 -7
- valor_lite/detection/annotation.py +34 -2
- valor_lite/detection/computation.py +285 -164
- valor_lite/detection/manager.py +693 -202
- valor_lite/detection/metric.py +60 -34
- {valor_lite-0.33.3.dist-info → valor_lite-0.33.5.dist-info}/METADATA +2 -1
- valor_lite-0.33.5.dist-info/RECORD +12 -0
- valor_lite-0.33.3.dist-info/RECORD +0 -12
- {valor_lite-0.33.3.dist-info → valor_lite-0.33.5.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.3.dist-info → valor_lite-0.33.5.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.3.dist-info → valor_lite-0.33.5.dist-info}/top_level.txt +0 -0
valor_lite/detection/__init__.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
from .annotation import Bitmask, BoundingBox, Detection
|
|
1
|
+
from .annotation import Bitmask, BoundingBox, Detection, Polygon
|
|
2
2
|
from .computation import (
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
compute_bbox_iou,
|
|
4
|
+
compute_bitmask_iou,
|
|
5
|
+
compute_confusion_matrix,
|
|
5
6
|
compute_metrics,
|
|
7
|
+
compute_polygon_iou,
|
|
6
8
|
compute_ranked_pairs,
|
|
7
9
|
)
|
|
8
10
|
from .manager import DataLoader, Evaluator
|
|
@@ -13,8 +15,8 @@ from .metric import (
|
|
|
13
15
|
Accuracy,
|
|
14
16
|
APAveragedOverIOUs,
|
|
15
17
|
ARAveragedOverScores,
|
|
18
|
+
ConfusionMatrix,
|
|
16
19
|
Counts,
|
|
17
|
-
DetailedCounts,
|
|
18
20
|
MetricType,
|
|
19
21
|
Precision,
|
|
20
22
|
PrecisionRecallCurve,
|
|
@@ -29,6 +31,7 @@ __all__ = [
|
|
|
29
31
|
"Bitmask",
|
|
30
32
|
"BoundingBox",
|
|
31
33
|
"Detection",
|
|
34
|
+
"Polygon",
|
|
32
35
|
"MetricType",
|
|
33
36
|
"Counts",
|
|
34
37
|
"Precision",
|
|
@@ -44,11 +47,13 @@ __all__ = [
|
|
|
44
47
|
"ARAveragedOverScores",
|
|
45
48
|
"mARAveragedOverScores",
|
|
46
49
|
"PrecisionRecallCurve",
|
|
47
|
-
"
|
|
48
|
-
"
|
|
50
|
+
"ConfusionMatrix",
|
|
51
|
+
"compute_bbox_iou",
|
|
52
|
+
"compute_bitmask_iou",
|
|
53
|
+
"compute_polygon_iou",
|
|
49
54
|
"compute_ranked_pairs",
|
|
50
55
|
"compute_metrics",
|
|
51
|
-
"
|
|
56
|
+
"compute_confusion_matrix",
|
|
52
57
|
"DataLoader",
|
|
53
58
|
"Evaluator",
|
|
54
59
|
]
|
|
@@ -2,6 +2,7 @@ from dataclasses import dataclass, field
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
from numpy.typing import NDArray
|
|
5
|
+
from shapely.geometry import Polygon as ShapelyPolygon
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
@dataclass
|
|
@@ -24,6 +25,37 @@ class BoundingBox:
|
|
|
24
25
|
return (self.xmin, self.xmax, self.ymin, self.ymax)
|
|
25
26
|
|
|
26
27
|
|
|
28
|
+
@dataclass
class Polygon:
    """
    A polygon annotation backed by a shapely geometry.

    Attributes
    ----------
    shape : shapely.geometry.Polygon
        The polygon geometry.
    labels : list[tuple[str, str]]
        Labels attached to the polygon, as pairs of strings.
    scores : list[float]
        Optional scores. When non-empty there must be exactly one score
        per label.
    """

    shape: ShapelyPolygon
    labels: list[tuple[str, str]]
    scores: list[float] = field(default_factory=list)

    def __post_init__(self):
        """
        Validate the geometry type and the label/score pairing.

        Raises
        ------
        TypeError
            If `shape` is not a shapely polygon.
        ValueError
            If scores are given but their count differs from the labels.
        """
        if not isinstance(self.shape, ShapelyPolygon):
            raise TypeError("shape must be of type shapely.geometry.Polygon.")

        n_scores = len(self.scores)
        if n_scores > 0 and n_scores != len(self.labels):
            raise ValueError(
                "If scores are defined, there must be a 1:1 pairing with labels."
            )

    def to_box(self) -> BoundingBox | None:
        """
        Convert the polygon to a bounding box built from its bounds.

        Returns
        -------
        BoundingBox | None
            A box carrying the same labels and scores, or None when the
            polygon is empty.
        """
        if self.shape.is_empty:
            return None

        # shapely reports bounds as (minx, miny, maxx, maxy).
        left, bottom, right, top = self.shape.bounds
        return BoundingBox(
            xmin=left,
            xmax=right,
            ymin=bottom,
            ymax=top,
            labels=self.labels,
            scores=self.scores,
        )
|
|
57
|
+
|
|
58
|
+
|
|
27
59
|
@dataclass
|
|
28
60
|
class Bitmask:
|
|
29
61
|
mask: NDArray[np.bool_]
|
|
@@ -55,8 +87,8 @@ class Bitmask:
|
|
|
55
87
|
@dataclass
|
|
56
88
|
class Detection:
|
|
57
89
|
uid: str
|
|
58
|
-
groundtruths: list[BoundingBox]
|
|
59
|
-
predictions: list[BoundingBox]
|
|
90
|
+
groundtruths: list[BoundingBox] | list[Bitmask] | list[Polygon]
|
|
91
|
+
predictions: list[BoundingBox] | list[Bitmask] | list[Polygon]
|
|
60
92
|
|
|
61
93
|
def __post_init__(self):
|
|
62
94
|
for prediction in self.predictions:
|
|
@@ -2,7 +2,7 @@ import numpy as np
|
|
|
2
2
|
from numpy.typing import NDArray
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
def
|
|
5
|
+
def compute_bbox_iou(data: NDArray[np.floating]) -> NDArray[np.floating]:
|
|
6
6
|
"""
|
|
7
7
|
Computes intersection-over-union (IoU) for axis-aligned bounding boxes.
|
|
8
8
|
|
|
@@ -24,14 +24,12 @@ def compute_iou(data: NDArray[np.floating]) -> NDArray[np.floating]:
|
|
|
24
24
|
Parameters
|
|
25
25
|
----------
|
|
26
26
|
data : NDArray[np.floating]
|
|
27
|
-
A sorted array of
|
|
28
|
-
label_metadata : NDArray[np.int32]
|
|
29
|
-
An array containing metadata related to labels.
|
|
27
|
+
A sorted array of bounding box pairs.
|
|
30
28
|
|
|
31
29
|
Returns
|
|
32
30
|
-------
|
|
33
31
|
NDArray[np.floating]
|
|
34
|
-
|
|
32
|
+
Computed IoU's.
|
|
35
33
|
"""
|
|
36
34
|
|
|
37
35
|
xmin1, xmax1, ymin1, ymax1 = (
|
|
@@ -69,6 +67,73 @@ def compute_iou(data: NDArray[np.floating]) -> NDArray[np.floating]:
|
|
|
69
67
|
return iou
|
|
70
68
|
|
|
71
69
|
|
|
70
|
+
def compute_bitmask_iou(data: NDArray[np.floating]) -> NDArray[np.floating]:
    """
    Computes intersection-over-union (IoU) for bitmasks.

    Takes data with shape (N, 2):

    Index 0 - first bitmask
    Index 1 - second bitmask

    Returns a 1-D array with shape (N,):

    Element i - IoU of the i-th pair

    Parameters
    ----------
    data : NDArray[np.floating]
        A sorted array of bitmask pairs.

    Returns
    -------
    NDArray[np.floating]
        Computed IoUs. A pair whose union is empty (both masks contain no
        set pixels) is assigned an IoU of zero.
    """
    intersection_ = np.array(
        [np.logical_and(x, y).sum() for x, y in data],
        dtype=np.float64,
    )
    union_ = np.array(
        [np.logical_or(x, y).sum() for x, y in data],
        dtype=np.float64,
    )

    # Only divide where the union is non-empty; the remaining entries keep
    # their zero initial value instead of becoming NaN from a 0 / 0 division.
    iou = np.zeros_like(intersection_)
    np.divide(intersection_, union_, out=iou, where=union_ > 0)
    return iou
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def compute_polygon_iou(
    data: NDArray[np.floating],
) -> NDArray[np.floating]:
    """
    Computes intersection-over-union (IoU) for shapely polygons.

    Takes data with shape (N, 2):

    Index 0 - first polygon
    Index 1 - second polygon

    Returns a 1-D array with shape (N,):

    Element i - IoU of the i-th pair

    Parameters
    ----------
    data : NDArray[np.floating]
        A sorted array of polygon pairs.

    Returns
    -------
    NDArray[np.floating]
        Computed IoUs. A pair whose union has no area is assigned an IoU
        of zero.
    """
    intersection_ = np.array(
        [poly1.intersection(poly2).area for poly1, poly2 in data],
        dtype=np.float64,
    )
    # Union area by inclusion-exclusion: area(A) + area(B) - area(A & B).
    union_ = (
        np.array(
            [poly1.area + poly2.area for poly1, poly2 in data],
            dtype=np.float64,
        )
        - intersection_
    )

    # Only divide where the union has area; the remaining entries keep their
    # zero initial value instead of becoming NaN from a 0 / 0 division.
    iou = np.zeros_like(intersection_)
    np.divide(intersection_, union_, out=iou, where=union_ > 0)
    return iou
|
|
135
|
+
|
|
136
|
+
|
|
72
137
|
def _compute_ranked_pairs_for_datum(
|
|
73
138
|
data: NDArray[np.floating],
|
|
74
139
|
label_metadata: NDArray[np.int32],
|
|
@@ -133,7 +198,7 @@ def compute_ranked_pairs(
|
|
|
133
198
|
Parameters
|
|
134
199
|
----------
|
|
135
200
|
data : NDArray[np.floating]
|
|
136
|
-
A sorted array of
|
|
201
|
+
A sorted array summarizing the IOU calculations of one or more pairs.
|
|
137
202
|
label_metadata : NDArray[np.int32]
|
|
138
203
|
An array containing metadata related to labels.
|
|
139
204
|
|
|
@@ -161,10 +226,10 @@ def compute_ranked_pairs(
|
|
|
161
226
|
|
|
162
227
|
|
|
163
228
|
def compute_metrics(
|
|
164
|
-
data: np.
|
|
165
|
-
label_metadata: np.
|
|
166
|
-
iou_thresholds: np.
|
|
167
|
-
score_thresholds: np.
|
|
229
|
+
data: NDArray[np.floating],
|
|
230
|
+
label_metadata: NDArray[np.int32],
|
|
231
|
+
iou_thresholds: NDArray[np.floating],
|
|
232
|
+
score_thresholds: NDArray[np.floating],
|
|
168
233
|
) -> tuple[
|
|
169
234
|
tuple[
|
|
170
235
|
NDArray[np.floating],
|
|
@@ -197,7 +262,7 @@ def compute_metrics(
|
|
|
197
262
|
Parameters
|
|
198
263
|
----------
|
|
199
264
|
data : NDArray[np.floating]
|
|
200
|
-
A sorted array of
|
|
265
|
+
A sorted array summarizing the IOU calculations of one or more pairs.
|
|
201
266
|
label_metadata : NDArray[np.int32]
|
|
202
267
|
An array containing metadata related to labels.
|
|
203
268
|
iou_thresholds : NDArray[np.floating]
|
|
@@ -427,13 +492,52 @@ def compute_metrics(
|
|
|
427
492
|
)
|
|
428
493
|
|
|
429
494
|
|
|
430
|
-
def
|
|
495
|
+
def _count_with_examples(
|
|
496
|
+
data: NDArray[np.floating],
|
|
497
|
+
unique_idx: int | list[int],
|
|
498
|
+
label_idx: int | list[int],
|
|
499
|
+
) -> tuple[NDArray[np.floating], NDArray[np.int32], NDArray[np.int32]]:
|
|
500
|
+
"""
|
|
501
|
+
Helper function for counting occurences of unique detailed pairs.
|
|
502
|
+
|
|
503
|
+
Parameters
|
|
504
|
+
----------
|
|
505
|
+
data : NDArray[np.floating]
|
|
506
|
+
A masked portion of a detailed pairs array.
|
|
507
|
+
unique_idx : int | list[int]
|
|
508
|
+
The index or indices upon which uniqueness is constrained.
|
|
509
|
+
label_idx : int | list[int]
|
|
510
|
+
The index or indices within the unique index or indices that encode labels.
|
|
511
|
+
|
|
512
|
+
Returns
|
|
513
|
+
-------
|
|
514
|
+
NDArray[np.floating]
|
|
515
|
+
Examples drawn from the data input.
|
|
516
|
+
NDArray[np.int32]
|
|
517
|
+
Unique label indices.
|
|
518
|
+
NDArray[np.int32]
|
|
519
|
+
Counts for each unique label index.
|
|
520
|
+
"""
|
|
521
|
+
unique_rows, indices = np.unique(
|
|
522
|
+
data.astype(int)[:, unique_idx],
|
|
523
|
+
return_index=True,
|
|
524
|
+
axis=0,
|
|
525
|
+
)
|
|
526
|
+
examples = data[indices]
|
|
527
|
+
labels, counts = np.unique(
|
|
528
|
+
unique_rows[:, label_idx], return_counts=True, axis=0
|
|
529
|
+
)
|
|
530
|
+
return examples, labels, counts
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def compute_confusion_matrix(
|
|
431
534
|
data: NDArray[np.floating],
|
|
432
535
|
label_metadata: NDArray[np.int32],
|
|
433
536
|
iou_thresholds: NDArray[np.floating],
|
|
434
537
|
score_thresholds: NDArray[np.floating],
|
|
435
|
-
|
|
436
|
-
) -> NDArray[np.int32]:
|
|
538
|
+
n_examples: int,
|
|
539
|
+
) -> tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.int32]]:
|
|
540
|
+
|
|
437
541
|
"""
|
|
438
542
|
Compute detailed counts.
|
|
439
543
|
|
|
@@ -447,51 +551,47 @@ def compute_detailed_counts(
|
|
|
447
551
|
Index 5 - Prediction Label Index
|
|
448
552
|
Index 6 - Score
|
|
449
553
|
|
|
450
|
-
Outputs an array with shape (N_IoUs, N_Score, N_Labels, 5 * n_samples + 5):
|
|
451
|
-
|
|
452
|
-
Index 0 - True Positive Count
|
|
453
|
-
... Datum ID Examples
|
|
454
|
-
Index 2 * n_samples + 1 - False Positive Misclassification Count
|
|
455
|
-
... Datum ID Examples
|
|
456
|
-
Index 4 * n_samples + 2 - False Positive Hallucination Count
|
|
457
|
-
... Datum ID Examples
|
|
458
|
-
Index 6 * n_samples + 3 - False Negative Misclassification Count
|
|
459
|
-
... Datum ID Examples
|
|
460
|
-
Index 8 * n_samples + 4 - False Negative Missing Prediction Count
|
|
461
|
-
... Datum ID Examples
|
|
462
|
-
|
|
463
554
|
Parameters
|
|
464
555
|
----------
|
|
465
556
|
data : NDArray[np.floating]
|
|
466
|
-
A sorted array of
|
|
557
|
+
A sorted array summarizing the IOU calculations of one or more pairs.
|
|
467
558
|
label_metadata : NDArray[np.int32]
|
|
468
559
|
An array containing metadata related to labels.
|
|
469
560
|
iou_thresholds : NDArray[np.floating]
|
|
470
561
|
A 1-D array containing IoU thresholds.
|
|
471
562
|
score_thresholds : NDArray[np.floating]
|
|
472
563
|
A 1-D array containing score thresholds.
|
|
473
|
-
|
|
474
|
-
The number of examples to return per count.
|
|
564
|
+
n_examples : int
|
|
565
|
+
The maximum number of examples to return per count.
|
|
475
566
|
|
|
476
567
|
Returns
|
|
477
568
|
-------
|
|
569
|
+
NDArray[np.floating]
|
|
570
|
+
Confusion matrix.
|
|
571
|
+
NDArray[np.floating]
|
|
572
|
+
Hallucinations.
|
|
478
573
|
NDArray[np.int32]
|
|
479
|
-
|
|
574
|
+
Missing Predictions.
|
|
480
575
|
"""
|
|
481
576
|
|
|
482
577
|
n_labels = label_metadata.shape[0]
|
|
483
578
|
n_ious = iou_thresholds.shape[0]
|
|
484
579
|
n_scores = score_thresholds.shape[0]
|
|
485
|
-
n_metrics = 5 * (2 * n_samples + 1)
|
|
486
|
-
|
|
487
|
-
tp_idx = 0
|
|
488
|
-
fp_misclf_idx = 2 * n_samples + 1
|
|
489
|
-
fp_halluc_idx = 4 * n_samples + 2
|
|
490
|
-
fn_misclf_idx = 6 * n_samples + 3
|
|
491
|
-
fn_misprd_idx = 8 * n_samples + 4
|
|
492
580
|
|
|
493
|
-
|
|
494
|
-
(
|
|
581
|
+
confusion_matrix = -1 * np.ones(
|
|
582
|
+
# (datum idx, gt idx, pd idx, pd score) * n_examples + count
|
|
583
|
+
(n_ious, n_scores, n_labels, n_labels, 4 * n_examples + 1),
|
|
584
|
+
dtype=np.float32,
|
|
585
|
+
)
|
|
586
|
+
hallucinations = -1 * np.ones(
|
|
587
|
+
# (datum idx, pd idx, pd score) * n_examples + count
|
|
588
|
+
(n_ious, n_scores, n_labels, 3 * n_examples + 1),
|
|
589
|
+
dtype=np.float32,
|
|
590
|
+
)
|
|
591
|
+
missing_predictions = -1 * np.ones(
|
|
592
|
+
# (datum idx, gt idx) * n_examples + count
|
|
593
|
+
(n_ious, n_scores, n_labels, 2 * n_examples + 1),
|
|
594
|
+
dtype=np.int32,
|
|
495
595
|
)
|
|
496
596
|
|
|
497
597
|
mask_gt_exists = data[:, 1] > -0.5
|
|
@@ -557,9 +657,9 @@ def compute_detailed_counts(
|
|
|
557
657
|
~mask_groundtruths_with_passing_score & mask_gt_exists
|
|
558
658
|
)
|
|
559
659
|
|
|
660
|
+
# create category masks
|
|
560
661
|
mask_tp = mask_score & mask_iou & mask_gt_pd_match
|
|
561
|
-
|
|
562
|
-
mask_fn_misclf = mask_iou & (
|
|
662
|
+
mask_misclf = mask_iou & (
|
|
563
663
|
(
|
|
564
664
|
~mask_score
|
|
565
665
|
& mask_gt_pd_match
|
|
@@ -567,143 +667,164 @@ def compute_detailed_counts(
|
|
|
567
667
|
)
|
|
568
668
|
| (mask_score & mask_gt_pd_mismatch)
|
|
569
669
|
)
|
|
570
|
-
|
|
571
|
-
|
|
670
|
+
mask_halluc = mask_score & mask_predictions_without_passing_ious
|
|
671
|
+
mask_misprd = (
|
|
572
672
|
mask_groundtruths_without_passing_ious
|
|
573
673
|
| mask_groundtruths_without_passing_score
|
|
574
674
|
)
|
|
575
675
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
mask_fp_misclf_is_tp = (
|
|
584
|
-
(fp_misclf.reshape(-1, 1, 3) == tp_pds.reshape(1, -1, 3))
|
|
676
|
+
# filter out true-positives from misclf and misprd
|
|
677
|
+
mask_gts_with_tp_override = (
|
|
678
|
+
(
|
|
679
|
+
data[mask_misclf][:, [0, 1]].reshape(-1, 1, 2)
|
|
680
|
+
== data[mask_tp][:, [0, 1]].reshape(1, -1, 2)
|
|
681
|
+
)
|
|
585
682
|
.all(axis=2)
|
|
586
683
|
.any(axis=1)
|
|
587
684
|
)
|
|
588
|
-
|
|
589
|
-
(
|
|
685
|
+
mask_pds_with_tp_override = (
|
|
686
|
+
(
|
|
687
|
+
data[mask_misclf][:, [0, 2]].reshape(-1, 1, 2)
|
|
688
|
+
== data[mask_tp][:, [0, 2]].reshape(1, -1, 2)
|
|
689
|
+
)
|
|
590
690
|
.all(axis=2)
|
|
591
691
|
.any(axis=1)
|
|
592
692
|
)
|
|
693
|
+
mask_misprd[mask_misclf] |= (
|
|
694
|
+
~mask_gts_with_tp_override & mask_pds_with_tp_override
|
|
695
|
+
)
|
|
696
|
+
mask_misclf[mask_misclf] &= (
|
|
697
|
+
~mask_gts_with_tp_override & ~mask_pds_with_tp_override
|
|
698
|
+
)
|
|
593
699
|
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
tp_count = np.bincount(tp[:, 2].astype(int), minlength=n_labels)
|
|
601
|
-
fp_misclf_count = np.bincount(
|
|
602
|
-
fp_misclf[:, 2].astype(int), minlength=n_labels
|
|
700
|
+
# count true positives
|
|
701
|
+
tp_examples, tp_labels, tp_counts = _count_with_examples(
|
|
702
|
+
data[mask_tp],
|
|
703
|
+
unique_idx=[0, 2, 5],
|
|
704
|
+
label_idx=2,
|
|
603
705
|
)
|
|
604
|
-
|
|
605
|
-
|
|
706
|
+
|
|
707
|
+
# count misclassifications
|
|
708
|
+
(
|
|
709
|
+
misclf_examples,
|
|
710
|
+
misclf_labels,
|
|
711
|
+
misclf_counts,
|
|
712
|
+
) = _count_with_examples(
|
|
713
|
+
data[mask_misclf], unique_idx=[0, 1, 2, 4, 5], label_idx=[3, 4]
|
|
606
714
|
)
|
|
607
|
-
|
|
608
|
-
|
|
715
|
+
|
|
716
|
+
# count hallucinations
|
|
717
|
+
(
|
|
718
|
+
halluc_examples,
|
|
719
|
+
halluc_labels,
|
|
720
|
+
halluc_counts,
|
|
721
|
+
) = _count_with_examples(
|
|
722
|
+
data[mask_halluc], unique_idx=[0, 2, 5], label_idx=2
|
|
609
723
|
)
|
|
610
|
-
|
|
611
|
-
|
|
724
|
+
|
|
725
|
+
# count missing predictions
|
|
726
|
+
(
|
|
727
|
+
misprd_examples,
|
|
728
|
+
misprd_labels,
|
|
729
|
+
misprd_counts,
|
|
730
|
+
) = _count_with_examples(
|
|
731
|
+
data[mask_misprd], unique_idx=[0, 1, 4], label_idx=2
|
|
612
732
|
)
|
|
613
733
|
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
iou_idx, score_idx,
|
|
617
|
-
] =
|
|
618
|
-
|
|
619
|
-
iou_idx,
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
734
|
+
# store the counts
|
|
735
|
+
confusion_matrix[
|
|
736
|
+
iou_idx, score_idx, tp_labels, tp_labels, 0
|
|
737
|
+
] = tp_counts
|
|
738
|
+
confusion_matrix[
|
|
739
|
+
iou_idx,
|
|
740
|
+
score_idx,
|
|
741
|
+
misclf_labels[:, 0],
|
|
742
|
+
misclf_labels[:, 1],
|
|
743
|
+
0,
|
|
744
|
+
] = misclf_counts
|
|
745
|
+
hallucinations[
|
|
746
|
+
iou_idx,
|
|
747
|
+
score_idx,
|
|
748
|
+
halluc_labels,
|
|
749
|
+
0,
|
|
750
|
+
] = halluc_counts
|
|
751
|
+
missing_predictions[
|
|
752
|
+
iou_idx,
|
|
753
|
+
score_idx,
|
|
754
|
+
misprd_labels,
|
|
755
|
+
0,
|
|
756
|
+
] = misprd_counts
|
|
757
|
+
|
|
758
|
+
# store examples
|
|
759
|
+
if n_examples > 0:
|
|
629
760
|
for label_idx in range(n_labels):
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
)
|
|
637
|
-
fp_misclf_examples = (
|
|
638
|
-
fp_misclf[fp_misclf[:, 2].astype(int) == label_idx][
|
|
639
|
-
:n_samples, [0, 1]
|
|
640
|
-
]
|
|
641
|
-
.astype(int)
|
|
642
|
-
.flatten()
|
|
643
|
-
)
|
|
644
|
-
fp_halluc_examples = (
|
|
645
|
-
fp_halluc[fp_halluc[:, 2].astype(int) == label_idx][
|
|
646
|
-
:n_samples, [0, 1]
|
|
647
|
-
]
|
|
648
|
-
.astype(int)
|
|
649
|
-
.flatten()
|
|
650
|
-
)
|
|
651
|
-
fn_misclf_examples = (
|
|
652
|
-
fn_misclf[fn_misclf[:, 2].astype(int) == label_idx][
|
|
653
|
-
:n_samples, [0, 1]
|
|
654
|
-
]
|
|
655
|
-
.astype(int)
|
|
656
|
-
.flatten()
|
|
657
|
-
)
|
|
658
|
-
fn_misprd_examples = (
|
|
659
|
-
fn_misprd[fn_misprd[:, 2].astype(int) == label_idx][
|
|
660
|
-
:n_samples, [0, 1]
|
|
761
|
+
|
|
762
|
+
# true-positive examples
|
|
763
|
+
mask_tp_label = tp_examples[:, 5] == label_idx
|
|
764
|
+
if mask_tp_label.sum() > 0:
|
|
765
|
+
tp_label_examples = tp_examples[mask_tp_label][
|
|
766
|
+
:n_examples
|
|
661
767
|
]
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
768
|
+
confusion_matrix[
|
|
769
|
+
iou_idx,
|
|
770
|
+
score_idx,
|
|
771
|
+
label_idx,
|
|
772
|
+
label_idx,
|
|
773
|
+
1 : 4 * tp_label_examples.shape[0] + 1,
|
|
774
|
+
] = tp_label_examples[:, [0, 1, 2, 6]].flatten()
|
|
775
|
+
|
|
776
|
+
# misclassification examples
|
|
777
|
+
mask_misclf_gt_label = misclf_examples[:, 4] == label_idx
|
|
778
|
+
if mask_misclf_gt_label.sum() > 0:
|
|
779
|
+
for pd_label_idx in range(n_labels):
|
|
780
|
+
mask_misclf_pd_label = (
|
|
781
|
+
misclf_examples[:, 5] == pd_label_idx
|
|
782
|
+
)
|
|
783
|
+
mask_misclf_label_combo = (
|
|
784
|
+
mask_misclf_gt_label & mask_misclf_pd_label
|
|
785
|
+
)
|
|
786
|
+
if mask_misclf_label_combo.sum() > 0:
|
|
787
|
+
misclf_label_examples = misclf_examples[
|
|
788
|
+
mask_misclf_label_combo
|
|
789
|
+
][:n_examples]
|
|
790
|
+
confusion_matrix[
|
|
791
|
+
iou_idx,
|
|
792
|
+
score_idx,
|
|
793
|
+
label_idx,
|
|
794
|
+
pd_label_idx,
|
|
795
|
+
1 : 4 * misclf_label_examples.shape[0] + 1,
|
|
796
|
+
] = misclf_label_examples[
|
|
797
|
+
:, [0, 1, 2, 6]
|
|
798
|
+
].flatten()
|
|
799
|
+
|
|
800
|
+
# hallucination examples
|
|
801
|
+
mask_halluc_label = halluc_examples[:, 5] == label_idx
|
|
802
|
+
if mask_halluc_label.sum() > 0:
|
|
803
|
+
halluc_label_examples = halluc_examples[
|
|
804
|
+
mask_halluc_label
|
|
805
|
+
][:n_examples]
|
|
806
|
+
hallucinations[
|
|
807
|
+
iou_idx,
|
|
808
|
+
score_idx,
|
|
809
|
+
label_idx,
|
|
810
|
+
1 : 3 * halluc_label_examples.shape[0] + 1,
|
|
811
|
+
] = halluc_label_examples[:, [0, 2, 6]].flatten()
|
|
812
|
+
|
|
813
|
+
# missing prediction examples
|
|
814
|
+
mask_misprd_label = misprd_examples[:, 4] == label_idx
|
|
815
|
+
if misprd_examples.size > 0:
|
|
816
|
+
misprd_label_examples = misprd_examples[
|
|
817
|
+
mask_misprd_label
|
|
818
|
+
][:n_examples]
|
|
819
|
+
missing_predictions[
|
|
820
|
+
iou_idx,
|
|
821
|
+
score_idx,
|
|
822
|
+
label_idx,
|
|
823
|
+
1 : 2 * misprd_label_examples.shape[0] + 1,
|
|
824
|
+
] = misprd_label_examples[:, [0, 1]].flatten()
|
|
825
|
+
|
|
826
|
+
return (
|
|
827
|
+
confusion_matrix,
|
|
828
|
+
hallucinations,
|
|
829
|
+
missing_predictions,
|
|
830
|
+
)
|