valor-lite 0.33.3__py3-none-any.whl → 0.33.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,12 +3,20 @@ from dataclasses import dataclass
 
 import numpy as np
 from numpy.typing import NDArray
+from shapely.geometry import Polygon as ShapelyPolygon
 from tqdm import tqdm
-from valor_lite.detection.annotation import Detection
+from valor_lite.detection.annotation import (
+    Bitmask,
+    BoundingBox,
+    Detection,
+    Polygon,
+)
 from valor_lite.detection.computation import (
-    compute_detailed_counts,
-    compute_iou,
+    compute_bbox_iou,
+    compute_bitmask_iou,
+    compute_confusion_matrix,
     compute_metrics,
+    compute_polygon_iou,
     compute_ranked_pairs,
 )
 from valor_lite.detection.metric import (
@@ -18,8 +26,8 @@ from valor_lite.detection.metric import (
     Accuracy,
     APAveragedOverIOUs,
     ARAveragedOverScores,
+    ConfusionMatrix,
     Counts,
-    DetailedCounts,
     MetricType,
     Precision,
     PrecisionRecallCurve,
@@ -35,7 +43,7 @@ Usage
 -----
 
 loader = DataLoader()
-loader.add_data(
+loader.add_bounding_boxes(
     groundtruths=groundtruths,
     predictions=predictions,
 )
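Note that the usage snippet above keeps the old `groundtruths=`/`predictions=` keywords, while the renamed method is typed against a list of `Detection` objects later in this diff. A rough sketch of the 0.33.5 flow; the import path and the `BoundingBox`/`Detection` constructor arguments are assumptions not confirmed by this diff:

```python
from valor_lite.detection import BoundingBox, DataLoader, Detection

# Hypothetical annotation construction; labels are (key, value) tuples per
# the label handling elsewhere in this file.
gt_box = BoundingBox(
    xmin=0.0, xmax=10.0, ymin=0.0, ymax=10.0, labels=[("class", "dog")]
)
pd_box = BoundingBox(
    xmin=1.0, xmax=11.0, ymin=1.0, ymax=11.0,
    labels=[("class", "dog")], scores=[0.88],
)

loader = DataLoader()
loader.add_bounding_boxes(  # renamed from add_data in 0.33.3
    [Detection(uid="img0", groundtruths=[gt_box], predictions=[pd_box])]
)
evaluator = loader.finalize()
metrics = evaluator.evaluate(iou_thresholds=[0.5])
```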
@@ -51,9 +59,107 @@ filtered_metrics = evaluator.evaluate(iou_thresholds=[0.5], filter_mask=filter_
 """
 
 
+def _get_valor_dict_annotation_key(
+    annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
+) -> str:
+    """Get the correct JSON key to extract a given annotation type."""
+
+    if issubclass(annotation_type, BoundingBox):
+        return "bounding_box"
+    if issubclass(annotation_type, Polygon):
+        return "polygon"
+    else:
+        return "raster"
+
+
+def _get_annotation_representation(
+    annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
+) -> str:
+    """Get the correct representation of an annotation object."""
+
+    representation = (
+        "extrema"
+        if issubclass(annotation_type, BoundingBox)
+        else ("mask" if issubclass(annotation_type, Bitmask) else "shape")
+    )
+
+    return representation
+
+
+def _get_annotation_representation_from_valor_dict(
+    data: list,
+    annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
+) -> tuple[float, float, float, float] | ShapelyPolygon | NDArray[np.bool_]:
+    """Get the correct representation of an annotation object from a valor dictionary."""
+
+    if issubclass(annotation_type, BoundingBox):
+        x = [point[0] for shape in data for point in shape]
+        y = [point[1] for shape in data for point in shape]
+        return (min(x), max(x), min(y), max(y))
+    if issubclass(annotation_type, Polygon):
+        return ShapelyPolygon(data)
+    else:
+        return np.array(data)
+
+
+def _get_annotation_data(
+    keyed_groundtruths: dict,
+    keyed_predictions: dict,
+    annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask] | None,
+    key: int,
+) -> np.ndarray:
+    """Create an array of annotation pairs for use when calculating IOU. Needed because we unpack bounding box representations, but not bitmask or polygon representations."""
+    if annotation_type == BoundingBox:
+        return np.array(
+            [
+                np.array([*gextrema, *pextrema])
+                for _, _, _, pextrema in keyed_predictions[key]
+                for _, _, gextrema in keyed_groundtruths[key]
+            ]
+        )
+    else:
+        return np.array(
+            [
+                np.array([groundtruth_obj, prediction_obj])
+                for _, _, _, prediction_obj in keyed_predictions[key]
+                for _, _, groundtruth_obj in keyed_groundtruths[key]
+            ]
+        )
+
+
+def compute_iou(
+    data: NDArray[np.floating],
+    annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
+) -> NDArray[np.floating]:
+    """
+    Computes intersection-over-union (IoU) calculations for various annotation types.
+
+    Parameters
+    ----------
+    data : NDArray[np.floating]
+        A sorted array of bounding box, bitmask, or polygon pairs.
+    annotation_type : type[BoundingBox] | type[Polygon] | type[Bitmask]
+        The type of annotation contained in the data.
+
+    Returns
+    -------
+    NDArray[np.floating]
+        Computed IoUs.
+    """
+
+    if annotation_type == BoundingBox:
+        return compute_bbox_iou(data=data)
+    elif annotation_type == Bitmask:
+        return compute_bitmask_iou(data=data)
+    else:
+        return compute_polygon_iou(data=data)
+
+
 @dataclass
 class Filter:
-    indices: NDArray[np.int32]
+    ranked_indices: NDArray[np.int32]
+    detailed_indices: NDArray[np.int32]
     label_metadata: NDArray[np.int32]
 
 
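For orientation, `_get_annotation_data` packs each groundtruth/prediction pair into one row, and the new `compute_bbox_iou` (not shown in this diff) consumes those rows. A standalone sketch of an IoU over that assumed row layout, written here for illustration rather than taken from the library:

```python
import numpy as np
from numpy.typing import NDArray


def bbox_iou_rows(data: NDArray[np.floating]) -> NDArray[np.floating]:
    # Row layout assumed from _get_annotation_data above:
    # [gxmin, gxmax, gymin, gymax, pxmin, pxmax, pymin, pymax]
    g, p = data[:, :4], data[:, 4:]
    xmin = np.maximum(g[:, 0], p[:, 0])
    xmax = np.minimum(g[:, 1], p[:, 1])
    ymin = np.maximum(g[:, 2], p[:, 2])
    ymax = np.minimum(g[:, 3], p[:, 3])
    intersection = np.clip(xmax - xmin, 0, None) * np.clip(ymax - ymin, 0, None)
    union = (
        (g[:, 1] - g[:, 0]) * (g[:, 3] - g[:, 2])
        + (p[:, 1] - p[:, 0]) * (p[:, 3] - p[:, 2])
        - intersection
    )
    return np.where(union > 0, intersection / union, 0.0)


# Identical unit squares give IoU 1.0; disjoint boxes give 0.0.
rows = np.array(
    [
        [0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
        [0.0, 1.0, 0.0, 1.0, 5.0, 6.0, 5.0, 6.0],
    ]
)
print(bbox_iou_rows(rows))  # [1. 0.]
```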
@@ -152,12 +258,14 @@ class Evaluator:
         Filter
             A filter object that can be passed to the `evaluate` method.
         """
-        n_rows = self._ranked_pairs.shape[0]
 
         n_datums = self._label_metadata_per_datum.shape[1]
         n_labels = self._label_metadata_per_datum.shape[2]
 
-        mask_pairs = np.ones((n_rows, 1), dtype=np.bool_)
+        mask_ranked = np.ones((self._ranked_pairs.shape[0], 1), dtype=np.bool_)
+        mask_detailed = np.ones(
+            (self._detailed_pairs.shape[0], 1), dtype=np.bool_
+        )
         mask_datums = np.ones(n_datums, dtype=np.bool_)
         mask_labels = np.ones(n_labels, dtype=np.bool_)
 
@@ -167,50 +275,50 @@ class Evaluator:
                 [self.uid_to_index[uid] for uid in datum_uids],
                 dtype=np.int32,
             )
-            mask = np.zeros_like(mask_pairs, dtype=np.bool_)
-            mask[
-                np.isin(self._ranked_pairs[:, 0].astype(int), datum_uids)
-            ] = True
-            mask_pairs &= mask
-
-            mask = np.zeros_like(mask_datums, dtype=np.bool_)
-            mask[datum_uids] = True
-            mask_datums &= mask
+            mask_ranked[
+                ~np.isin(self._ranked_pairs[:, 0].astype(int), datum_uids)
+            ] = False
+            mask_detailed[
+                ~np.isin(self._detailed_pairs[:, 0].astype(int), datum_uids)
+            ] = False
+            mask_datums[~np.isin(np.arange(n_datums), datum_uids)] = False
 
         if labels is not None:
             if isinstance(labels, list):
                 labels = np.array(
                     [self.label_to_index[label] for label in labels]
                 )
-            mask = np.zeros_like(mask_pairs, dtype=np.bool_)
-            mask[np.isin(self._ranked_pairs[:, 4].astype(int), labels)] = True
-            mask_pairs &= mask
-
-            mask = np.zeros_like(mask_labels, dtype=np.bool_)
-            mask[labels] = True
-            mask_labels &= mask
+            mask_ranked[
+                ~np.isin(self._ranked_pairs[:, 4].astype(int), labels)
+            ] = False
+            mask_detailed[
+                ~np.isin(self._detailed_pairs[:, 4].astype(int), labels)
+            ] = False
+            mask_labels[~np.isin(np.arange(n_labels), labels)] = False
 
         if label_keys is not None:
             if isinstance(label_keys, list):
                 label_keys = np.array(
                     [self.label_key_to_index[key] for key in label_keys]
                 )
-            label_indices = np.where(
-                np.isclose(self._label_metadata[:, 2], label_keys)
-            )[0]
-            mask = np.zeros_like(mask_pairs, dtype=np.bool_)
-            mask[
-                np.isin(self._ranked_pairs[:, 4].astype(int), label_indices)
-            ] = True
-            mask_pairs &= mask
-
-            mask = np.zeros_like(mask_labels, dtype=np.bool_)
-            mask[label_indices] = True
-            mask_labels &= mask
-
-        mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
+            label_indices = (
+                np.where(np.isclose(self._label_metadata[:, 2], label_keys))[0]
+                if label_keys.size > 0
+                else np.array([])
+            )
+            mask_ranked[
+                ~np.isin(self._ranked_pairs[:, 4].astype(int), label_indices)
+            ] = False
+            mask_detailed[
+                ~np.isin(self._detailed_pairs[:, 4].astype(int), label_indices)
+            ] = False
+            mask_labels[~np.isin(np.arange(n_labels), label_indices)] = False
+
+        mask_label_metadata = (
+            mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
+        )
         label_metadata_per_datum = self._label_metadata_per_datum.copy()
-        label_metadata_per_datum[:, ~mask] = 0
+        label_metadata_per_datum[:, ~mask_label_metadata] = 0
 
         label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
         label_metadata[:, :2] = np.transpose(
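The filter rework above replaces the build-a-positive-mask-then-AND pattern with direct clearing via `~np.isin`, applied uniformly to the ranked pairs, the detailed pairs, and the datum/label masks. A small standalone illustration of the pattern:

```python
import numpy as np

# Keep-masks start all True; rows whose datum id is absent from the
# allow-list are switched off in one vectorized step.
pair_datum_ids = np.array([0, 0, 1, 2, 2])
allowed = np.array([0, 2])
mask = np.ones((pair_datum_ids.shape[0], 1), dtype=np.bool_)
mask[~np.isin(pair_datum_ids, allowed)] = False
print(mask.ravel())  # [ True  True False  True  True]
```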
@@ -222,7 +330,8 @@ class Evaluator:
         label_metadata[:, 2] = self._label_metadata[:, 2]
 
         return Filter(
-            indices=np.where(mask_pairs)[0],
+            ranked_indices=np.where(mask_ranked)[0],
+            detailed_indices=np.where(mask_detailed)[0],
             label_metadata=label_metadata,
         )
 
@@ -246,7 +355,7 @@ class Evaluator:
         score_thresholds : list[float]
             A list of score thresholds to compute metrics over.
         number_of_examples : int, default=0
-            Number of annotation examples to return in DetailedCounts.
+            Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
 
@@ -256,10 +365,12 @@ class Evaluator:
             A dictionary mapping MetricType enumerations to lists of computed metrics.
         """
 
-        data = self._ranked_pairs
+        ranked_pairs = self._ranked_pairs
+        detailed_pairs = self._detailed_pairs
         label_metadata = self._label_metadata
         if filter_ is not None:
-            data = data[filter_.indices]
+            ranked_pairs = ranked_pairs[filter_.ranked_indices]
+            detailed_pairs = detailed_pairs[filter_.detailed_indices]
             label_metadata = filter_.label_metadata
 
         (
@@ -278,7 +389,7 @@ class Evaluator:
             precision_recall,
             pr_curves,
         ) = compute_metrics(
-            data=data,
+            data=ranked_pairs,
             label_metadata=label_metadata,
             iou_thresholds=np.array(iou_thresholds),
             score_thresholds=np.array(score_thresholds),
@@ -294,7 +405,7 @@ class Evaluator:
             )
             for iou_idx in range(average_precision.shape[0])
             for label_idx in range(average_precision.shape[1])
-            if int(label_metadata[label_idx][0]) > 0
+            if int(label_metadata[label_idx, 0]) > 0
         ]
 
         metrics[MetricType.mAP] = [
@@ -314,7 +425,7 @@ class Evaluator:
                 label=self.index_to_label[label_idx],
             )
             for label_idx in range(self.n_labels)
-            if int(label_metadata[label_idx][0]) > 0
+            if int(label_metadata[label_idx, 0]) > 0
         ]
 
         metrics[MetricType.mAPAveragedOverIOUs] = [
@@ -337,7 +448,7 @@ class Evaluator:
             )
             for score_idx in range(average_recall.shape[0])
             for label_idx in range(average_recall.shape[1])
-            if int(label_metadata[label_idx][0]) > 0
+            if int(label_metadata[label_idx, 0]) > 0
         ]
 
         metrics[MetricType.mAR] = [
@@ -359,7 +470,7 @@ class Evaluator:
                 label=self.index_to_label[label_idx],
             )
             for label_idx in range(self.n_labels)
-            if int(label_metadata[label_idx][0]) > 0
+            if int(label_metadata[label_idx, 0]) > 0
         ]
 
         metrics[MetricType.mARAveragedOverScores] = [
@@ -382,16 +493,17 @@ class Evaluator:
             )
             for iou_idx, iou_threshold in enumerate(iou_thresholds)
             for label_idx, label in self.index_to_label.items()
-            if int(label_metadata[label_idx][0]) > 0
+            if int(label_metadata[label_idx, 0]) > 0
         ]
 
         for label_idx, label in self.index_to_label.items():
+
+            if label_metadata[label_idx, 0] == 0:
+                continue
+
             for score_idx, score_threshold in enumerate(score_thresholds):
                 for iou_idx, iou_threshold in enumerate(iou_thresholds):
 
-                    if label_metadata[label_idx, 0] == 0:
-                        continue
-
                     row = precision_recall[iou_idx][score_idx][label_idx]
                     kwargs = {
                         "label": label,
@@ -432,11 +544,15 @@ class Evaluator:
                         )
                     )
 
-        if MetricType.DetailedCounts in metrics_to_return:
-            metrics[MetricType.DetailedCounts] = self._compute_detailed_counts(
+        if MetricType.ConfusionMatrix in metrics_to_return:
+            metrics[
+                MetricType.ConfusionMatrix
+            ] = self._compute_confusion_matrix(
+                data=detailed_pairs,
+                label_metadata=label_metadata,
                 iou_thresholds=iou_thresholds,
                 score_thresholds=score_thresholds,
-                n_samples=number_of_examples,
+                number_of_examples=number_of_examples,
             )
 
         for metric in set(metrics.keys()):
@@ -445,14 +561,281 @@ class Evaluator:
 
         return metrics
 
-    def _compute_detailed_counts(
+    def _unpack_confusion_matrix(
+        self,
+        confusion_matrix: NDArray[np.floating],
+        label_key_idx: int,
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[
+        str,
+        dict[
+            str,
+            dict[
+                str,
+                int
+                | list[
+                    dict[
+                        str,
+                        str | float | tuple[float, float, float, float],
+                    ]
+                ],
+            ],
+        ],
+    ]:
+        """
+        Unpacks a numpy array of confusion matrix counts and examples.
+        """
+
+        datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 1,
+            ]
+        )
+
+        groundtruth_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 2,
+            ]
+        )
+
+        prediction_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 3,
+            ]
+        )
+
+        score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 4,
+            ]
+        )
+
+        return {
+            self.index_to_label[gt_label_idx][1]: {
+                self.index_to_label[pd_label_idx][1]: {
+                    "count": max(
+                        int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
+                        0,
+                    ),
+                    "examples": [
+                        {
+                            "datum": self.index_to_uid[
+                                datum_idx(
+                                    gt_label_idx, pd_label_idx, example_idx
+                                )
+                            ],
+                            "groundtruth": tuple(
+                                self.groundtruth_examples[
+                                    datum_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ][
+                                    groundtruth_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ].tolist()
+                            ),
+                            "prediction": tuple(
+                                self.prediction_examples[
+                                    datum_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ][
+                                    prediction_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ].tolist()
+                            ),
+                            "score": score_idx(
+                                gt_label_idx, pd_label_idx, example_idx
+                            ),
+                        }
+                        for example_idx in range(number_of_examples)
+                        if datum_idx(gt_label_idx, pd_label_idx, example_idx)
+                        >= 0
+                    ],
+                }
+                for pd_label_idx in range(number_of_labels)
+                if (
+                    self.label_index_to_label_key_index[pd_label_idx]
+                    == label_key_idx
+                )
+            }
+            for gt_label_idx in range(number_of_labels)
+            if (
+                self.label_index_to_label_key_index[gt_label_idx]
+                == label_key_idx
+            )
+        }
+
+    def _unpack_hallucinations(
+        self,
+        hallucinations: NDArray[np.floating],
+        label_key_idx: int,
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[
+        str,
+        dict[
+            str,
+            int
+            | list[dict[str, str | float | tuple[float, float, float, float]]],
+        ],
+    ]:
+        """
+        Unpacks a numpy array of hallucination counts and examples.
+        """
+
+        datum_idx = (
+            lambda pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                hallucinations[
+                    pd_label_idx,
+                    example_idx * 3 + 1,
+                ]
+            )
+        )
+
+        prediction_idx = (
+            lambda pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                hallucinations[
+                    pd_label_idx,
+                    example_idx * 3 + 2,
+                ]
+            )
+        )
+
+        score_idx = (
+            lambda pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
+                hallucinations[
+                    pd_label_idx,
+                    example_idx * 3 + 3,
+                ]
+            )
+        )
+
+        return {
+            self.index_to_label[pd_label_idx][1]: {
+                "count": max(
+                    int(hallucinations[pd_label_idx, 0]),
+                    0,
+                ),
+                "examples": [
+                    {
+                        "datum": self.index_to_uid[
+                            datum_idx(pd_label_idx, example_idx)
+                        ],
+                        "prediction": tuple(
+                            self.prediction_examples[
+                                datum_idx(pd_label_idx, example_idx)
+                            ][
+                                prediction_idx(pd_label_idx, example_idx)
+                            ].tolist()
+                        ),
+                        "score": score_idx(pd_label_idx, example_idx),
+                    }
+                    for example_idx in range(number_of_examples)
+                    if datum_idx(pd_label_idx, example_idx) >= 0
+                ],
+            }
+            for pd_label_idx in range(number_of_labels)
+            if (
+                self.label_index_to_label_key_index[pd_label_idx]
+                == label_key_idx
+            )
+        }
+
+    def _unpack_missing_predictions(
+        self,
+        missing_predictions: NDArray[np.int32],
+        label_key_idx: int,
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[
+        str,
+        dict[
+            str,
+            int | list[dict[str, str | tuple[float, float, float, float]]],
+        ],
+    ]:
+        """
+        Unpacks a numpy array of missing prediction counts and examples.
+        """
+
+        datum_idx = (
+            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                missing_predictions[
+                    gt_label_idx,
+                    example_idx * 2 + 1,
+                ]
+            )
+        )
+
+        groundtruth_idx = (
+            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                missing_predictions[
+                    gt_label_idx,
+                    example_idx * 2 + 2,
+                ]
+            )
+        )
+
+        return {
+            self.index_to_label[gt_label_idx][1]: {
+                "count": max(
+                    int(missing_predictions[gt_label_idx, 0]),
+                    0,
+                ),
+                "examples": [
+                    {
+                        "datum": self.index_to_uid[
+                            datum_idx(gt_label_idx, example_idx)
+                        ],
+                        "groundtruth": tuple(
+                            self.groundtruth_examples[
+                                datum_idx(gt_label_idx, example_idx)
+                            ][
+                                groundtruth_idx(gt_label_idx, example_idx)
+                            ].tolist()
+                        ),
+                    }
+                    for example_idx in range(number_of_examples)
+                    if datum_idx(gt_label_idx, example_idx) >= 0
+                ],
+            }
+            for gt_label_idx in range(number_of_labels)
+            if (
+                self.label_index_to_label_key_index[gt_label_idx]
+                == label_key_idx
+            )
+        }
+
+    def _compute_confusion_matrix(
         self,
+        data: NDArray[np.floating],
+        label_metadata: NDArray[np.int32],
         iou_thresholds: list[float] = [0.5],
         score_thresholds: list[float] = [
             score / 10.0 for score in range(1, 11)
         ],
-        n_samples: int = 0,
-    ) -> list[DetailedCounts]:
+        number_of_examples: int = 0,
+    ) -> list[ConfusionMatrix]:
         """
         Computes confusion matrix, hallucination, and missing prediction metrics.
 
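The three `_unpack_*` helpers above decode a fixed-stride packing. Inferred from the index lambdas, and not otherwise documented in this diff: slot 0 along the last axis holds the count, then each example occupies four consecutive slots for the confusion matrix (three for hallucinations, two for missing predictions), with -1 marking unused slots. A standalone sketch of decoding one row under that assumption:

```python
import numpy as np

n_examples = 2
row = np.full(1 + 4 * n_examples, -1.0)   # -1 marks empty example slots
row[0] = 1.0                              # count for this (gt label, pd label) cell
row[1:5] = [0, 3, 7, 0.92]                # example 0: datum, gt idx, pd idx, score

for example_idx in range(n_examples):
    datum = int(row[example_idx * 4 + 1])
    if datum < 0:
        continue  # unused slot
    gt_ann = int(row[example_idx * 4 + 2])
    pd_ann = int(row[example_idx * 4 + 3])
    score = float(row[example_idx * 4 + 4])
    print(datum, gt_ann, pd_ann, score)  # -> 0 3 7 0.92
```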
@@ -462,132 +845,63 @@ class Evaluator:
             List of IoU thresholds to compute metrics for.
         score_thresholds : list[float], default=[0.1,0.2,...,1.0]
             List of confidence thresholds to compute metrics for.
-        n_samples : int, default=0
-            Number of datum samples to return per metric.
+        number_of_examples : int, default=0
+            Maximum number of annotation examples to return per metric.
 
         Returns
         -------
-        list[list[DetailedCounts]]
-            Outer list is indexed by label, inner list is by IoU.
+        list[ConfusionMatrix]
+            A flat list with one entry per label key, IoU threshold, and score threshold.
         """
 
-        if self._detailed_pairs.size == 0:
+        if data.size == 0:
             return list()
 
-        metrics = compute_detailed_counts(
-            self._detailed_pairs,
-            label_metadata=self._label_metadata,
+        (
+            confusion_matrix,
+            hallucinations,
+            missing_predictions,
+        ) = compute_confusion_matrix(
+            data=data,
+            label_metadata=label_metadata,
             iou_thresholds=np.array(iou_thresholds),
             score_thresholds=np.array(score_thresholds),
-            n_samples=n_samples,
+            n_examples=number_of_examples,
         )
 
-        tp_idx = 0
-        fp_misclf_idx = 2 * n_samples + 1
-        fp_halluc_idx = 4 * n_samples + 2
-        fn_misclf_idx = 6 * n_samples + 3
-        fn_misprd_idx = 8 * n_samples + 4
-
-        def _unpack_examples(
-            iou_idx: int,
-            label_idx: int,
-            type_idx: int,
-            example_source: dict[int, NDArray[np.float16]],
-        ) -> list[list[tuple[str, tuple[float, float, float, float]]]]:
-            """
-            Unpacks metric examples from computation.
-            """
-            type_idx += 1
-
-            results = list()
-            for score_idx in range(n_scores):
-                examples = list()
-                for example_idx in range(n_samples):
-                    datum_idx = metrics[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        type_idx + example_idx * 2,
-                    ]
-                    annotation_idx = metrics[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        type_idx + example_idx * 2 + 1,
-                    ]
-                    if datum_idx >= 0:
-                        examples.append(
-                            (
-                                self.index_to_uid[datum_idx],
-                                tuple(
-                                    example_source[datum_idx][
-                                        annotation_idx
-                                    ].tolist()
-                                ),
-                            )
-                        )
-                results.append(examples)
-
-            return results
-
-        n_ious, n_scores, n_labels, _ = metrics.shape
+        n_ious, n_scores, n_labels, _, _ = confusion_matrix.shape
         return [
-            DetailedCounts(
+            ConfusionMatrix(
                 iou_threshold=iou_thresholds[iou_idx],
-                label=self.index_to_label[label_idx],
-                score_thresholds=score_thresholds,
-                tp=metrics[iou_idx, :, label_idx, tp_idx].astype(int).tolist(),
-                fp_misclassification=metrics[
-                    iou_idx, :, label_idx, fp_misclf_idx
-                ]
-                .astype(int)
-                .tolist(),
-                fp_hallucination=metrics[iou_idx, :, label_idx, fp_halluc_idx]
-                .astype(int)
-                .tolist(),
-                fn_misclassification=metrics[
-                    iou_idx, :, label_idx, fn_misclf_idx
-                ]
-                .astype(int)
-                .tolist(),
-                fn_missing_prediction=metrics[
-                    iou_idx, :, label_idx, fn_misprd_idx
-                ]
-                .astype(int)
-                .tolist(),
-                tp_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=tp_idx,
-                    example_source=self.prediction_examples,
-                ),
-                fp_misclassification_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fp_misclf_idx,
-                    example_source=self.prediction_examples,
-                ),
-                fp_hallucination_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fp_halluc_idx,
-                    example_source=self.prediction_examples,
+                score_threshold=score_thresholds[score_idx],
+                label_key=label_key,
+                number_of_examples=number_of_examples,
+                confusion_matrix=self._unpack_confusion_matrix(
+                    confusion_matrix=confusion_matrix[
+                        iou_idx, score_idx, :, :, :
+                    ],
+                    label_key_idx=label_key_idx,
+                    number_of_labels=n_labels,
+                    number_of_examples=number_of_examples,
                 ),
-                fn_misclassification_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fn_misclf_idx,
-                    example_source=self.groundtruth_examples,
+                hallucinations=self._unpack_hallucinations(
+                    hallucinations=hallucinations[iou_idx, score_idx, :, :],
+                    label_key_idx=label_key_idx,
+                    number_of_labels=n_labels,
+                    number_of_examples=number_of_examples,
                 ),
-                fn_missing_prediction_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fn_misprd_idx,
-                    example_source=self.groundtruth_examples,
+                missing_predictions=self._unpack_missing_predictions(
+                    missing_predictions=missing_predictions[
+                        iou_idx, score_idx, :, :
+                    ],
+                    label_key_idx=label_key_idx,
+                    number_of_labels=n_labels,
+                    number_of_examples=number_of_examples,
                 ),
             )
-            for label_idx in range(n_labels)
+            for label_key_idx, label_key in self.index_to_label_key.items()
             for iou_idx in range(n_ious)
+            for score_idx in range(n_scores)
         ]
 
 
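The comprehension above now yields a flat list ordered by label key, then IoU threshold, then score threshold, so with `k` label keys, `i` IoUs, and `s` scores it holds `k * i * s` `ConfusionMatrix` objects. A sketch of that ordering:

```python
label_keys = ["class"]        # stand-in for self.index_to_label_key values
iou_thresholds = [0.5, 0.75]
score_thresholds = [0.5]

order = [
    (key, iou, score)
    for key in label_keys
    for iou in iou_thresholds
    for score in score_thresholds
]
print(order)  # [('class', 0.5, 0.5), ('class', 0.75, 0.5)]
```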
@@ -661,12 +975,27 @@ class DataLoader:
             self._evaluator.label_key_to_index[label[0]],
         )
 
-    def _add_data(
+    def _compute_ious_and_cache_pairs(
         self,
         uid_index: int,
         keyed_groundtruths: dict,
         keyed_predictions: dict,
-    ):
+        annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
+    ) -> None:
+        """
+        Compute IoUs between groundtruths and predictions before storing them as pairs.
+
+        Parameters
+        ----------
+        uid_index : int
+            The index of the detection.
+        keyed_groundtruths : dict
+            A dictionary of groundtruths.
+        keyed_predictions : dict
+            A dictionary of predictions.
+        annotation_type : type[BoundingBox] | type[Polygon] | type[Bitmask]
+            The type of annotation to compute IoUs for.
+        """
         gt_keys = set(keyed_groundtruths.keys())
         pd_keys = set(keyed_predictions.keys())
         joint_keys = gt_keys.intersection(pd_keys)
@@ -677,14 +1006,13 @@
         for key in joint_keys:
             n_predictions = len(keyed_predictions[key])
             n_groundtruths = len(keyed_groundtruths[key])
-            boxes = np.array(
-                [
-                    np.array([*gextrema, *pextrema])
-                    for _, _, _, pextrema in keyed_predictions[key]
-                    for _, _, gextrema in keyed_groundtruths[key]
-                ]
+            data = _get_annotation_data(
+                keyed_groundtruths=keyed_groundtruths,
+                keyed_predictions=keyed_predictions,
+                key=key,
+                annotation_type=annotation_type,
             )
-            ious = compute_iou(boxes)
+            ious = compute_iou(data=data, annotation_type=annotation_type)
             mask_nonzero_iou = (ious > 1e-9).reshape(
                 (n_predictions, n_groundtruths)
             )
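Because `_get_annotation_data` iterates predictions in the outer loop, the flat IoU vector is prediction-major, which is why it reshapes cleanly to `(n_predictions, n_groundtruths)`. A quick standalone check of that layout:

```python
import numpy as np

n_predictions, n_groundtruths = 2, 3
# Flat index is p * n_groundtruths + g, matching the prediction-major order.
ious = np.array([0.0, 0.3, 0.0, 0.9, 0.0, 0.1])
mask_nonzero_iou = (ious > 1e-9).reshape((n_predictions, n_groundtruths))
print(mask_nonzero_iou)
# [[False  True False]
#  [ True False  True]]
```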
@@ -780,9 +1108,10 @@
 
         self.pairs.append(np.array(pairs))
 
-    def add_data(
+    def _add_data(
         self,
         detections: list[Detection],
+        annotation_type: type[Bitmask] | type[BoundingBox] | type[Polygon],
         show_progress: bool = False,
     ):
         """
@@ -792,6 +1121,8 @@
         ----------
         detections : list[Detection]
             A list of Detection objects.
+        annotation_type : type[Bitmask] | type[BoundingBox] | type[Polygon]
+            The annotation type to process.
         show_progress : bool, default=False
            Toggle for tqdm progress bar.
         """
@@ -817,45 +1148,146 @@
             # cache labels and annotations
             keyed_groundtruths = defaultdict(list)
             keyed_predictions = defaultdict(list)
+
+            representation_property = _get_annotation_representation(
+                annotation_type=annotation_type
+            )
+
             for gidx, gann in enumerate(detection.groundtruths):
-                self._evaluator.groundtruth_examples[uid_index][
-                    gidx
-                ] = np.array(gann.extrema)
+                if not isinstance(gann, annotation_type):
+                    raise ValueError(
+                        f"Expected {annotation_type}, but annotation is of type {type(gann)}."
+                    )
+
+                if isinstance(gann, BoundingBox):
+                    self._evaluator.groundtruth_examples[uid_index][
+                        gidx
+                    ] = getattr(gann, representation_property)
+                else:
+                    converted_box = gann.to_box()
+                    self._evaluator.groundtruth_examples[uid_index][gidx] = (
+                        getattr(converted_box, "extrema")
+                        if converted_box is not None
+                        else None
+                    )
                 for glabel in gann.labels:
                     label_idx, label_key_idx = self._add_label(glabel)
                     self.groundtruth_count[label_idx][uid_index] += 1
+                    representation = getattr(gann, representation_property)
                     keyed_groundtruths[label_key_idx].append(
                         (
                             gidx,
                             label_idx,
-                            gann.extrema,
+                            representation,
                         )
                     )
+
             for pidx, pann in enumerate(detection.predictions):
-                self._evaluator.prediction_examples[uid_index][
-                    pidx
-                ] = np.array(pann.extrema)
+                if not isinstance(pann, annotation_type):
+                    raise ValueError(
+                        f"Expected {annotation_type}, but annotation is of type {type(pann)}."
+                    )
+
+                if isinstance(pann, BoundingBox):
+                    self._evaluator.prediction_examples[uid_index][
+                        pidx
+                    ] = getattr(pann, representation_property)
+                else:
+                    converted_box = pann.to_box()
+                    self._evaluator.prediction_examples[uid_index][pidx] = (
+                        getattr(converted_box, "extrema")
+                        if converted_box is not None
+                        else None
+                    )
                 for plabel, pscore in zip(pann.labels, pann.scores):
                     label_idx, label_key_idx = self._add_label(plabel)
                     self.prediction_count[label_idx][uid_index] += 1
+                    representation = getattr(pann, representation_property)
                     keyed_predictions[label_key_idx].append(
                         (
                             pidx,
                             label_idx,
                             pscore,
-                            pann.extrema,
+                            representation,
                         )
                     )
 
-            self._add_data(
+            self._compute_ious_and_cache_pairs(
                 uid_index=uid_index,
                 keyed_groundtruths=keyed_groundtruths,
                 keyed_predictions=keyed_predictions,
+                annotation_type=annotation_type,
             )
 
-    def add_data_from_valor_dict(
+    def add_bounding_boxes(
+        self,
+        detections: list[Detection],
+        show_progress: bool = False,
+    ):
+        """
+        Adds bounding box detections to the cache.
+
+        Parameters
+        ----------
+        detections : list[Detection]
+            A list of Detection objects.
+        show_progress : bool, default=False
+            Toggle for tqdm progress bar.
+        """
+        return self._add_data(
+            detections=detections,
+            show_progress=show_progress,
+            annotation_type=BoundingBox,
+        )
+
+    def add_polygons(
+        self,
+        detections: list[Detection],
+        show_progress: bool = False,
+    ):
+        """
+        Adds polygon detections to the cache.
+
+        Parameters
+        ----------
+        detections : list[Detection]
+            A list of Detection objects.
+        show_progress : bool, default=False
+            Toggle for tqdm progress bar.
+        """
+        return self._add_data(
+            detections=detections,
+            show_progress=show_progress,
+            annotation_type=Polygon,
+        )
+
+    def add_bitmasks(
+        self,
+        detections: list[Detection],
+        show_progress: bool = False,
+    ):
+        """
+        Adds bitmask detections to the cache.
+
+        Parameters
+        ----------
+        detections : list[Detection]
+            A list of Detection objects.
+        show_progress : bool, default=False
+            Toggle for tqdm progress bar.
+        """
+        return self._add_data(
+            detections=detections,
+            show_progress=show_progress,
+            annotation_type=Bitmask,
+        )
+
+    def _add_data_from_valor_dict(
         self,
         detections: list[tuple[dict, dict]],
+        annotation_type: type[Bitmask] | type[BoundingBox] | type[Polygon],
         show_progress: bool = False,
     ):
         """
@@ -865,20 +1297,14 @@ class DataLoader:
         ----------
         detections : list[tuple[dict, dict]]
             A list of groundtruth, prediction pairs in Valor-format dictionaries.
+        annotation_type : type[Bitmask] | type[BoundingBox] | type[Polygon]
+            The annotation type to process.
         show_progress : bool, default=False
             Toggle for tqdm progress bar.
         """
 
-        def _get_bbox_extrema(
-            data: list[list[list[float]]],
-        ) -> tuple[float, float, float, float]:
-            x = [point[0] for shape in data for point in shape]
-            y = [point[1] for shape in data for point in shape]
-            return (min(x), max(x), min(y), max(y))
-
         disable_tqdm = not show_progress
         for groundtruth, prediction in tqdm(detections, disable=disable_tqdm):
-
             # update metadata
             self._evaluator.n_datums += 1
             self._evaluator.n_groundtruths += len(groundtruth["annotations"])
@@ -898,10 +1324,34 @@
             # cache labels and annotations
             keyed_groundtruths = defaultdict(list)
             keyed_predictions = defaultdict(list)
+
+            annotation_key = _get_valor_dict_annotation_key(
+                annotation_type=annotation_type
+            )
+            invalid_keys = list(
+                filter(
+                    lambda x: x != annotation_key,
+                    ["bounding_box", "raster", "polygon"],
+                )
+            )
+
             for gidx, gann in enumerate(groundtruth["annotations"]):
-                self._evaluator.groundtruth_examples[uid_index][
-                    gidx
-                ] = np.array(_get_bbox_extrema(gann["bounding_box"]))
+                if (gann[annotation_key] is None) or any(
+                    [gann[k] is not None for k in invalid_keys]
+                ):
+                    raise ValueError(
+                        f"Input JSON doesn't contain {annotation_type} data, or contains data for multiple annotation types."
+                    )
+                if annotation_type == BoundingBox:
+                    self._evaluator.groundtruth_examples[uid_index][
+                        gidx
+                    ] = np.array(
+                        _get_annotation_representation_from_valor_dict(
+                            gann[annotation_key],
+                            annotation_type=annotation_type,
+                        ),
+                    )
+
                 for valor_label in gann["labels"]:
                     glabel = (valor_label["key"], valor_label["value"])
                     label_idx, label_key_idx = self._add_label(glabel)
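For reference, the shape of the Valor-format input this path validates, inferred from the key checks and extrema extraction above (the `datum` field and anything beyond `annotations`, `labels`, and the three annotation keys are assumptions):

```python
# Exactly one of "bounding_box" / "polygon" / "raster" may be non-None per
# annotation; anything else trips the ValueError above.
groundtruth = {
    "datum": {"uid": "img0"},  # assumed; uid handling is outside this hunk
    "annotations": [
        {
            "labels": [{"key": "class", "value": "dog"}],
            # a list of shapes, each a list of [x, y] points
            "bounding_box": [[[0.0, 0.0], [10.0, 0.0], [10.0, 10.0], [0.0, 10.0]]],
            "polygon": None,
            "raster": None,
        }
    ],
}
prediction = {
    "datum": {"uid": "img0"},
    "annotations": [
        {
            "labels": [{"key": "class", "value": "dog", "score": 0.9}],
            "bounding_box": [[[1.0, 1.0], [11.0, 1.0], [11.0, 11.0], [1.0, 11.0]]],
            "polygon": None,
            "raster": None,
        }
    ],
}
```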
@@ -910,13 +1360,29 @@ class Evaluator:
                         (
                             gidx,
                             label_idx,
-                            _get_bbox_extrema(gann["bounding_box"]),
+                            _get_annotation_representation_from_valor_dict(
+                                gann[annotation_key],
+                                annotation_type=annotation_type,
+                            ),
                         )
                     )
             for pidx, pann in enumerate(prediction["annotations"]):
-                self._evaluator.prediction_examples[uid_index][
-                    pidx
-                ] = np.array(_get_bbox_extrema(pann["bounding_box"]))
+                if (pann[annotation_key] is None) or any(
+                    [pann[k] is not None for k in invalid_keys]
+                ):
+                    raise ValueError(
+                        f"Input JSON doesn't contain {annotation_type} data, or contains data for multiple annotation types."
+                    )
+
+                if annotation_type == BoundingBox:
+                    self._evaluator.prediction_examples[uid_index][
+                        pidx
+                    ] = np.array(
+                        _get_annotation_representation_from_valor_dict(
+                            pann[annotation_key],
+                            annotation_type=annotation_type,
+                        )
+                    )
                 for valor_label in pann["labels"]:
                     plabel = (valor_label["key"], valor_label["value"])
                     pscore = valor_label["score"]
@@ -927,16 +1393,41 @@
                             pidx,
                             label_idx,
                             pscore,
-                            _get_bbox_extrema(pann["bounding_box"]),
+                            _get_annotation_representation_from_valor_dict(
+                                pann[annotation_key],
+                                annotation_type=annotation_type,
+                            ),
                         )
                     )
 
-            self._add_data(
+            self._compute_ious_and_cache_pairs(
                 uid_index=uid_index,
                 keyed_groundtruths=keyed_groundtruths,
                 keyed_predictions=keyed_predictions,
+                annotation_type=annotation_type,
             )
 
+    def add_bounding_boxes_from_valor_dict(
+        self,
+        detections: list[tuple[dict, dict]],
+        show_progress: bool = False,
+    ):
+        """
+        Adds Valor-format bounding box detections to the cache.
+
+        Parameters
+        ----------
+        detections : list[tuple[dict, dict]]
+            A list of groundtruth, prediction pairs in Valor-format dictionaries.
+        show_progress : bool, default=False
+            Toggle for tqdm progress bar.
+        """
+        return self._add_data_from_valor_dict(
+            detections=detections,
+            show_progress=show_progress,
+            annotation_type=BoundingBox,
+        )
+
     def finalize(self) -> Evaluator:
         """
         Performs data finalization and some preprocessing steps.