valor-lite 0.33.7__py3-none-any.whl → 0.33.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,11 @@
+ import warnings
  from collections import defaultdict
  from dataclasses import dataclass
+ from typing import Type

  import numpy as np
+ import valor_lite.detection.annotation as annotation
  from numpy.typing import NDArray
- from shapely.geometry import Polygon as ShapelyPolygon
  from tqdm import tqdm
  from valor_lite.detection.annotation import (
  Bitmask,
@@ -59,103 +61,6 @@ filtered_metrics = evaluator.evaluate(iou_thresholds=[0.5], filter_mask=filter_m
  """


- def _get_valor_dict_annotation_key(
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
- ) -> str:
- """Get the correct JSON key to extract a given annotation type."""
-
- if issubclass(annotation_type, BoundingBox):
- return "bounding_box"
- if issubclass(annotation_type, Polygon):
- return "polygon"
- else:
- return "raster"
-
-
- def _get_annotation_representation(
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
- ) -> str:
- """Get the correct representation of an annotation object."""
-
- representation = (
- "extrema"
- if issubclass(annotation_type, BoundingBox)
- else ("mask" if issubclass(annotation_type, Bitmask) else "shape")
- )
-
- return representation
-
-
- def _get_annotation_representation_from_valor_dict(
- data: list,
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
- ) -> tuple[float, float, float, float] | ShapelyPolygon | NDArray[np.bool_]:
- """Get the correct representation of an annotation object from a valor dictionary."""
-
- if issubclass(annotation_type, BoundingBox):
- x = [point[0] for shape in data for point in shape]
- y = [point[1] for shape in data for point in shape]
- return (min(x), max(x), min(y), max(y))
- if issubclass(annotation_type, Polygon):
- return ShapelyPolygon(data)
- else:
- return np.array(data)
-
-
- def _get_annotation_data(
- keyed_groundtruths: dict,
- keyed_predictions: dict,
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask] | None,
- key=int,
- ) -> np.ndarray:
- """Create an array of annotation pairs for use when calculating IOU. Needed because we unpack bounding box representations, but not bitmask or polygon representations."""
- if annotation_type == BoundingBox:
- return np.array(
- [
- np.array([*gextrema, *pextrema])
- for _, _, _, pextrema in keyed_predictions[key]
- for _, _, gextrema in keyed_groundtruths[key]
- ]
- )
- else:
- return np.array(
- [
- np.array([groundtruth_obj, prediction_obj])
- for _, _, _, prediction_obj in keyed_predictions[key]
- for _, _, groundtruth_obj in keyed_groundtruths[key]
- ]
- )
-
-
- def compute_iou(
- data: NDArray[np.floating],
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
- ) -> NDArray[np.floating]:
- """
- Computes intersection-over-union (IoU) calculations for various annotation types.
-
- Parameters
- ----------
- data : NDArray[np.floating]
- A sorted array of bounding box, bitmask, or polygon pairs.
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask]
- The type of annotation contained in the data.
-
-
- Returns
- -------
- NDArray[np.floating]
- Computed IoU's.
- """
-
- if annotation_type == BoundingBox:
- return compute_bbox_iou(data=data)
- elif annotation_type == Bitmask:
- return compute_bitmask_iou(data=data)
- else:
- return compute_polygon_iou(data=data)
-
-
  @dataclass
  class Filter:
  ranked_indices: NDArray[np.int32]
@@ -185,22 +90,17 @@ class Evaluator:
  self.prediction_examples: dict[int, NDArray[np.float16]] = dict()

  # label reference
- self.label_to_index: dict[tuple[str, str], int] = dict()
- self.index_to_label: dict[int, tuple[str, str]] = dict()
-
- # label key reference
- self.index_to_label_key: dict[int, str] = dict()
- self.label_key_to_index: dict[str, int] = dict()
- self.label_index_to_label_key_index: dict[int, int] = dict()
+ self.label_to_index: dict[str, int] = dict()
+ self.index_to_label: dict[int, str] = dict()

  # computation caches
- self._detailed_pairs: NDArray[np.floating] = np.array([])
- self._ranked_pairs: NDArray[np.floating] = np.array([])
+ self._detailed_pairs: NDArray[np.float64] = np.array([])
+ self._ranked_pairs: NDArray[np.float64] = np.array([])
  self._label_metadata: NDArray[np.int32] = np.array([])
  self._label_metadata_per_datum: NDArray[np.int32] = np.array([])

  @property
- def ignored_prediction_labels(self) -> list[tuple[str, str]]:
+ def ignored_prediction_labels(self) -> list[str]:
  """
  Prediction labels that are not present in the ground truth set.
  """
@@ -211,7 +111,7 @@ class Evaluator:
  ]

  @property
- def missing_prediction_labels(self) -> list[tuple[str, str]]:
+ def missing_prediction_labels(self) -> list[str]:
  """
  Ground truth labels that are not present in the prediction set.
  """
@@ -238,8 +138,7 @@ class Evaluator:
  def create_filter(
  self,
  datum_uids: list[str] | NDArray[np.int32] | None = None,
- labels: list[tuple[str, str]] | NDArray[np.int32] | None = None,
- label_keys: list[str] | NDArray[np.int32] | None = None,
+ labels: list[str] | NDArray[np.int32] | None = None,
  ) -> Filter:
  """
  Creates a filter that can be passed to an evaluation.
@@ -248,10 +147,8 @@ class Evaluator:
  ----------
  datum_uids : list[str] | NDArray[np.int32], optional
  An optional list of string uids or a numpy array of uid indices.
- labels : list[tuple[str, str]] | NDArray[np.int32], optional
+ labels : list[str] | NDArray[np.int32], optional
  An optional list of labels or a numpy array of label indices.
- label_keys : list[str] | NDArray[np.int32], optional
- An optional list of label keys or a numpy array of label key indices.

  Returns
  -------
@@ -296,24 +193,6 @@ class Evaluator:
  ] = False
  mask_labels[~np.isin(np.arange(n_labels), labels)] = False

- if label_keys is not None:
- if isinstance(label_keys, list):
- label_keys = np.array(
- [self.label_key_to_index[key] for key in label_keys]
- )
- label_indices = (
- np.where(np.isclose(self._label_metadata[:, 2], label_keys))[0]
- if label_keys.size > 0
- else np.array([])
- )
- mask_ranked[
- ~np.isin(self._ranked_pairs[:, 4].astype(int), label_indices)
- ] = False
- mask_detailed[
- ~np.isin(self._detailed_pairs[:, 4].astype(int), label_indices)
- ] = False
- mask_labels[~np.isin(np.arange(n_labels), label_indices)] = False
-
  mask_label_metadata = (
  mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
  )
@@ -321,13 +200,12 @@ class Evaluator:
  label_metadata_per_datum[:, ~mask_label_metadata] = 0

  label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
- label_metadata[:, :2] = np.transpose(
+ label_metadata = np.transpose(
  np.sum(
  label_metadata_per_datum,
  axis=1,
  )
  )
- label_metadata[:, 2] = self._label_metadata[:, 2]

  return Filter(
  ranked_indices=np.where(mask_ranked)[0],
@@ -413,12 +291,10 @@ class Evaluator:

  metrics[MetricType.mAP] = [
  mAP(
- value=mean_average_precision[iou_idx][label_key_idx],
+ value=mean_average_precision[iou_idx],
  iou_threshold=iou_thresholds[iou_idx],
- label_key=self.index_to_label_key[label_key_idx],
  )
  for iou_idx in range(mean_average_precision.shape[0])
- for label_key_idx in range(mean_average_precision.shape[1])
  ]

  metrics[MetricType.APAveragedOverIOUs] = [
@@ -433,12 +309,8 @@ class Evaluator:

  metrics[MetricType.mAPAveragedOverIOUs] = [
  mAPAveragedOverIOUs(
- value=mean_average_precision_average_over_ious[label_key_idx],
+ value=mean_average_precision_average_over_ious,
  iou_thresholds=iou_thresholds,
- label_key=self.index_to_label_key[label_key_idx],
- )
- for label_key_idx in range(
- mean_average_precision_average_over_ious.shape[0]
  )
  ]

@@ -456,13 +328,11 @@ class Evaluator:

  metrics[MetricType.mAR] = [
  mAR(
- value=mean_average_recall[score_idx][label_key_idx],
+ value=mean_average_recall[score_idx],
  iou_thresholds=iou_thresholds,
  score_threshold=score_thresholds[score_idx],
- label_key=self.index_to_label_key[label_key_idx],
  )
  for score_idx in range(mean_average_recall.shape[0])
- for label_key_idx in range(mean_average_recall.shape[1])
  ]

  metrics[MetricType.ARAveragedOverScores] = [
@@ -478,13 +348,9 @@ class Evaluator:

  metrics[MetricType.mARAveragedOverScores] = [
  mARAveragedOverScores(
- value=mean_average_recall_averaged_over_scores[label_key_idx],
+ value=mean_average_recall_averaged_over_scores,
  score_thresholds=score_thresholds,
  iou_thresholds=iou_thresholds,
- label_key=self.index_to_label_key[label_key_idx],
- )
- for label_key_idx in range(
- mean_average_recall_averaged_over_scores.shape[0]
  )
  ]

@@ -570,10 +436,22 @@ class Evaluator:

  return metrics

+ def _convert_example_to_dict(
+ self, box: NDArray[np.float16]
+ ) -> dict[str, float]:
+ """
+ Converts a cached bounding box example to dictionary format.
+ """
+ return {
+ "xmin": box[0],
+ "xmax": box[1],
+ "ymin": box[2],
+ "ymax": box[3],
+ }
+
  def _unpack_confusion_matrix(
  self,
- confusion_matrix: NDArray[np.floating],
- label_key_idx: int,
+ confusion_matrix: NDArray[np.float64],
  number_of_labels: int,
  number_of_examples: int,
  ) -> dict[
@@ -586,7 +464,7 @@ class Evaluator:
  | list[
  dict[
  str,
- str | float | tuple[float, float, float, float],
+ str | float | dict[str, float],
  ]
  ],
  ],
@@ -629,8 +507,8 @@ class Evaluator:
  )

  return {
- self.index_to_label[gt_label_idx][1]: {
- self.index_to_label[pd_label_idx][1]: {
+ self.index_to_label[gt_label_idx]: {
+ self.index_to_label[pd_label_idx]: {
  "count": max(
  int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
  0,
@@ -642,7 +520,7 @@ class Evaluator:
  gt_label_idx, pd_label_idx, example_idx
  )
  ],
- "groundtruth": tuple(
+ "groundtruth": self._convert_example_to_dict(
  self.groundtruth_examples[
  datum_idx(
  gt_label_idx,
@@ -655,9 +533,9 @@ class Evaluator:
  pd_label_idx,
  example_idx,
  )
- ].tolist()
+ ]
  ),
- "prediction": tuple(
+ "prediction": self._convert_example_to_dict(
  self.prediction_examples[
  datum_idx(
  gt_label_idx,
@@ -670,7 +548,7 @@ class Evaluator:
  pd_label_idx,
  example_idx,
  )
- ].tolist()
+ ]
  ),
  "score": score_idx(
  gt_label_idx, pd_label_idx, example_idx
@@ -682,30 +560,20 @@ class Evaluator:
  ],
  }
  for pd_label_idx in range(number_of_labels)
- if (
- self.label_index_to_label_key_index[pd_label_idx]
- == label_key_idx
- )
  }
  for gt_label_idx in range(number_of_labels)
- if (
- self.label_index_to_label_key_index[gt_label_idx]
- == label_key_idx
- )
  }

  def _unpack_hallucinations(
  self,
- hallucinations: NDArray[np.floating],
- label_key_idx: int,
+ hallucinations: NDArray[np.float64],
  number_of_labels: int,
  number_of_examples: int,
  ) -> dict[
  str,
  dict[
  str,
- int
- | list[dict[str, str | float | tuple[float, float, float, float]]],
+ int | list[dict[str, str | float | dict[str, float]]],
  ],
  ]:
  """
@@ -740,7 +608,7 @@ class Evaluator:
  )

  return {
- self.index_to_label[pd_label_idx][1]: {
+ self.index_to_label[pd_label_idx]: {
  "count": max(
  int(hallucinations[pd_label_idx, 0]),
  0,
@@ -750,12 +618,10 @@ class Evaluator:
  "datum": self.index_to_uid[
  datum_idx(pd_label_idx, example_idx)
  ],
- "prediction": tuple(
+ "prediction": self._convert_example_to_dict(
  self.prediction_examples[
  datum_idx(pd_label_idx, example_idx)
- ][
- prediction_idx(pd_label_idx, example_idx)
- ].tolist()
+ ][prediction_idx(pd_label_idx, example_idx)]
  ),
  "score": score_idx(pd_label_idx, example_idx),
  }
@@ -764,25 +630,14 @@ class Evaluator:
  ],
  }
  for pd_label_idx in range(number_of_labels)
- if (
- self.label_index_to_label_key_index[pd_label_idx]
- == label_key_idx
- )
  }

  def _unpack_missing_predictions(
  self,
  missing_predictions: NDArray[np.int32],
- label_key_idx: int,
  number_of_labels: int,
  number_of_examples: int,
- ) -> dict[
- str,
- dict[
- str,
- int | list[dict[str, str | tuple[float, float, float, float]]],
- ],
- ]:
+ ) -> dict[str, dict[str, int | list[dict[str, str | dict[str, float]]]]]:
  """
  Unpacks a numpy array of missing prediction counts and examples.
  """
@@ -806,7 +661,7 @@ class Evaluator:
  )

  return {
- self.index_to_label[gt_label_idx][1]: {
+ self.index_to_label[gt_label_idx]: {
  "count": max(
  int(missing_predictions[gt_label_idx, 0]),
  0,
@@ -816,12 +671,10 @@ class Evaluator:
  "datum": self.index_to_uid[
  datum_idx(gt_label_idx, example_idx)
  ],
- "groundtruth": tuple(
+ "groundtruth": self._convert_example_to_dict(
  self.groundtruth_examples[
  datum_idx(gt_label_idx, example_idx)
- ][
- groundtruth_idx(gt_label_idx, example_idx)
- ].tolist()
+ ][groundtruth_idx(gt_label_idx, example_idx)]
  ),
  }
  for example_idx in range(number_of_examples)
@@ -829,15 +682,11 @@ class Evaluator:
  ],
  }
  for gt_label_idx in range(number_of_labels)
- if (
- self.label_index_to_label_key_index[gt_label_idx]
- == label_key_idx
- )
  }

  def _compute_confusion_matrix(
  self,
- data: NDArray[np.floating],
+ data: NDArray[np.float64],
  label_metadata: NDArray[np.int32],
  iou_thresholds: list[float],
  score_thresholds: list[float],
@@ -848,7 +697,7 @@ class Evaluator:

  Parameters
  ----------
- data : NDArray[np.floating]
+ data : NDArray[np.float64]
  An array containing detailed pairs of detections.
  label_metadata : NDArray[np.int32]
  An array containing label metadata.
@@ -885,19 +734,16 @@ class Evaluator:
  ConfusionMatrix(
  iou_threshold=iou_thresholds[iou_idx],
  score_threshold=score_thresholds[score_idx],
- label_key=label_key,
  number_of_examples=number_of_examples,
  confusion_matrix=self._unpack_confusion_matrix(
  confusion_matrix=confusion_matrix[
  iou_idx, score_idx, :, :, :
  ],
- label_key_idx=label_key_idx,
  number_of_labels=n_labels,
  number_of_examples=number_of_examples,
  ),
  hallucinations=self._unpack_hallucinations(
  hallucinations=hallucinations[iou_idx, score_idx, :, :],
- label_key_idx=label_key_idx,
  number_of_labels=n_labels,
  number_of_examples=number_of_examples,
  ),
@@ -905,12 +751,10 @@ class Evaluator:
  missing_predictions=missing_predictions[
  iou_idx, score_idx, :, :
  ],
- label_key_idx=label_key_idx,
  number_of_labels=n_labels,
  number_of_examples=number_of_examples,
  ),
  )
- for label_key_idx, label_key in self.index_to_label_key.items()
  for iou_idx in range(n_ious)
  for score_idx in range(n_scores)
  ]
@@ -923,7 +767,7 @@ class DataLoader:

  def __init__(self):
  self._evaluator = Evaluator()
- self.pairs: list[NDArray[np.floating]] = list()
+ self.pairs: list[NDArray[np.float64]] = list()
  self.groundtruth_count = defaultdict(lambda: defaultdict(int))
  self.prediction_count = defaultdict(lambda: defaultdict(int))

@@ -947,51 +791,36 @@ class DataLoader:
  self._evaluator.index_to_uid[index] = uid
  return self._evaluator.uid_to_index[uid]

- def _add_label(self, label: tuple[str, str]) -> tuple[int, int]:
+ def _add_label(self, label: str) -> int:
  """
  Helper function for adding a label to the cache.

  Parameters
  ----------
- label : tuple[str, str]
- The label as a tuple in format (key, value).
+ label : str
+ The label associated with the annotation.

  Returns
  -------
  int
  Label index.
- int
- Label key index.
  """

  label_id = len(self._evaluator.index_to_label)
- label_key_id = len(self._evaluator.index_to_label_key)
  if label not in self._evaluator.label_to_index:
  self._evaluator.label_to_index[label] = label_id
  self._evaluator.index_to_label[label_id] = label

- # update label key index
- if label[0] not in self._evaluator.label_key_to_index:
- self._evaluator.label_key_to_index[label[0]] = label_key_id
- self._evaluator.index_to_label_key[label_key_id] = label[0]
- label_key_id += 1
-
- self._evaluator.label_index_to_label_key_index[
- label_id
- ] = self._evaluator.label_key_to_index[label[0]]
  label_id += 1

- return (
- self._evaluator.label_to_index[label],
- self._evaluator.label_key_to_index[label[0]],
- )
+ return self._evaluator.label_to_index[label]

  def _compute_ious_and_cache_pairs(
  self,
  uid_index: int,
- keyed_groundtruths: dict,
- keyed_predictions: dict,
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
+ groundtruths: list,
+ predictions: list,
+ annotation_type: Type[BoundingBox] | Type[Polygon] | Type[Bitmask],
  ) -> None:
  """
  Compute IOUs between groundtruths and preditions before storing as pairs.
@@ -1000,122 +829,92 @@ class DataLoader:
  ----------
  uid_index: int
  The index of the detection.
- keyed_groundtruths: dict
- A dictionary of groundtruths.
- keyed_predictions: dict
- A dictionary of predictions.
+ groundtruths: list
+ A list of groundtruths.
+ predictions: list
+ A list of predictions.
  annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask]
  The type of annotation to compute IOUs for.
  """
- gt_keys = set(keyed_groundtruths.keys())
- pd_keys = set(keyed_predictions.keys())
- joint_keys = gt_keys.intersection(pd_keys)
- gt_unique_keys = gt_keys - pd_keys
- pd_unique_keys = pd_keys - gt_keys

  pairs = list()
- for key in joint_keys:
- n_predictions = len(keyed_predictions[key])
- n_groundtruths = len(keyed_groundtruths[key])
- data = _get_annotation_data(
- keyed_groundtruths=keyed_groundtruths,
- keyed_predictions=keyed_predictions,
- key=key,
- annotation_type=annotation_type,
- )
- ious = compute_iou(data=data, annotation_type=annotation_type)
- mask_nonzero_iou = (ious > 1e-9).reshape(
- (n_predictions, n_groundtruths)
- )
- mask_ious_halluc = ~(mask_nonzero_iou.any(axis=1))
- mask_ious_misprd = ~(mask_nonzero_iou.any(axis=0))
+ n_predictions = len(predictions)
+ n_groundtruths = len(groundtruths)

- pairs.extend(
- [
- np.array(
- [
- float(uid_index),
- float(gidx),
- float(pidx),
- ious[pidx * len(keyed_groundtruths[key]) + gidx],
- float(glabel),
- float(plabel),
- float(score),
- ]
- )
- for pidx, plabel, score, _ in keyed_predictions[key]
- for gidx, glabel, _ in keyed_groundtruths[key]
- if ious[pidx * len(keyed_groundtruths[key]) + gidx] > 1e-9
- ]
- )
- pairs.extend(
- [
- np.array(
- [
- float(uid_index),
- -1.0,
- float(pidx),
- 0.0,
- -1.0,
- float(plabel),
- float(score),
- ]
- )
- for pidx, plabel, score, _ in keyed_predictions[key]
- if mask_ious_halluc[pidx]
- ]
- )
- pairs.extend(
- [
- np.array(
- [
- float(uid_index),
- float(gidx),
- -1.0,
- 0.0,
- float(glabel),
- -1.0,
- -1.0,
- ]
- )
- for gidx, glabel, _ in keyed_groundtruths[key]
- if mask_ious_misprd[gidx]
- ]
- )
- for key in gt_unique_keys:
- pairs.extend(
- [
- np.array(
- [
- float(uid_index),
- float(gidx),
- -1.0,
- 0.0,
- float(glabel),
- -1.0,
- -1.0,
- ]
- )
- for gidx, glabel, _ in keyed_groundtruths[key]
- ]
- )
- for key in pd_unique_keys:
- pairs.extend(
- [
- np.array(
- [
- float(uid_index),
- -1.0,
- float(pidx),
- 0.0,
- -1.0,
- float(plabel),
- float(score),
- ]
- )
- for pidx, plabel, score, _ in keyed_predictions[key]
- ]
- )
+ all_pairs = np.array(
+ [
+ np.array([gann, pann])
+ for _, _, _, pann in predictions
+ for _, _, gann in groundtruths
+ ]
+ )
+
+ match annotation_type:
+ case annotation.BoundingBox:
+ ious = compute_bbox_iou(all_pairs)
+ case annotation.Polygon:
+ ious = compute_polygon_iou(all_pairs)
+ case annotation.Bitmask:
+ ious = compute_bitmask_iou(all_pairs)
+ case _:
+ raise ValueError(
+ f"Invalid annotation type `{annotation_type}`."
+ )
+
+ ious = ious.reshape(n_predictions, n_groundtruths)
+ predictions_with_iou_of_zero = np.where((ious < 1e-9).all(axis=1))[0]
+ groundtruths_with_iou_of_zero = np.where((ious < 1e-9).all(axis=0))[0]
+
+ pairs.extend(
+ [
+ np.array(
+ [
+ float(uid_index),
+ float(gidx),
+ float(pidx),
+ ious[pidx, gidx],
+ float(glabel),
+ float(plabel),
+ float(score),
+ ]
+ )
+ for pidx, plabel, score, _ in predictions
+ for gidx, glabel, _ in groundtruths
+ if ious[pidx, gidx] >= 1e-9
+ ]
+ )
+ pairs.extend(
+ [
+ np.array(
+ [
+ float(uid_index),
+ -1.0,
+ float(predictions[index][0]),
+ 0.0,
+ -1.0,
+ float(predictions[index][1]),
+ float(predictions[index][2]),
+ ]
+ )
+ for index in predictions_with_iou_of_zero
+ ]
+ )
+ pairs.extend(
+ [
+ np.array(
+ [
+ float(uid_index),
+ float(groundtruths[index][0]),
+ -1.0,
+ 0.0,
+ float(groundtruths[index][1]),
+ -1.0,
+ -1.0,
+ ]
+ )
+ for index in groundtruths_with_iou_of_zero
+ ]
+ )

  self.pairs.append(np.array(pairs))

@@ -1157,12 +956,8 @@ class DataLoader:
  )

  # cache labels and annotations
- keyed_groundtruths = defaultdict(list)
- keyed_predictions = defaultdict(list)
-
- representation_property = _get_annotation_representation(
- annotation_type=annotation_type
- )
+ groundtruths = list()
+ predictions = list()

  for gidx, gann in enumerate(detection.groundtruths):
  if not isinstance(gann, annotation_type):
@@ -1170,26 +965,17 @@ class DataLoader:
  f"Expected {annotation_type}, but annotation is of type {type(gann)}."
  )

- if isinstance(gann, BoundingBox):
- self._evaluator.groundtruth_examples[uid_index][
- gidx
- ] = getattr(gann, representation_property)
- else:
- converted_box = gann.to_box()
- self._evaluator.groundtruth_examples[uid_index][gidx] = (
- getattr(converted_box, "extrema")
- if converted_box is not None
- else None
- )
+ self._evaluator.groundtruth_examples[uid_index][
+ gidx
+ ] = gann.extrema
  for glabel in gann.labels:
- label_idx, label_key_idx = self._add_label(glabel)
+ label_idx = self._add_label(glabel)
  self.groundtruth_count[label_idx][uid_index] += 1
- representation = getattr(gann, representation_property)
- keyed_groundtruths[label_key_idx].append(
+ groundtruths.append(
  (
  gidx,
  label_idx,
- representation,
+ gann.annotation,
  )
  )

@@ -1199,36 +985,25 @@ class DataLoader:
  f"Expected {annotation_type}, but annotation is of type {type(pann)}."
  )

- if isinstance(pann, BoundingBox):
- self._evaluator.prediction_examples[uid_index][
- pidx
- ] = getattr(pann, representation_property)
- else:
- converted_box = pann.to_box()
- self._evaluator.prediction_examples[uid_index][pidx] = (
- getattr(converted_box, "extrema")
- if converted_box is not None
- else None
- )
+ self._evaluator.prediction_examples[uid_index][
+ pidx
+ ] = pann.extrema
  for plabel, pscore in zip(pann.labels, pann.scores):
- label_idx, label_key_idx = self._add_label(plabel)
+ label_idx = self._add_label(plabel)
  self.prediction_count[label_idx][uid_index] += 1
- representation = representation = getattr(
- pann, representation_property
- )
- keyed_predictions[label_key_idx].append(
+ predictions.append(
  (
  pidx,
  label_idx,
  pscore,
- representation,
+ pann.annotation,
  )
  )

  self._compute_ious_and_cache_pairs(
  uid_index=uid_index,
- keyed_groundtruths=keyed_groundtruths,
- keyed_predictions=keyed_predictions,
+ groundtruths=groundtruths,
+ predictions=predictions,
  annotation_type=annotation_type,
  )

@@ -1295,10 +1070,9 @@ class DataLoader:
  annotation_type=Bitmask,
  )

- def _add_data_from_valor_dict(
+ def add_bounding_boxes_from_valor_dict(
  self,
  detections: list[tuple[dict, dict]],
- annotation_type: type[Bitmask] | type[BoundingBox] | type[Polygon],
  show_progress: bool = False,
  ):
  """
@@ -1313,9 +1087,29 @@ class DataLoader:
  show_progress : bool, default=False
  Toggle for tqdm progress bar.
  """
+ warnings.warn(
+ "The `...from_valor_dict` functions are deprecated and will be deleted in the near future. Use `add_bounding_boxes`, `add_bitmasks`, or `add_polygons` instead.",
+ DeprecationWarning,
+ )
+
+ def _get_bbox_extrema(
+ data: list,
+ ) -> tuple[float, float, float, float]:
+ """Get the correct representation of an annotation object from a valor dictionary."""
+ x = [point[0] for shape in data for point in shape]
+ y = [point[1] for shape in data for point in shape]
+ return (min(x), max(x), min(y), max(y))

  disable_tqdm = not show_progress
  for groundtruth, prediction in tqdm(detections, disable=disable_tqdm):
+
+ if not isinstance(groundtruth, dict) or not isinstance(
+ prediction, dict
+ ):
+ raise ValueError(
+ f"Received values with type `{type(groundtruth)}` which are not valid Valor dictionaries."
+ )
+
  # update metadata
  self._evaluator.n_datums += 1
  self._evaluator.n_groundtruths += len(groundtruth["annotations"])
@@ -1333,112 +1127,63 @@ class DataLoader:
  )

  # cache labels and annotations
- keyed_groundtruths = defaultdict(list)
- keyed_predictions = defaultdict(list)
-
- annotation_key = _get_valor_dict_annotation_key(
- annotation_type=annotation_type
- )
- invalid_keys = list(
- filter(
- lambda x: x != annotation_key,
- ["bounding_box", "raster", "polygon"],
- )
- )
+ groundtruths = list()
+ predictions = list()

  for gidx, gann in enumerate(groundtruth["annotations"]):
- if (gann[annotation_key] is None) or any(
- [gann[k] is not None for k in invalid_keys]
- ):
+ if gann["bounding_box"] is None:
  raise ValueError(
- f"Input JSON doesn't contain {annotation_type} data, or contains data for multiple annotation types."
- )
- if annotation_type == BoundingBox:
- self._evaluator.groundtruth_examples[uid_index][
- gidx
- ] = np.array(
- _get_annotation_representation_from_valor_dict(
- gann[annotation_key],
- annotation_type=annotation_type,
- ),
+ f"Detection `{groundtruth['datum']['uid']}` contains a ground truth without a bounding box."
  )
-
+ self._evaluator.groundtruth_examples[uid_index][
+ gidx
+ ] = np.array(
+ _get_bbox_extrema(gann["bounding_box"]),
+ )
  for valor_label in gann["labels"]:
- glabel = (valor_label["key"], valor_label["value"])
- label_idx, label_key_idx = self._add_label(glabel)
+ if valor_label["key"] != "name":
+ continue
+ glabel = f'{valor_label["key"]}_{valor_label["value"]}'
+ label_idx = self._add_label(glabel)
  self.groundtruth_count[label_idx][uid_index] += 1
- keyed_groundtruths[label_key_idx].append(
+ groundtruths.append(
  (
  gidx,
  label_idx,
- _get_annotation_representation_from_valor_dict(
- gann[annotation_key],
- annotation_type=annotation_type,
- ),
+ _get_bbox_extrema(gann["bounding_box"]),
  )
  )
  for pidx, pann in enumerate(prediction["annotations"]):
- if (pann[annotation_key] is None) or any(
- [pann[k] is not None for k in invalid_keys]
- ):
+ if pann["bounding_box"] is None:
  raise ValueError(
- f"Input JSON doesn't contain {annotation_type} data, or contains data for multiple annotation types."
- )
-
- if annotation_type == BoundingBox:
- self._evaluator.prediction_examples[uid_index][
- pidx
- ] = np.array(
- _get_annotation_representation_from_valor_dict(
- pann[annotation_key],
- annotation_type=annotation_type,
- )
+ f"Detection `{prediction['datum']['uid']}` contains a prediction without a bounding box."
  )
+ self._evaluator.prediction_examples[uid_index][
+ pidx
+ ] = np.array(_get_bbox_extrema(pann["bounding_box"]))
  for valor_label in pann["labels"]:
- plabel = (valor_label["key"], valor_label["value"])
+ if valor_label["key"] != "name":
+ continue
+ plabel = valor_label["value"]
  pscore = valor_label["score"]
- label_idx, label_key_idx = self._add_label(plabel)
+ label_idx = self._add_label(plabel)
  self.prediction_count[label_idx][uid_index] += 1
- keyed_predictions[label_key_idx].append(
+ predictions.append(
  (
  pidx,
  label_idx,
  pscore,
- _get_annotation_representation_from_valor_dict(
- pann[annotation_key],
- annotation_type=annotation_type,
- ),
+ _get_bbox_extrema(pann["bounding_box"]),
  )
  )

  self._compute_ious_and_cache_pairs(
  uid_index=uid_index,
- keyed_groundtruths=keyed_groundtruths,
- keyed_predictions=keyed_predictions,
- annotation_type=annotation_type,
+ groundtruths=groundtruths,
+ predictions=predictions,
+ annotation_type=BoundingBox,
  )

- def add_bounding_boxes_from_valor_dict(
- self,
- detections: list[tuple[dict, dict]],
- show_progress: bool = False,
- ):
- """
- Adds Valor-format bounding box detections to the cache.
-
- Parameters
- ----------
- detections : list[tuple[dict, dict]]
- A list of groundtruth, prediction pairs in Valor-format dictionaries.
- show_progress : bool, default=False
- Toggle for tqdm progress bar.
- """
- return self._add_data_from_valor_dict(
- detections=detections,
- show_progress=show_progress,
- annotation_type=BoundingBox,
- )
-
  def finalize(self) -> Evaluator:
  """
  Performs data finalization and some preprocessing steps.
@@ -1494,11 +1239,6 @@ class DataLoader:
  ]
  )
  ),
- float(
- self._evaluator.label_index_to_label_key_index[
- label_idx
- ]
- ),
  ]
  for label_idx in range(n_labels)
  ]