valor-lite 0.33.6__py3-none-any.whl → 0.33.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
@@ -1,9 +1,11 @@
1
+ import warnings
1
2
  from collections import defaultdict
2
3
  from dataclasses import dataclass
4
+ from typing import Type
3
5
 
4
6
  import numpy as np
7
+ import valor_lite.detection.annotation as annotation
5
8
  from numpy.typing import NDArray
6
- from shapely.geometry import Polygon as ShapelyPolygon
7
9
  from tqdm import tqdm
8
10
  from valor_lite.detection.annotation import (
9
11
  Bitmask,
@@ -59,103 +61,6 @@ filtered_metrics = evaluator.evaluate(iou_thresholds=[0.5], filter_mask=filter_m
59
61
  """
60
62
 
61
63
 
62
- def _get_valor_dict_annotation_key(
63
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
64
- ) -> str:
65
- """Get the correct JSON key to extract a given annotation type."""
66
-
67
- if issubclass(annotation_type, BoundingBox):
68
- return "bounding_box"
69
- if issubclass(annotation_type, Polygon):
70
- return "polygon"
71
- else:
72
- return "raster"
73
-
74
-
75
- def _get_annotation_representation(
76
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
77
- ) -> str:
78
- """Get the correct representation of an annotation object."""
79
-
80
- representation = (
81
- "extrema"
82
- if issubclass(annotation_type, BoundingBox)
83
- else ("mask" if issubclass(annotation_type, Bitmask) else "shape")
84
- )
85
-
86
- return representation
87
-
88
-
89
- def _get_annotation_representation_from_valor_dict(
90
- data: list,
91
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
92
- ) -> tuple[float, float, float, float] | ShapelyPolygon | NDArray[np.bool_]:
93
- """Get the correct representation of an annotation object from a valor dictionary."""
94
-
95
- if issubclass(annotation_type, BoundingBox):
96
- x = [point[0] for shape in data for point in shape]
97
- y = [point[1] for shape in data for point in shape]
98
- return (min(x), max(x), min(y), max(y))
99
- if issubclass(annotation_type, Polygon):
100
- return ShapelyPolygon(data)
101
- else:
102
- return np.array(data)
103
-
104
-
105
- def _get_annotation_data(
106
- keyed_groundtruths: dict,
107
- keyed_predictions: dict,
108
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask] | None,
109
- key=int,
110
- ) -> np.ndarray:
111
- """Create an array of annotation pairs for use when calculating IOU. Needed because we unpack bounding box representations, but not bitmask or polygon representations."""
112
- if annotation_type == BoundingBox:
113
- return np.array(
114
- [
115
- np.array([*gextrema, *pextrema])
116
- for _, _, _, pextrema in keyed_predictions[key]
117
- for _, _, gextrema in keyed_groundtruths[key]
118
- ]
119
- )
120
- else:
121
- return np.array(
122
- [
123
- np.array([groundtruth_obj, prediction_obj])
124
- for _, _, _, prediction_obj in keyed_predictions[key]
125
- for _, _, groundtruth_obj in keyed_groundtruths[key]
126
- ]
127
- )
128
-
129
-
130
- def compute_iou(
131
- data: NDArray[np.floating],
132
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
133
- ) -> NDArray[np.floating]:
134
- """
135
- Computes intersection-over-union (IoU) calculations for various annotation types.
136
-
137
- Parameters
138
- ----------
139
- data : NDArray[np.floating]
140
- A sorted array of bounding box, bitmask, or polygon pairs.
141
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask]
142
- The type of annotation contained in the data.
143
-
144
-
145
- Returns
146
- -------
147
- NDArray[np.floating]
148
- Computed IoU's.
149
- """
150
-
151
- if annotation_type == BoundingBox:
152
- return compute_bbox_iou(data=data)
153
- elif annotation_type == Bitmask:
154
- return compute_bitmask_iou(data=data)
155
- else:
156
- return compute_polygon_iou(data=data)
157
-
158
-
159
64
  @dataclass
160
65
  class Filter:
161
66
  ranked_indices: NDArray[np.int32]
@@ -185,22 +90,17 @@ class Evaluator:
185
90
  self.prediction_examples: dict[int, NDArray[np.float16]] = dict()
186
91
 
187
92
  # label reference
188
- self.label_to_index: dict[tuple[str, str], int] = dict()
189
- self.index_to_label: dict[int, tuple[str, str]] = dict()
190
-
191
- # label key reference
192
- self.index_to_label_key: dict[int, str] = dict()
193
- self.label_key_to_index: dict[str, int] = dict()
194
- self.label_index_to_label_key_index: dict[int, int] = dict()
93
+ self.label_to_index: dict[str, int] = dict()
94
+ self.index_to_label: dict[int, str] = dict()
195
95
 
196
96
  # computation caches
197
- self._detailed_pairs: NDArray[np.floating] = np.array([])
198
- self._ranked_pairs: NDArray[np.floating] = np.array([])
97
+ self._detailed_pairs: NDArray[np.float64] = np.array([])
98
+ self._ranked_pairs: NDArray[np.float64] = np.array([])
199
99
  self._label_metadata: NDArray[np.int32] = np.array([])
200
100
  self._label_metadata_per_datum: NDArray[np.int32] = np.array([])
201
101
 
202
102
  @property
203
- def ignored_prediction_labels(self) -> list[tuple[str, str]]:
103
+ def ignored_prediction_labels(self) -> list[str]:
204
104
  """
205
105
  Prediction labels that are not present in the ground truth set.
206
106
  """
@@ -211,7 +111,7 @@ class Evaluator:
211
111
  ]
212
112
 
213
113
  @property
214
- def missing_prediction_labels(self) -> list[tuple[str, str]]:
114
+ def missing_prediction_labels(self) -> list[str]:
215
115
  """
216
116
  Ground truth labels that are not present in the prediction set.
217
117
  """
@@ -238,8 +138,7 @@ class Evaluator:
238
138
  def create_filter(
239
139
  self,
240
140
  datum_uids: list[str] | NDArray[np.int32] | None = None,
241
- labels: list[tuple[str, str]] | NDArray[np.int32] | None = None,
242
- label_keys: list[str] | NDArray[np.int32] | None = None,
141
+ labels: list[str] | NDArray[np.int32] | None = None,
243
142
  ) -> Filter:
244
143
  """
245
144
  Creates a filter that can be passed to an evaluation.
@@ -248,10 +147,8 @@ class Evaluator:
248
147
  ----------
249
148
  datum_uids : list[str] | NDArray[np.int32], optional
250
149
  An optional list of string uids or a numpy array of uid indices.
251
- labels : list[tuple[str, str]] | NDArray[np.int32], optional
150
+ labels : list[str] | NDArray[np.int32], optional
252
151
  An optional list of labels or a numpy array of label indices.
253
- label_keys : list[str] | NDArray[np.int32], optional
254
- An optional list of label keys or a numpy array of label key indices.
255
152
 
256
153
  Returns
257
154
  -------
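
A minimal, hedged sketch of the simplified filter API introduced here: labels are now plain strings rather than (key, value) tuples, and the separate `label_keys` argument has been removed. The `evaluator` instance and the label names below are illustrative assumptions, not part of this diff.

    # assumes `evaluator` was produced by DataLoader().finalize()
    filter_ = evaluator.create_filter(
        datum_uids=["uid1"],
        labels=["dog", "cat"],  # plain label strings in 0.33.8
    )
    filtered_metrics = evaluator.evaluate(iou_thresholds=[0.5], filter_=filter_)
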
@@ -296,24 +193,6 @@ class Evaluator:
296
193
  ] = False
297
194
  mask_labels[~np.isin(np.arange(n_labels), labels)] = False
298
195
 
299
- if label_keys is not None:
300
- if isinstance(label_keys, list):
301
- label_keys = np.array(
302
- [self.label_key_to_index[key] for key in label_keys]
303
- )
304
- label_indices = (
305
- np.where(np.isclose(self._label_metadata[:, 2], label_keys))[0]
306
- if label_keys.size > 0
307
- else np.array([])
308
- )
309
- mask_ranked[
310
- ~np.isin(self._ranked_pairs[:, 4].astype(int), label_indices)
311
- ] = False
312
- mask_detailed[
313
- ~np.isin(self._detailed_pairs[:, 4].astype(int), label_indices)
314
- ] = False
315
- mask_labels[~np.isin(np.arange(n_labels), label_indices)] = False
316
-
317
196
  mask_label_metadata = (
318
197
  mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
319
198
  )
@@ -321,13 +200,12 @@ class Evaluator:
321
200
  label_metadata_per_datum[:, ~mask_label_metadata] = 0
322
201
 
323
202
  label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
324
- label_metadata[:, :2] = np.transpose(
203
+ label_metadata = np.transpose(
325
204
  np.sum(
326
205
  label_metadata_per_datum,
327
206
  axis=1,
328
207
  )
329
208
  )
330
- label_metadata[:, 2] = self._label_metadata[:, 2]
331
209
 
332
210
  return Filter(
333
211
  ranked_indices=np.where(mask_ranked)[0],
@@ -342,6 +220,7 @@ class Evaluator:
342
220
  score_thresholds: list[float] = [0.5],
343
221
  number_of_examples: int = 0,
344
222
  filter_: Filter | None = None,
223
+ as_dict: bool = False,
345
224
  ) -> dict[MetricType, list]:
346
225
  """
347
226
  Performs an evaluation and returns metrics.
@@ -358,6 +237,8 @@ class Evaluator:
358
237
  Maximum number of annotation examples to return in ConfusionMatrix.
359
238
  filter_ : Filter, optional
360
239
  An optional filter object.
240
+ as_dict : bool, default=False
241
+ An option to return metrics as dictionaries.
361
242
 
362
243
  Returns
363
244
  -------
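
A minimal, hedged sketch of the new `as_dict` option on `Evaluator.evaluate`. The `evaluator` instance is assumed to exist (e.g., built via a DataLoader); when `as_dict=True`, each metric object is serialized with its `to_dict()` method, as a later hunk shows.

    # default: a dict keyed by MetricType whose values are metric objects
    metrics = evaluator.evaluate(iou_thresholds=[0.5, 0.75])

    # new in 0.33.8: the same structure, with each metric converted to a dict
    metric_dicts = evaluator.evaluate(iou_thresholds=[0.5, 0.75], as_dict=True)
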
@@ -410,12 +291,10 @@ class Evaluator:
410
291
 
411
292
  metrics[MetricType.mAP] = [
412
293
  mAP(
413
- value=mean_average_precision[iou_idx][label_key_idx],
294
+ value=mean_average_precision[iou_idx],
414
295
  iou_threshold=iou_thresholds[iou_idx],
415
- label_key=self.index_to_label_key[label_key_idx],
416
296
  )
417
297
  for iou_idx in range(mean_average_precision.shape[0])
418
- for label_key_idx in range(mean_average_precision.shape[1])
419
298
  ]
420
299
 
421
300
  metrics[MetricType.APAveragedOverIOUs] = [
@@ -430,12 +309,8 @@ class Evaluator:
430
309
 
431
310
  metrics[MetricType.mAPAveragedOverIOUs] = [
432
311
  mAPAveragedOverIOUs(
433
- value=mean_average_precision_average_over_ious[label_key_idx],
312
+ value=mean_average_precision_average_over_ious,
434
313
  iou_thresholds=iou_thresholds,
435
- label_key=self.index_to_label_key[label_key_idx],
436
- )
437
- for label_key_idx in range(
438
- mean_average_precision_average_over_ious.shape[0]
439
314
  )
440
315
  ]
441
316
 
@@ -453,13 +328,11 @@ class Evaluator:
453
328
 
454
329
  metrics[MetricType.mAR] = [
455
330
  mAR(
456
- value=mean_average_recall[score_idx][label_key_idx],
331
+ value=mean_average_recall[score_idx],
457
332
  iou_thresholds=iou_thresholds,
458
333
  score_threshold=score_thresholds[score_idx],
459
- label_key=self.index_to_label_key[label_key_idx],
460
334
  )
461
335
  for score_idx in range(mean_average_recall.shape[0])
462
- for label_key_idx in range(mean_average_recall.shape[1])
463
336
  ]
464
337
 
465
338
  metrics[MetricType.ARAveragedOverScores] = [
@@ -475,13 +348,9 @@ class Evaluator:
475
348
 
476
349
  metrics[MetricType.mARAveragedOverScores] = [
477
350
  mARAveragedOverScores(
478
- value=mean_average_recall_averaged_over_scores[label_key_idx],
351
+ value=mean_average_recall_averaged_over_scores,
479
352
  score_thresholds=score_thresholds,
480
353
  iou_thresholds=iou_thresholds,
481
- label_key=self.index_to_label_key[label_key_idx],
482
- )
483
- for label_key_idx in range(
484
- mean_average_recall_averaged_over_scores.shape[0]
485
354
  )
486
355
  ]
487
356
 
@@ -559,12 +428,30 @@ class Evaluator:
559
428
  if metric not in metrics_to_return:
560
429
  del metrics[metric]
561
430
 
431
+ if as_dict:
432
+ return {
433
+ mtype: [metric.to_dict() for metric in mvalues]
434
+ for mtype, mvalues in metrics.items()
435
+ }
436
+
562
437
  return metrics
563
438
 
439
+ def _convert_example_to_dict(
440
+ self, box: NDArray[np.float16]
441
+ ) -> dict[str, float]:
442
+ """
443
+ Converts a cached bounding box example to dictionary format.
444
+ """
445
+ return {
446
+ "xmin": box[0],
447
+ "xmax": box[1],
448
+ "ymin": box[2],
449
+ "ymax": box[3],
450
+ }
451
+
564
452
  def _unpack_confusion_matrix(
565
453
  self,
566
- confusion_matrix: NDArray[np.floating],
567
- label_key_idx: int,
454
+ confusion_matrix: NDArray[np.float64],
568
455
  number_of_labels: int,
569
456
  number_of_examples: int,
570
457
  ) -> dict[
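
A hedged note on the example format change in this hunk: cached bounding-box examples are now reported as dictionaries with explicit keys, via the new `_convert_example_to_dict` helper, instead of bare tuples. Values below are illustrative.

    # 0.33.6: "groundtruth": (xmin, xmax, ymin, ymax)
    # 0.33.8: "groundtruth": {"xmin": 0.0, "xmax": 10.0, "ymin": 0.0, "ymax": 10.0}
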
@@ -577,7 +464,7 @@ class Evaluator:
577
464
  | list[
578
465
  dict[
579
466
  str,
580
- str | float | tuple[float, float, float, float],
467
+ str | float | dict[str, float],
581
468
  ]
582
469
  ],
583
470
  ],
@@ -620,8 +507,8 @@ class Evaluator:
620
507
  )
621
508
 
622
509
  return {
623
- self.index_to_label[gt_label_idx][1]: {
624
- self.index_to_label[pd_label_idx][1]: {
510
+ self.index_to_label[gt_label_idx]: {
511
+ self.index_to_label[pd_label_idx]: {
625
512
  "count": max(
626
513
  int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
627
514
  0,
@@ -633,7 +520,7 @@ class Evaluator:
633
520
  gt_label_idx, pd_label_idx, example_idx
634
521
  )
635
522
  ],
636
- "groundtruth": tuple(
523
+ "groundtruth": self._convert_example_to_dict(
637
524
  self.groundtruth_examples[
638
525
  datum_idx(
639
526
  gt_label_idx,
@@ -646,9 +533,9 @@ class Evaluator:
646
533
  pd_label_idx,
647
534
  example_idx,
648
535
  )
649
- ].tolist()
536
+ ]
650
537
  ),
651
- "prediction": tuple(
538
+ "prediction": self._convert_example_to_dict(
652
539
  self.prediction_examples[
653
540
  datum_idx(
654
541
  gt_label_idx,
@@ -661,7 +548,7 @@ class Evaluator:
661
548
  pd_label_idx,
662
549
  example_idx,
663
550
  )
664
- ].tolist()
551
+ ]
665
552
  ),
666
553
  "score": score_idx(
667
554
  gt_label_idx, pd_label_idx, example_idx
@@ -673,30 +560,20 @@ class Evaluator:
673
560
  ],
674
561
  }
675
562
  for pd_label_idx in range(number_of_labels)
676
- if (
677
- self.label_index_to_label_key_index[pd_label_idx]
678
- == label_key_idx
679
- )
680
563
  }
681
564
  for gt_label_idx in range(number_of_labels)
682
- if (
683
- self.label_index_to_label_key_index[gt_label_idx]
684
- == label_key_idx
685
- )
686
565
  }
687
566
 
688
567
  def _unpack_hallucinations(
689
568
  self,
690
- hallucinations: NDArray[np.floating],
691
- label_key_idx: int,
569
+ hallucinations: NDArray[np.float64],
692
570
  number_of_labels: int,
693
571
  number_of_examples: int,
694
572
  ) -> dict[
695
573
  str,
696
574
  dict[
697
575
  str,
698
- int
699
- | list[dict[str, str | float | tuple[float, float, float, float]]],
576
+ int | list[dict[str, str | float | dict[str, float]]],
700
577
  ],
701
578
  ]:
702
579
  """
@@ -731,7 +608,7 @@ class Evaluator:
731
608
  )
732
609
 
733
610
  return {
734
- self.index_to_label[pd_label_idx][1]: {
611
+ self.index_to_label[pd_label_idx]: {
735
612
  "count": max(
736
613
  int(hallucinations[pd_label_idx, 0]),
737
614
  0,
@@ -741,12 +618,10 @@ class Evaluator:
741
618
  "datum": self.index_to_uid[
742
619
  datum_idx(pd_label_idx, example_idx)
743
620
  ],
744
- "prediction": tuple(
621
+ "prediction": self._convert_example_to_dict(
745
622
  self.prediction_examples[
746
623
  datum_idx(pd_label_idx, example_idx)
747
- ][
748
- prediction_idx(pd_label_idx, example_idx)
749
- ].tolist()
624
+ ][prediction_idx(pd_label_idx, example_idx)]
750
625
  ),
751
626
  "score": score_idx(pd_label_idx, example_idx),
752
627
  }
@@ -755,25 +630,14 @@ class Evaluator:
755
630
  ],
756
631
  }
757
632
  for pd_label_idx in range(number_of_labels)
758
- if (
759
- self.label_index_to_label_key_index[pd_label_idx]
760
- == label_key_idx
761
- )
762
633
  }
763
634
 
764
635
  def _unpack_missing_predictions(
765
636
  self,
766
637
  missing_predictions: NDArray[np.int32],
767
- label_key_idx: int,
768
638
  number_of_labels: int,
769
639
  number_of_examples: int,
770
- ) -> dict[
771
- str,
772
- dict[
773
- str,
774
- int | list[dict[str, str | tuple[float, float, float, float]]],
775
- ],
776
- ]:
640
+ ) -> dict[str, dict[str, int | list[dict[str, str | dict[str, float]]]]]:
777
641
  """
778
642
  Unpacks a numpy array of missing prediction counts and examples.
779
643
  """
@@ -797,7 +661,7 @@ class Evaluator:
797
661
  )
798
662
 
799
663
  return {
800
- self.index_to_label[gt_label_idx][1]: {
664
+ self.index_to_label[gt_label_idx]: {
801
665
  "count": max(
802
666
  int(missing_predictions[gt_label_idx, 0]),
803
667
  0,
@@ -807,12 +671,10 @@ class Evaluator:
807
671
  "datum": self.index_to_uid[
808
672
  datum_idx(gt_label_idx, example_idx)
809
673
  ],
810
- "groundtruth": tuple(
674
+ "groundtruth": self._convert_example_to_dict(
811
675
  self.groundtruth_examples[
812
676
  datum_idx(gt_label_idx, example_idx)
813
- ][
814
- groundtruth_idx(gt_label_idx, example_idx)
815
- ].tolist()
677
+ ][groundtruth_idx(gt_label_idx, example_idx)]
816
678
  ),
817
679
  }
818
680
  for example_idx in range(number_of_examples)
@@ -820,15 +682,11 @@ class Evaluator:
820
682
  ],
821
683
  }
822
684
  for gt_label_idx in range(number_of_labels)
823
- if (
824
- self.label_index_to_label_key_index[gt_label_idx]
825
- == label_key_idx
826
- )
827
685
  }
828
686
 
829
687
  def _compute_confusion_matrix(
830
688
  self,
831
- data: NDArray[np.floating],
689
+ data: NDArray[np.float64],
832
690
  label_metadata: NDArray[np.int32],
833
691
  iou_thresholds: list[float],
834
692
  score_thresholds: list[float],
@@ -839,7 +697,7 @@ class Evaluator:
839
697
 
840
698
  Parameters
841
699
  ----------
842
- data : NDArray[np.floating]
700
+ data : NDArray[np.float64]
843
701
  An array containing detailed pairs of detections.
844
702
  label_metadata : NDArray[np.int32]
845
703
  An array containing label metadata.
@@ -876,19 +734,16 @@ class Evaluator:
876
734
  ConfusionMatrix(
877
735
  iou_threshold=iou_thresholds[iou_idx],
878
736
  score_threshold=score_thresholds[score_idx],
879
- label_key=label_key,
880
737
  number_of_examples=number_of_examples,
881
738
  confusion_matrix=self._unpack_confusion_matrix(
882
739
  confusion_matrix=confusion_matrix[
883
740
  iou_idx, score_idx, :, :, :
884
741
  ],
885
- label_key_idx=label_key_idx,
886
742
  number_of_labels=n_labels,
887
743
  number_of_examples=number_of_examples,
888
744
  ),
889
745
  hallucinations=self._unpack_hallucinations(
890
746
  hallucinations=hallucinations[iou_idx, score_idx, :, :],
891
- label_key_idx=label_key_idx,
892
747
  number_of_labels=n_labels,
893
748
  number_of_examples=number_of_examples,
894
749
  ),
@@ -896,12 +751,10 @@ class Evaluator:
896
751
  missing_predictions=missing_predictions[
897
752
  iou_idx, score_idx, :, :
898
753
  ],
899
- label_key_idx=label_key_idx,
900
754
  number_of_labels=n_labels,
901
755
  number_of_examples=number_of_examples,
902
756
  ),
903
757
  )
904
- for label_key_idx, label_key in self.index_to_label_key.items()
905
758
  for iou_idx in range(n_ious)
906
759
  for score_idx in range(n_scores)
907
760
  ]
@@ -914,7 +767,7 @@ class DataLoader:
914
767
 
915
768
  def __init__(self):
916
769
  self._evaluator = Evaluator()
917
- self.pairs: list[NDArray[np.floating]] = list()
770
+ self.pairs: list[NDArray[np.float64]] = list()
918
771
  self.groundtruth_count = defaultdict(lambda: defaultdict(int))
919
772
  self.prediction_count = defaultdict(lambda: defaultdict(int))
920
773
 
@@ -938,51 +791,36 @@ class DataLoader:
938
791
  self._evaluator.index_to_uid[index] = uid
939
792
  return self._evaluator.uid_to_index[uid]
940
793
 
941
- def _add_label(self, label: tuple[str, str]) -> tuple[int, int]:
794
+ def _add_label(self, label: str) -> int:
942
795
  """
943
796
  Helper function for adding a label to the cache.
944
797
 
945
798
  Parameters
946
799
  ----------
947
- label : tuple[str, str]
948
- The label as a tuple in format (key, value).
800
+ label : str
801
+ The label associated with the annotation.
949
802
 
950
803
  Returns
951
804
  -------
952
805
  int
953
806
  Label index.
954
- int
955
- Label key index.
956
807
  """
957
808
 
958
809
  label_id = len(self._evaluator.index_to_label)
959
- label_key_id = len(self._evaluator.index_to_label_key)
960
810
  if label not in self._evaluator.label_to_index:
961
811
  self._evaluator.label_to_index[label] = label_id
962
812
  self._evaluator.index_to_label[label_id] = label
963
813
 
964
- # update label key index
965
- if label[0] not in self._evaluator.label_key_to_index:
966
- self._evaluator.label_key_to_index[label[0]] = label_key_id
967
- self._evaluator.index_to_label_key[label_key_id] = label[0]
968
- label_key_id += 1
969
-
970
- self._evaluator.label_index_to_label_key_index[
971
- label_id
972
- ] = self._evaluator.label_key_to_index[label[0]]
973
814
  label_id += 1
974
815
 
975
- return (
976
- self._evaluator.label_to_index[label],
977
- self._evaluator.label_key_to_index[label[0]],
978
- )
816
+ return self._evaluator.label_to_index[label]
979
817
 
980
818
  def _compute_ious_and_cache_pairs(
981
819
  self,
982
820
  uid_index: int,
983
- keyed_groundtruths: dict,
984
- keyed_predictions: dict,
985
- annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask],
821
+ groundtruths: list,
822
+ predictions: list,
823
+ annotation_type: Type[BoundingBox] | Type[Polygon] | Type[Bitmask],
986
824
  ) -> None:
987
825
  """
988
826
  Compute IOUs between groundtruths and predictions before storing them as pairs.
@@ -991,122 +829,92 @@ class DataLoader:
991
829
  ----------
992
830
  uid_index: int
993
831
  The index of the detection.
994
- keyed_groundtruths: dict
995
- A dictionary of groundtruths.
996
- keyed_predictions: dict
997
- A dictionary of predictions.
832
+ groundtruths: list
833
+ A list of groundtruths.
834
+ predictions: list
835
+ A list of predictions.
998
836
  annotation_type: type[BoundingBox] | type[Polygon] | type[Bitmask]
999
837
  The type of annotation to compute IOUs for.
1000
838
  """
1001
- gt_keys = set(keyed_groundtruths.keys())
1002
- pd_keys = set(keyed_predictions.keys())
1003
- joint_keys = gt_keys.intersection(pd_keys)
1004
- gt_unique_keys = gt_keys - pd_keys
1005
- pd_unique_keys = pd_keys - gt_keys
1006
839
 
1007
840
  pairs = list()
1008
- for key in joint_keys:
1009
- n_predictions = len(keyed_predictions[key])
1010
- n_groundtruths = len(keyed_groundtruths[key])
1011
- data = _get_annotation_data(
1012
- keyed_groundtruths=keyed_groundtruths,
1013
- keyed_predictions=keyed_predictions,
1014
- key=key,
1015
- annotation_type=annotation_type,
1016
- )
1017
- ious = compute_iou(data=data, annotation_type=annotation_type)
1018
- mask_nonzero_iou = (ious > 1e-9).reshape(
1019
- (n_predictions, n_groundtruths)
1020
- )
1021
- mask_ious_halluc = ~(mask_nonzero_iou.any(axis=1))
1022
- mask_ious_misprd = ~(mask_nonzero_iou.any(axis=0))
841
+ n_predictions = len(predictions)
842
+ n_groundtruths = len(groundtruths)
1023
843
 
1024
- pairs.extend(
1025
- [
1026
- np.array(
1027
- [
1028
- float(uid_index),
1029
- float(gidx),
1030
- float(pidx),
1031
- ious[pidx * len(keyed_groundtruths[key]) + gidx],
1032
- float(glabel),
1033
- float(plabel),
1034
- float(score),
1035
- ]
1036
- )
1037
- for pidx, plabel, score, _ in keyed_predictions[key]
1038
- for gidx, glabel, _ in keyed_groundtruths[key]
1039
- if ious[pidx * len(keyed_groundtruths[key]) + gidx] > 1e-9
1040
- ]
1041
- )
1042
- pairs.extend(
1043
- [
1044
- np.array(
1045
- [
1046
- float(uid_index),
1047
- -1.0,
1048
- float(pidx),
1049
- 0.0,
1050
- -1.0,
1051
- float(plabel),
1052
- float(score),
1053
- ]
1054
- )
1055
- for pidx, plabel, score, _ in keyed_predictions[key]
1056
- if mask_ious_halluc[pidx]
1057
- ]
1058
- )
1059
- pairs.extend(
1060
- [
1061
- np.array(
1062
- [
1063
- float(uid_index),
1064
- float(gidx),
1065
- -1.0,
1066
- 0.0,
1067
- float(glabel),
1068
- -1.0,
1069
- -1.0,
1070
- ]
1071
- )
1072
- for gidx, glabel, _ in keyed_groundtruths[key]
1073
- if mask_ious_misprd[gidx]
1074
- ]
1075
- )
1076
- for key in gt_unique_keys:
1077
- pairs.extend(
1078
- [
1079
- np.array(
1080
- [
1081
- float(uid_index),
1082
- float(gidx),
1083
- -1.0,
1084
- 0.0,
1085
- float(glabel),
1086
- -1.0,
1087
- -1.0,
1088
- ]
1089
- )
1090
- for gidx, glabel, _ in keyed_groundtruths[key]
1091
- ]
1092
- )
1093
- for key in pd_unique_keys:
1094
- pairs.extend(
1095
- [
1096
- np.array(
1097
- [
1098
- float(uid_index),
1099
- -1.0,
1100
- float(pidx),
1101
- 0.0,
1102
- -1.0,
1103
- float(plabel),
1104
- float(score),
1105
- ]
1106
- )
1107
- for pidx, plabel, score, _ in keyed_predictions[key]
1108
- ]
1109
- )
844
+ all_pairs = np.array(
845
+ [
846
+ np.array([gann, pann])
847
+ for _, _, _, pann in predictions
848
+ for _, _, gann in groundtruths
849
+ ]
850
+ )
851
+
852
+ match annotation_type:
853
+ case annotation.BoundingBox:
854
+ ious = compute_bbox_iou(all_pairs)
855
+ case annotation.Polygon:
856
+ ious = compute_polygon_iou(all_pairs)
857
+ case annotation.Bitmask:
858
+ ious = compute_bitmask_iou(all_pairs)
859
+ case _:
860
+ raise ValueError(
861
+ f"Invalid annotation type `{annotation_type}`."
862
+ )
863
+
864
+ ious = ious.reshape(n_predictions, n_groundtruths)
865
+ predictions_with_iou_of_zero = np.where((ious < 1e-9).all(axis=1))[0]
866
+ groundtruths_with_iou_of_zero = np.where((ious < 1e-9).all(axis=0))[0]
867
+
868
+ pairs.extend(
869
+ [
870
+ np.array(
871
+ [
872
+ float(uid_index),
873
+ float(gidx),
874
+ float(pidx),
875
+ ious[pidx, gidx],
876
+ float(glabel),
877
+ float(plabel),
878
+ float(score),
879
+ ]
880
+ )
881
+ for pidx, plabel, score, _ in predictions
882
+ for gidx, glabel, _ in groundtruths
883
+ if ious[pidx, gidx] >= 1e-9
884
+ ]
885
+ )
886
+ pairs.extend(
887
+ [
888
+ np.array(
889
+ [
890
+ float(uid_index),
891
+ -1.0,
892
+ float(predictions[index][0]),
893
+ 0.0,
894
+ -1.0,
895
+ float(predictions[index][1]),
896
+ float(predictions[index][2]),
897
+ ]
898
+ )
899
+ for index in predictions_with_iou_of_zero
900
+ ]
901
+ )
902
+ pairs.extend(
903
+ [
904
+ np.array(
905
+ [
906
+ float(uid_index),
907
+ float(groundtruths[index][0]),
908
+ -1.0,
909
+ 0.0,
910
+ float(groundtruths[index][1]),
911
+ -1.0,
912
+ -1.0,
913
+ ]
914
+ )
915
+ for index in groundtruths_with_iou_of_zero
916
+ ]
917
+ )
1110
918
 
1111
919
  self.pairs.append(np.array(pairs))
1112
920
 
@@ -1148,12 +956,8 @@ class DataLoader:
1148
956
  )
1149
957
 
1150
958
  # cache labels and annotations
1151
- keyed_groundtruths = defaultdict(list)
1152
- keyed_predictions = defaultdict(list)
1153
-
1154
- representation_property = _get_annotation_representation(
1155
- annotation_type=annotation_type
1156
- )
959
+ groundtruths = list()
960
+ predictions = list()
1157
961
 
1158
962
  for gidx, gann in enumerate(detection.groundtruths):
1159
963
  if not isinstance(gann, annotation_type):
@@ -1161,26 +965,17 @@ class DataLoader:
1161
965
  f"Expected {annotation_type}, but annotation is of type {type(gann)}."
1162
966
  )
1163
967
 
1164
- if isinstance(gann, BoundingBox):
1165
- self._evaluator.groundtruth_examples[uid_index][
1166
- gidx
1167
- ] = getattr(gann, representation_property)
1168
- else:
1169
- converted_box = gann.to_box()
1170
- self._evaluator.groundtruth_examples[uid_index][gidx] = (
1171
- getattr(converted_box, "extrema")
1172
- if converted_box is not None
1173
- else None
1174
- )
968
+ self._evaluator.groundtruth_examples[uid_index][
969
+ gidx
970
+ ] = gann.extrema
1175
971
  for glabel in gann.labels:
1176
- label_idx, label_key_idx = self._add_label(glabel)
972
+ label_idx = self._add_label(glabel)
1177
973
  self.groundtruth_count[label_idx][uid_index] += 1
1178
- representation = getattr(gann, representation_property)
1179
- keyed_groundtruths[label_key_idx].append(
974
+ groundtruths.append(
1180
975
  (
1181
976
  gidx,
1182
977
  label_idx,
1183
- representation,
978
+ gann.annotation,
1184
979
  )
1185
980
  )
1186
981
 
@@ -1190,36 +985,25 @@ class DataLoader:
1190
985
  f"Expected {annotation_type}, but annotation is of type {type(pann)}."
1191
986
  )
1192
987
 
1193
- if isinstance(pann, BoundingBox):
1194
- self._evaluator.prediction_examples[uid_index][
1195
- pidx
1196
- ] = getattr(pann, representation_property)
1197
- else:
1198
- converted_box = pann.to_box()
1199
- self._evaluator.prediction_examples[uid_index][pidx] = (
1200
- getattr(converted_box, "extrema")
1201
- if converted_box is not None
1202
- else None
1203
- )
988
+ self._evaluator.prediction_examples[uid_index][
989
+ pidx
990
+ ] = pann.extrema
1204
991
  for plabel, pscore in zip(pann.labels, pann.scores):
1205
- label_idx, label_key_idx = self._add_label(plabel)
992
+ label_idx = self._add_label(plabel)
1206
993
  self.prediction_count[label_idx][uid_index] += 1
1207
- representation = representation = getattr(
1208
- pann, representation_property
1209
- )
1210
- keyed_predictions[label_key_idx].append(
994
+ predictions.append(
1211
995
  (
1212
996
  pidx,
1213
997
  label_idx,
1214
998
  pscore,
1215
- representation,
999
+ pann.annotation,
1216
1000
  )
1217
1001
  )
1218
1002
 
1219
1003
  self._compute_ious_and_cache_pairs(
1220
1004
  uid_index=uid_index,
1221
- keyed_groundtruths=keyed_groundtruths,
1222
- keyed_predictions=keyed_predictions,
1005
+ groundtruths=groundtruths,
1006
+ predictions=predictions,
1223
1007
  annotation_type=annotation_type,
1224
1008
  )
1225
1009
 
@@ -1286,10 +1070,9 @@ class DataLoader:
1286
1070
  annotation_type=Bitmask,
1287
1071
  )
1288
1072
 
1289
- def _add_data_from_valor_dict(
1073
+ def add_bounding_boxes_from_valor_dict(
1290
1074
  self,
1291
1075
  detections: list[tuple[dict, dict]],
1292
- annotation_type: type[Bitmask] | type[BoundingBox] | type[Polygon],
1293
1076
  show_progress: bool = False,
1294
1077
  ):
1295
1078
  """
@@ -1304,9 +1087,29 @@ class DataLoader:
1304
1087
  show_progress : bool, default=False
1305
1088
  Toggle for tqdm progress bar.
1306
1089
  """
1090
+ warnings.warn(
1091
+ "The `...from_valor_dict` functions are deprecated and will be deleted in the near future. Use `add_bounding_boxes`, `add_bitmasks`, or `add_polygons` instead.",
1092
+ DeprecationWarning,
1093
+ )
1094
+
1095
+ def _get_bbox_extrema(
1096
+ data: list,
1097
+ ) -> tuple[float, float, float, float]:
1098
+ """Get the correct representation of an annotation object from a valor dictionary."""
1099
+ x = [point[0] for shape in data for point in shape]
1100
+ y = [point[1] for shape in data for point in shape]
1101
+ return (min(x), max(x), min(y), max(y))
1307
1102
 
1308
1103
  disable_tqdm = not show_progress
1309
1104
  for groundtruth, prediction in tqdm(detections, disable=disable_tqdm):
1105
+
1106
+ if not isinstance(groundtruth, dict) or not isinstance(
1107
+ prediction, dict
1108
+ ):
1109
+ raise ValueError(
1110
+ f"Received values with type `{type(groundtruth)}` which are not valid Valor dictionaries."
1111
+ )
1112
+
1310
1113
  # update metadata
1311
1114
  self._evaluator.n_datums += 1
1312
1115
  self._evaluator.n_groundtruths += len(groundtruth["annotations"])
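
A minimal, hedged sketch of the deprecation behaviour introduced in this hunk: the `...from_valor_dict` loaders still run in 0.33.8 but now emit a `DeprecationWarning`. The `loader` and `detections` objects (a list of Valor-format groundtruth/prediction dictionary pairs) are illustrative assumptions.

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        loader.add_bounding_boxes_from_valor_dict(detections, show_progress=False)
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
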
@@ -1324,112 +1127,63 @@ class DataLoader:
1324
1127
  )
1325
1128
 
1326
1129
  # cache labels and annotations
1327
- keyed_groundtruths = defaultdict(list)
1328
- keyed_predictions = defaultdict(list)
1329
-
1330
- annotation_key = _get_valor_dict_annotation_key(
1331
- annotation_type=annotation_type
1332
- )
1333
- invalid_keys = list(
1334
- filter(
1335
- lambda x: x != annotation_key,
1336
- ["bounding_box", "raster", "polygon"],
1337
- )
1338
- )
1130
+ groundtruths = list()
1131
+ predictions = list()
1339
1132
 
1340
1133
  for gidx, gann in enumerate(groundtruth["annotations"]):
1341
- if (gann[annotation_key] is None) or any(
1342
- [gann[k] is not None for k in invalid_keys]
1343
- ):
1134
+ if gann["bounding_box"] is None:
1344
1135
  raise ValueError(
1345
- f"Input JSON doesn't contain {annotation_type} data, or contains data for multiple annotation types."
1346
- )
1347
- if annotation_type == BoundingBox:
1348
- self._evaluator.groundtruth_examples[uid_index][
1349
- gidx
1350
- ] = np.array(
1351
- _get_annotation_representation_from_valor_dict(
1352
- gann[annotation_key],
1353
- annotation_type=annotation_type,
1354
- ),
1136
+ f"Detection `{groundtruth['datum']['uid']}` contains a ground truth without a bounding box."
1355
1137
  )
1356
-
1138
+ self._evaluator.groundtruth_examples[uid_index][
1139
+ gidx
1140
+ ] = np.array(
1141
+ _get_bbox_extrema(gann["bounding_box"]),
1142
+ )
1357
1143
  for valor_label in gann["labels"]:
1358
- glabel = (valor_label["key"], valor_label["value"])
1359
- label_idx, label_key_idx = self._add_label(glabel)
1144
+ if valor_label["key"] != "name":
1145
+ continue
1146
+ glabel = f'{valor_label["key"]}_{valor_label["value"]}'
1147
+ label_idx = self._add_label(glabel)
1360
1148
  self.groundtruth_count[label_idx][uid_index] += 1
1361
- keyed_groundtruths[label_key_idx].append(
1149
+ groundtruths.append(
1362
1150
  (
1363
1151
  gidx,
1364
1152
  label_idx,
1365
- _get_annotation_representation_from_valor_dict(
1366
- gann[annotation_key],
1367
- annotation_type=annotation_type,
1368
- ),
1153
+ _get_bbox_extrema(gann["bounding_box"]),
1369
1154
  )
1370
1155
  )
1371
1156
  for pidx, pann in enumerate(prediction["annotations"]):
1372
- if (pann[annotation_key] is None) or any(
1373
- [pann[k] is not None for k in invalid_keys]
1374
- ):
1157
+ if pann["bounding_box"] is None:
1375
1158
  raise ValueError(
1376
- f"Input JSON doesn't contain {annotation_type} data, or contains data for multiple annotation types."
1377
- )
1378
-
1379
- if annotation_type == BoundingBox:
1380
- self._evaluator.prediction_examples[uid_index][
1381
- pidx
1382
- ] = np.array(
1383
- _get_annotation_representation_from_valor_dict(
1384
- pann[annotation_key],
1385
- annotation_type=annotation_type,
1386
- )
1159
+ f"Detection `{prediction['datum']['uid']}` contains a prediction without a bounding box."
1387
1160
  )
1161
+ self._evaluator.prediction_examples[uid_index][
1162
+ pidx
1163
+ ] = np.array(_get_bbox_extrema(pann["bounding_box"]))
1388
1164
  for valor_label in pann["labels"]:
1389
- plabel = (valor_label["key"], valor_label["value"])
1165
+ if valor_label["key"] != "name":
1166
+ continue
1167
+ plabel = valor_label["value"]
1390
1168
  pscore = valor_label["score"]
1391
- label_idx, label_key_idx = self._add_label(plabel)
1169
+ label_idx = self._add_label(plabel)
1392
1170
  self.prediction_count[label_idx][uid_index] += 1
1393
- keyed_predictions[label_key_idx].append(
1171
+ predictions.append(
1394
1172
  (
1395
1173
  pidx,
1396
1174
  label_idx,
1397
1175
  pscore,
1398
- _get_annotation_representation_from_valor_dict(
1399
- pann[annotation_key],
1400
- annotation_type=annotation_type,
1401
- ),
1176
+ _get_bbox_extrema(pann["bounding_box"]),
1402
1177
  )
1403
1178
  )
1404
1179
 
1405
1180
  self._compute_ious_and_cache_pairs(
1406
1181
  uid_index=uid_index,
1407
- keyed_groundtruths=keyed_groundtruths,
1408
- keyed_predictions=keyed_predictions,
1409
- annotation_type=annotation_type,
1182
+ groundtruths=groundtruths,
1183
+ predictions=predictions,
1184
+ annotation_type=BoundingBox,
1410
1185
  )
1411
1186
 
1412
- def add_bounding_boxes_from_valor_dict(
1413
- self,
1414
- detections: list[tuple[dict, dict]],
1415
- show_progress: bool = False,
1416
- ):
1417
- """
1418
- Adds Valor-format bounding box detections to the cache.
1419
-
1420
- Parameters
1421
- ----------
1422
- detections : list[tuple[dict, dict]]
1423
- A list of groundtruth, prediction pairs in Valor-format dictionaries.
1424
- show_progress : bool, default=False
1425
- Toggle for tqdm progress bar.
1426
- """
1427
- return self._add_data_from_valor_dict(
1428
- detections=detections,
1429
- show_progress=show_progress,
1430
- annotation_type=BoundingBox,
1431
- )
1432
-
1433
1187
  def finalize(self) -> Evaluator:
1434
1188
  """
1435
1189
  Performs data finalization and some preprocessing steps.
@@ -1485,11 +1239,6 @@ class DataLoader:
1485
1239
  ]
1486
1240
  )
1487
1241
  ),
1488
- float(
1489
- self._evaluator.label_index_to_label_key_index[
1490
- label_idx
1491
- ]
1492
- ),
1493
1242
  ]
1494
1243
  for label_idx in range(n_labels)
1495
1244
  ]
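
Taken together, the central API change in 0.33.8 is the removal of label keys: labels are stored and reported as plain strings, and the key-specific machinery (`label_key`, `label_keys`, `index_to_label_key`, `label_key_to_index`, and related fields) disappears from the metrics, the filter API, and the caches. A hedged illustration of the resulting label cache (values are illustrative):

    # 0.33.6: evaluator.index_to_label maps int -> (key, value) tuples
    #   {0: ("class", "dog"), 1: ("class", "cat")}
    # 0.33.8: evaluator.index_to_label maps int -> plain label strings
    #   {0: "dog", 1: "cat"}
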