valor-lite 0.33.4__py3-none-any.whl → 0.33.6__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

@@ -14,7 +14,7 @@ from valor_lite.detection.annotation import (
 from valor_lite.detection.computation import (
     compute_bbox_iou,
     compute_bitmask_iou,
-    compute_detailed_counts,
+    compute_confusion_matrix,
     compute_metrics,
     compute_polygon_iou,
     compute_ranked_pairs,
@@ -26,8 +26,8 @@ from valor_lite.detection.metric import (
     Accuracy,
     APAveragedOverIOUs,
     ARAveragedOverScores,
+    ConfusionMatrix,
     Counts,
-    DetailedCounts,
     MetricType,
     Precision,
     PrecisionRecallCurve,
@@ -158,7 +158,8 @@ def compute_iou(
 
 @dataclass
 class Filter:
-    indices: NDArray[np.int32]
+    ranked_indices: NDArray[np.int32]
+    detailed_indices: NDArray[np.int32]
     label_metadata: NDArray[np.int32]
 
 
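The `Filter` split above means downstream code now tracks two index arrays: one over the ranked pairs used for precision/recall metrics, and one over the detailed pairs used for the confusion matrix. A minimal standalone sketch of that filtering pattern, using made-up pair tables in which only column 0 (the datum index) matters; the masking hunks that follow apply the same np.isin-over-column-0 logic to the evaluator's real tables:

    import numpy as np

    # Hypothetical stand-ins for the evaluator's internal pair tables.
    ranked_pairs = np.array([[0.0, 0.9], [1.0, 0.8], [2.0, 0.7]])
    detailed_pairs = np.array([[0.0, 0.9], [0.0, 0.4], [2.0, 0.7]])

    keep_datums = np.array([0, 2])  # datum indices surviving the filter

    # Rows whose datum index is in keep_datums.
    ranked_indices = np.where(
        np.isin(ranked_pairs[:, 0].astype(int), keep_datums)
    )[0]
    detailed_indices = np.where(
        np.isin(detailed_pairs[:, 0].astype(int), keep_datums)
    )[0]

    print(ranked_pairs[ranked_indices])      # datums 0 and 2
    print(detailed_pairs[detailed_indices])  # datums 0 and 2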
@@ -257,12 +258,14 @@ class Evaluator:
         Filter
             A filter object that can be passed to the `evaluate` method.
         """
-        n_rows = self._ranked_pairs.shape[0]
 
         n_datums = self._label_metadata_per_datum.shape[1]
         n_labels = self._label_metadata_per_datum.shape[2]
 
-        mask_pairs = np.ones((n_rows, 1), dtype=np.bool_)
+        mask_ranked = np.ones((self._ranked_pairs.shape[0], 1), dtype=np.bool_)
+        mask_detailed = np.ones(
+            (self._detailed_pairs.shape[0], 1), dtype=np.bool_
+        )
         mask_datums = np.ones(n_datums, dtype=np.bool_)
         mask_labels = np.ones(n_labels, dtype=np.bool_)
 
@@ -272,9 +275,12 @@ class Evaluator:
                 [self.uid_to_index[uid] for uid in datum_uids],
                 dtype=np.int32,
             )
-            mask_pairs[
+            mask_ranked[
                 ~np.isin(self._ranked_pairs[:, 0].astype(int), datum_uids)
             ] = False
+            mask_detailed[
+                ~np.isin(self._detailed_pairs[:, 0].astype(int), datum_uids)
+            ] = False
             mask_datums[~np.isin(np.arange(n_datums), datum_uids)] = False
 
         if labels is not None:
@@ -282,9 +288,12 @@ class Evaluator:
             labels = np.array(
                 [self.label_to_index[label] for label in labels]
             )
-            mask_pairs[
+            mask_ranked[
                 ~np.isin(self._ranked_pairs[:, 4].astype(int), labels)
             ] = False
+            mask_detailed[
+                ~np.isin(self._detailed_pairs[:, 4].astype(int), labels)
+            ] = False
             mask_labels[~np.isin(np.arange(n_labels), labels)] = False
 
         if label_keys is not None:
@@ -297,14 +306,19 @@ class Evaluator:
                 if label_keys.size > 0
                 else np.array([])
            )
-            mask_pairs[
+            mask_ranked[
                 ~np.isin(self._ranked_pairs[:, 4].astype(int), label_indices)
             ] = False
+            mask_detailed[
+                ~np.isin(self._detailed_pairs[:, 4].astype(int), label_indices)
+            ] = False
             mask_labels[~np.isin(np.arange(n_labels), label_indices)] = False
 
-        mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
+        mask_label_metadata = (
+            mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
+        )
         label_metadata_per_datum = self._label_metadata_per_datum.copy()
-        label_metadata_per_datum[:, ~mask] = 0
+        label_metadata_per_datum[:, ~mask_label_metadata] = 0
 
         label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
         label_metadata[:, :2] = np.transpose(
@@ -316,7 +330,8 @@ class Evaluator:
         label_metadata[:, 2] = self._label_metadata[:, 2]
 
         return Filter(
-            indices=np.where(mask_pairs)[0],
+            ranked_indices=np.where(mask_ranked)[0],
+            detailed_indices=np.where(mask_detailed)[0],
             label_metadata=label_metadata,
         )
 
@@ -340,7 +355,7 @@ class Evaluator:
         score_thresholds : list[float]
             A list of score thresholds to compute metrics over.
         number_of_examples : int, default=0
-            Number of annotation examples to return in DetailedCounts.
+            Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
 
@@ -350,10 +365,12 @@ class Evaluator:
             A dictionary mapping MetricType enumerations to lists of computed metrics.
         """
 
-        data = self._ranked_pairs
+        ranked_pairs = self._ranked_pairs
+        detailed_pairs = self._detailed_pairs
         label_metadata = self._label_metadata
         if filter_ is not None:
-            data = data[filter_.indices]
+            ranked_pairs = ranked_pairs[filter_.ranked_indices]
+            detailed_pairs = detailed_pairs[filter_.detailed_indices]
             label_metadata = filter_.label_metadata
 
         (
@@ -372,7 +389,7 @@ class Evaluator:
             precision_recall,
             pr_curves,
         ) = compute_metrics(
-            data=data,
+            data=ranked_pairs,
             label_metadata=label_metadata,
             iou_thresholds=np.array(iou_thresholds),
             score_thresholds=np.array(score_thresholds),
@@ -527,11 +544,15 @@ class Evaluator:
                 )
             )
 
-        if MetricType.DetailedCounts in metrics_to_return:
-            metrics[MetricType.DetailedCounts] = self._compute_detailed_counts(
+        if MetricType.ConfusionMatrix in metrics_to_return:
+            metrics[
+                MetricType.ConfusionMatrix
+            ] = self._compute_confusion_matrix(
+                data=detailed_pairs,
+                label_metadata=label_metadata,
                 iou_thresholds=iou_thresholds,
                 score_thresholds=score_thresholds,
-                n_samples=number_of_examples,
+                number_of_examples=number_of_examples,
             )
 
         for metric in set(metrics.keys()):
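For orientation, a hedged sketch of the updated evaluate path. Constructing the `Evaluator` (normally done via a data loader) is outside this diff, so the `evaluator` object below is assumed to exist already, and `metrics_to_return` is assumed to be the `evaluate` parameter referenced in the hunk above; the `MetricType` import path comes from the import hunk at the top of this diff.

    from valor_lite.detection.metric import MetricType

    # `evaluator` is an already-populated Evaluator; its setup is not
    # part of this diff.
    metrics = evaluator.evaluate(
        iou_thresholds=[0.5, 0.75],
        score_thresholds=[0.5],
        number_of_examples=2,  # max annotation examples per entry
        metrics_to_return=[MetricType.ConfusionMatrix],
    )

    # One ConfusionMatrix per (label key, IoU threshold, score threshold),
    # per the comprehension at the end of the new _compute_confusion_matrix
    # shown below.
    for cm in metrics[MetricType.ConfusionMatrix]:
        print(cm.label_key, cm.iou_threshold, cm.score_threshold)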
@@ -540,149 +561,349 @@ class Evaluator:
 
         return metrics
 
-    def _compute_detailed_counts(
+    def _unpack_confusion_matrix(
+        self,
+        confusion_matrix: NDArray[np.floating],
+        label_key_idx: int,
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[
+        str,
+        dict[
+            str,
+            dict[
+                str,
+                int
+                | list[
+                    dict[
+                        str,
+                        str | float | tuple[float, float, float, float],
+                    ]
+                ],
+            ],
+        ],
+    ]:
+        """
+        Unpacks a numpy array of confusion matrix counts and examples.
+        """
+
+        datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 1,
+            ]
+        )
+
+        groundtruth_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 2,
+            ]
+        )
+
+        prediction_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 3,
+            ]
+        )
+
+        score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 4,
+            ]
+        )
+
+        return {
+            self.index_to_label[gt_label_idx][1]: {
+                self.index_to_label[pd_label_idx][1]: {
+                    "count": max(
+                        int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
+                        0,
+                    ),
+                    "examples": [
+                        {
+                            "datum": self.index_to_uid[
+                                datum_idx(
+                                    gt_label_idx, pd_label_idx, example_idx
+                                )
+                            ],
+                            "groundtruth": tuple(
+                                self.groundtruth_examples[
+                                    datum_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ][
+                                    groundtruth_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ].tolist()
+                            ),
+                            "prediction": tuple(
+                                self.prediction_examples[
+                                    datum_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ][
+                                    prediction_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ].tolist()
+                            ),
+                            "score": score_idx(
+                                gt_label_idx, pd_label_idx, example_idx
+                            ),
+                        }
+                        for example_idx in range(number_of_examples)
+                        if datum_idx(gt_label_idx, pd_label_idx, example_idx)
+                        >= 0
+                    ],
+                }
+                for pd_label_idx in range(number_of_labels)
+                if (
+                    self.label_index_to_label_key_index[pd_label_idx]
+                    == label_key_idx
+                )
+            }
+            for gt_label_idx in range(number_of_labels)
+            if (
+                self.label_index_to_label_key_index[gt_label_idx]
+                == label_key_idx
+            )
+        }
+
+    def _unpack_hallucinations(
+        self,
+        hallucinations: NDArray[np.floating],
+        label_key_idx: int,
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[
+        str,
+        dict[
+            str,
+            int
+            | list[dict[str, str | float | tuple[float, float, float, float]]],
+        ],
+    ]:
+        """
+        Unpacks a numpy array of hallucination counts and examples.
+        """
+
+        datum_idx = (
+            lambda pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                hallucinations[
+                    pd_label_idx,
+                    example_idx * 3 + 1,
+                ]
+            )
+        )
+
+        prediction_idx = (
+            lambda pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                hallucinations[
+                    pd_label_idx,
+                    example_idx * 3 + 2,
+                ]
+            )
+        )
+
+        score_idx = (
+            lambda pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
+                hallucinations[
+                    pd_label_idx,
+                    example_idx * 3 + 3,
+                ]
+            )
+        )
+
+        return {
+            self.index_to_label[pd_label_idx][1]: {
+                "count": max(
+                    int(hallucinations[pd_label_idx, 0]),
+                    0,
+                ),
+                "examples": [
+                    {
+                        "datum": self.index_to_uid[
+                            datum_idx(pd_label_idx, example_idx)
+                        ],
+                        "prediction": tuple(
+                            self.prediction_examples[
+                                datum_idx(pd_label_idx, example_idx)
+                            ][
+                                prediction_idx(pd_label_idx, example_idx)
+                            ].tolist()
+                        ),
+                        "score": score_idx(pd_label_idx, example_idx),
+                    }
+                    for example_idx in range(number_of_examples)
+                    if datum_idx(pd_label_idx, example_idx) >= 0
+                ],
+            }
+            for pd_label_idx in range(number_of_labels)
+            if (
+                self.label_index_to_label_key_index[pd_label_idx]
+                == label_key_idx
+            )
+        }
+
+    def _unpack_missing_predictions(
         self,
-        iou_thresholds: list[float] = [0.5],
-        score_thresholds: list[float] = [
-            score / 10.0 for score in range(1, 11)
+        missing_predictions: NDArray[np.int32],
+        label_key_idx: int,
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[
+        str,
+        dict[
+            str,
+            int | list[dict[str, str | tuple[float, float, float, float]]],
         ],
-        n_samples: int = 0,
-    ) -> list[DetailedCounts]:
+    ]:
+        """
+        Unpacks a numpy array of missing prediction counts and examples.
+        """
+
+        datum_idx = (
+            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                missing_predictions[
+                    gt_label_idx,
+                    example_idx * 2 + 1,
+                ]
+            )
+        )
+
+        groundtruth_idx = (
+            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                missing_predictions[
+                    gt_label_idx,
+                    example_idx * 2 + 2,
+                ]
+            )
+        )
+
+        return {
+            self.index_to_label[gt_label_idx][1]: {
+                "count": max(
+                    int(missing_predictions[gt_label_idx, 0]),
+                    0,
+                ),
+                "examples": [
+                    {
+                        "datum": self.index_to_uid[
+                            datum_idx(gt_label_idx, example_idx)
+                        ],
+                        "groundtruth": tuple(
+                            self.groundtruth_examples[
+                                datum_idx(gt_label_idx, example_idx)
+                            ][
+                                groundtruth_idx(gt_label_idx, example_idx)
+                            ].tolist()
+                        ),
+                    }
+                    for example_idx in range(number_of_examples)
+                    if datum_idx(gt_label_idx, example_idx) >= 0
+                ],
+            }
+            for gt_label_idx in range(number_of_labels)
+            if (
+                self.label_index_to_label_key_index[gt_label_idx]
+                == label_key_idx
+            )
+        }
+
+    def _compute_confusion_matrix(
+        self,
+        data: NDArray[np.floating],
+        label_metadata: NDArray[np.int32],
+        iou_thresholds: list[float],
+        score_thresholds: list[float],
+        number_of_examples: int,
+    ) -> list[ConfusionMatrix]:
         """
         Computes detailed counting metrics.
 
         Parameters
         ----------
-        iou_thresholds : list[float], default=[0.5]
+        data : NDArray[np.floating]
+            An array containing detailed pairs of detections.
+        label_metadata : NDArray[np.int32]
+            An array containing label metadata.
+        iou_thresholds : list[float]
             List of IoU thresholds to compute metrics for.
-        score_thresholds : list[float], default=[0.1,0.2,...,1.0]
+        score_thresholds : list[float]
             List of confidence thresholds to compute metrics for.
-        n_samples : int, default=0
-            Number of datum samples to return per metric.
+        number_of_examples : int
+            Maximum number of annotation examples to return per metric.
 
         Returns
         -------
-        list[list[DetailedCounts]]
+        list[list[ConfusionMatrix]]
             Outer list is indexed by label, inner list is by IoU.
         """
 
-        if self._detailed_pairs.size == 0:
+        if data.size == 0:
             return list()
 
-        metrics = compute_detailed_counts(
-            data=self._detailed_pairs,
-            label_metadata=self._label_metadata,
+        (
+            confusion_matrix,
+            hallucinations,
+            missing_predictions,
+        ) = compute_confusion_matrix(
+            data=data,
+            label_metadata=label_metadata,
             iou_thresholds=np.array(iou_thresholds),
             score_thresholds=np.array(score_thresholds),
-            n_samples=n_samples,
+            n_examples=number_of_examples,
         )
 
-        tp_idx = 0
-        fp_misclf_idx = 2 * n_samples + 1
-        fp_halluc_idx = 4 * n_samples + 2
-        fn_misclf_idx = 6 * n_samples + 3
-        fn_misprd_idx = 8 * n_samples + 4
-
-        def _unpack_examples(
-            iou_idx: int,
-            label_idx: int,
-            type_idx: int,
-            example_source: dict[int, NDArray[np.float16]],
-        ) -> list[list[tuple[str, tuple[float, float, float, float]]]]:
-            """
-            Unpacks metric examples from computation.
-            """
-            type_idx += 1
-
-            results = list()
-            for score_idx in range(n_scores):
-                examples = list()
-                for example_idx in range(n_samples):
-                    datum_idx = metrics[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        type_idx + example_idx * 2,
-                    ]
-                    annotation_idx = metrics[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        type_idx + example_idx * 2 + 1,
-                    ]
-                    if datum_idx >= 0:
-                        examples.append(
-                            (
-                                self.index_to_uid[datum_idx],
-                                tuple(
-                                    example_source[datum_idx][
-                                        annotation_idx
-                                    ].tolist()
-                                ),
-                            )
-                        )
-                results.append(examples)
-
-            return results
-
-        n_ious, n_scores, n_labels, _ = metrics.shape
+        n_ious, n_scores, n_labels, _, _ = confusion_matrix.shape
         return [
-            DetailedCounts(
+            ConfusionMatrix(
                 iou_threshold=iou_thresholds[iou_idx],
-                label=self.index_to_label[label_idx],
-                score_thresholds=score_thresholds,
-                tp=metrics[iou_idx, :, label_idx, tp_idx].astype(int).tolist(),
-                fp_misclassification=metrics[
-                    iou_idx, :, label_idx, fp_misclf_idx
-                ]
-                .astype(int)
-                .tolist(),
-                fp_hallucination=metrics[iou_idx, :, label_idx, fp_halluc_idx]
-                .astype(int)
-                .tolist(),
-                fn_misclassification=metrics[
-                    iou_idx, :, label_idx, fn_misclf_idx
-                ]
-                .astype(int)
-                .tolist(),
-                fn_missing_prediction=metrics[
-                    iou_idx, :, label_idx, fn_misprd_idx
-                ]
-                .astype(int)
-                .tolist(),
-                tp_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=tp_idx,
-                    example_source=self.prediction_examples,
-                ),
-                fp_misclassification_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fp_misclf_idx,
-                    example_source=self.prediction_examples,
-                ),
-                fp_hallucination_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fp_halluc_idx,
-                    example_source=self.prediction_examples,
+                score_threshold=score_thresholds[score_idx],
+                label_key=label_key,
+                number_of_examples=number_of_examples,
+                confusion_matrix=self._unpack_confusion_matrix(
+                    confusion_matrix=confusion_matrix[
+                        iou_idx, score_idx, :, :, :
+                    ],
+                    label_key_idx=label_key_idx,
+                    number_of_labels=n_labels,
+                    number_of_examples=number_of_examples,
                 ),
-                fn_misclassification_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fn_misclf_idx,
-                    example_source=self.groundtruth_examples,
+                hallucinations=self._unpack_hallucinations(
+                    hallucinations=hallucinations[iou_idx, score_idx, :, :],
+                    label_key_idx=label_key_idx,
+                    number_of_labels=n_labels,
+                    number_of_examples=number_of_examples,
                 ),
-                fn_missing_prediction_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fn_misprd_idx,
-                    example_source=self.groundtruth_examples,
+                missing_predictions=self._unpack_missing_predictions(
+                    missing_predictions=missing_predictions[
+                        iou_idx, score_idx, :, :
+                    ],
+                    label_key_idx=label_key_idx,
+                    number_of_labels=n_labels,
+                    number_of_examples=number_of_examples,
                 ),
             )
-            for label_idx in range(n_labels)
+            for label_key_idx, label_key in self.index_to_label_key.items()
             for iou_idx in range(n_ious)
+            for score_idx in range(n_scores)
         ]
 
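Reading the unpacked structures: each `ConfusionMatrix` built above carries three nested dictionaries keyed by label value, with a `count` plus up to `number_of_examples` examples per entry. A small traversal sketch, assuming the metric object exposes the keyword arguments of the `ConfusionMatrix(...)` constructor call above as attributes:

    def summarize(cm) -> None:
        # cm: a ConfusionMatrix as built by _compute_confusion_matrix.
        for gt_label, row in cm.confusion_matrix.items():
            for pd_label, cell in row.items():
                print(f"gt={gt_label} pd={pd_label} count={cell['count']}")
                for ex in cell["examples"]:
                    # Each example: datum uid, ground-truth box and
                    # predicted box as 4-tuples, and prediction score.
                    print("   ", ex["datum"], ex["groundtruth"],
                          ex["prediction"], ex["score"])
        for pd_label, cell in cm.hallucinations.items():
            # Predictions with no matching ground truth.
            print(f"hallucination {pd_label}: count={cell['count']}")
        for gt_label, cell in cm.missing_predictions.items():
            # Ground truths with no matching prediction.
            print(f"missing prediction {gt_label}: count={cell['count']}")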