valor-lite 0.33.2__py3-none-any.whl → 0.33.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valor-lite might be problematic. See the release analysis for more details.

@@ -36,8 +36,20 @@ class Bitmask:
36
36
  "If scores are defined, there must be a 1:1 pairing with labels."
37
37
  )
38
38
 
39
- def to_box(self) -> BoundingBox:
40
- raise NotImplementedError
39
+ def to_box(self) -> BoundingBox | None:
40
+
41
+ if not self.mask.any():
42
+ return None
43
+
44
+ rows, cols = np.nonzero(self.mask)
45
+ return BoundingBox(
46
+ xmin=cols.min(),
47
+ xmax=cols.max(),
48
+ ymin=rows.min(),
49
+ ymax=rows.max(),
50
+ labels=self.labels,
51
+ scores=self.scores,
52
+ )
41
53
 
42
54
 
43
55
  @dataclass
@@ -70,9 +70,9 @@ def compute_iou(data: NDArray[np.floating]) -> NDArray[np.floating]:
70
70
 
71
71
 
72
72
  def _compute_ranked_pairs_for_datum(
73
- data: np.ndarray,
74
- label_metadata: np.ndarray,
75
- ) -> np.ndarray:
73
+ data: NDArray[np.floating],
74
+ label_metadata: NDArray[np.int32],
75
+ ) -> NDArray[np.floating]:
76
76
  """
77
77
  Computes ranked pairs for a datum.
78
78
  """
@@ -113,7 +113,7 @@ def _compute_ranked_pairs_for_datum(
113
113
 
114
114
  def compute_ranked_pairs(
115
115
  data: list[NDArray[np.floating]],
116
- label_metadata: NDArray[np.integer],
116
+ label_metadata: NDArray[np.int32],
117
117
  ) -> NDArray[np.floating]:
118
118
  """
119
119
  Performs pair ranking on input data.
@@ -142,23 +142,22 @@ def compute_ranked_pairs(
142
142
  NDArray[np.floating]
143
143
  A filtered array containing only ranked pairs.
144
144
  """
145
- pairs = np.concatenate(
146
- [
147
- _compute_ranked_pairs_for_datum(
148
- datum,
149
- label_metadata=label_metadata,
150
- )
151
- for datum in data
152
- ],
153
- axis=0,
154
- )
145
+
146
+ ranked_pairs_by_datum = [
147
+ _compute_ranked_pairs_for_datum(
148
+ data=datum,
149
+ label_metadata=label_metadata,
150
+ )
151
+ for datum in data
152
+ ]
153
+ ranked_pairs = np.concatenate(ranked_pairs_by_datum, axis=0)
155
154
  indices = np.lexsort(
156
155
  (
157
- -pairs[:, 3], # iou
158
- -pairs[:, 6], # score
156
+ -ranked_pairs[:, 3], # iou
157
+ -ranked_pairs[:, 6], # score
159
158
  )
160
159
  )
161
- return pairs[indices]
160
+ return ranked_pairs[indices]
162
161
 
163
162
 
164
163
  def compute_metrics(
@@ -429,12 +428,12 @@ def compute_metrics(
429
428
 
430
429
 
431
430
  def compute_detailed_counts(
432
- data: np.ndarray,
433
- label_metadata: np.ndarray,
434
- iou_thresholds: np.ndarray,
435
- score_thresholds: np.ndarray,
431
+ data: NDArray[np.floating],
432
+ label_metadata: NDArray[np.int32],
433
+ iou_thresholds: NDArray[np.floating],
434
+ score_thresholds: NDArray[np.floating],
436
435
  n_samples: int,
437
- ) -> np.ndarray:
436
+ ) -> NDArray[np.int32]:
438
437
  """
439
438
  Compute detailed counts.
440
439
 
@@ -452,13 +451,13 @@ def compute_detailed_counts(
452
451
 
453
452
  Index 0 - True Positive Count
454
453
  ... Datum ID Examples
455
- Index n_samples + 1 - False Positive Misclassification Count
454
+ Index 2 * n_samples + 1 - False Positive Misclassification Count
456
455
  ... Datum ID Examples
457
- Index 2 * n_samples + 2 - False Positive Hallucination Count
456
+ Index 4 * n_samples + 2 - False Positive Hallucination Count
458
457
  ... Datum ID Examples
459
- Index 3 * n_samples + 3 - False Negative Misclassification Count
458
+ Index 6 * n_samples + 3 - False Negative Misclassification Count
460
459
  ... Datum ID Examples
461
- Index 4 * n_samples + 4 - False Negative Missing Prediction Count
460
+ Index 8 * n_samples + 4 - False Negative Missing Prediction Count
462
461
  ... Datum ID Examples
463
462
 
464
463
  Parameters
@@ -476,22 +475,24 @@ def compute_detailed_counts(
476
475
 
477
476
  Returns
478
477
  -------
479
- NDArray[np.floating]
478
+ NDArray[np.int32]
480
479
  The detailed counts with optional examples.
481
480
  """
482
481
 
483
482
  n_labels = label_metadata.shape[0]
484
483
  n_ious = iou_thresholds.shape[0]
485
484
  n_scores = score_thresholds.shape[0]
486
- n_metrics = 5 * (n_samples + 1)
485
+ n_metrics = 5 * (2 * n_samples + 1)
487
486
 
488
487
  tp_idx = 0
489
- fp_misclf_idx = tp_idx + n_samples + 1
490
- fp_halluc_idx = fp_misclf_idx + n_samples + 1
491
- fn_misclf_idx = fp_halluc_idx + n_samples + 1
492
- fn_misprd_idx = fn_misclf_idx + n_samples + 1
488
+ fp_misclf_idx = 2 * n_samples + 1
489
+ fp_halluc_idx = 4 * n_samples + 2
490
+ fn_misclf_idx = 6 * n_samples + 3
491
+ fn_misprd_idx = 8 * n_samples + 4
493
492
 
494
- detailed_pr_curve = np.ones((n_ious, n_scores, n_labels, n_metrics)) * -1.0
493
+ detailed_pr_curve = -1 * np.ones(
494
+ (n_ious, n_scores, n_labels, n_metrics), dtype=np.int32
495
+ )
495
496
 
496
497
  mask_gt_exists = data[:, 1] > -0.5
497
498
  mask_pd_exists = data[:, 2] > -0.5
@@ -509,13 +510,14 @@ def compute_detailed_counts(
509
510
  mask_iou_threshold = data[:, 3] >= iou_thresholds[iou_idx]
510
511
  mask_iou = mask_iou_nonzero & mask_iou_threshold
511
512
 
512
- groundtruths_with_pairs = np.unique(groundtruths[mask_iou], axis=0)
513
- mask_groundtruths_with_passing_ious = (
514
- groundtruths.reshape(-1, 1, 2)
515
- == groundtruths_with_pairs.reshape(1, -1, 2)
516
- ).all(axis=2)
513
+ groundtruths_passing_ious = np.unique(groundtruths[mask_iou], axis=0)
517
514
  mask_groundtruths_with_passing_ious = (
518
- mask_groundtruths_with_passing_ious.any(axis=1)
515
+ (
516
+ groundtruths.reshape(-1, 1, 2)
517
+ == groundtruths_passing_ious.reshape(1, -1, 2)
518
+ )
519
+ .all(axis=2)
520
+ .any(axis=1)
519
521
  )
520
522
  mask_groundtruths_without_passing_ious = (
521
523
  ~mask_groundtruths_with_passing_ious & mask_gt_exists
@@ -525,11 +527,12 @@ def compute_detailed_counts(
525
527
  predictions[mask_iou], axis=0
526
528
  )
527
529
  mask_predictions_with_passing_ious = (
528
- predictions.reshape(-1, 1, 2)
529
- == predictions_with_passing_ious.reshape(1, -1, 2)
530
- ).all(axis=2)
531
- mask_predictions_with_passing_ious = (
532
- mask_predictions_with_passing_ious.any(axis=1)
530
+ (
531
+ predictions.reshape(-1, 1, 2)
532
+ == predictions_with_passing_ious.reshape(1, -1, 2)
533
+ )
534
+ .all(axis=2)
535
+ .any(axis=1)
533
536
  )
534
537
  mask_predictions_without_passing_ious = (
535
538
  ~mask_predictions_with_passing_ious & mask_pd_exists
@@ -543,11 +546,12 @@ def compute_detailed_counts(
543
546
  groundtruths[mask_iou & mask_score], axis=0
544
547
  )
545
548
  mask_groundtruths_with_passing_score = (
546
- groundtruths.reshape(-1, 1, 2)
547
- == groundtruths_with_passing_score.reshape(1, -1, 2)
548
- ).all(axis=2)
549
- mask_groundtruths_with_passing_score = (
550
- mask_groundtruths_with_passing_score.any(axis=1)
549
+ (
550
+ groundtruths.reshape(-1, 1, 2)
551
+ == groundtruths_with_passing_score.reshape(1, -1, 2)
552
+ )
553
+ .all(axis=2)
554
+ .any(axis=1)
551
555
  )
552
556
  mask_groundtruths_without_passing_score = (
553
557
  ~mask_groundtruths_with_passing_score & mask_gt_exists
@@ -623,21 +627,41 @@ def compute_detailed_counts(
623
627
 
624
628
  if n_samples > 0:
625
629
  for label_idx in range(n_labels):
626
- tp_examples = tp[tp[:, 2].astype(int) == label_idx][
627
- :n_samples, 0
628
- ]
629
- fp_misclf_examples = fp_misclf[
630
- fp_misclf[:, 2].astype(int) == label_idx
631
- ][:n_samples, 0]
632
- fp_halluc_examples = fp_halluc[
633
- fp_halluc[:, 2].astype(int) == label_idx
634
- ][:n_samples, 0]
635
- fn_misclf_examples = fn_misclf[
636
- fn_misclf[:, 2].astype(int) == label_idx
637
- ][:n_samples, 0]
638
- fn_misprd_examples = fn_misprd[
639
- fn_misprd[:, 2].astype(int) == label_idx
640
- ][:n_samples, 0]
630
+ tp_examples = (
631
+ tp[tp[:, 2].astype(int) == label_idx][
632
+ :n_samples, [0, 1]
633
+ ]
634
+ .astype(int)
635
+ .flatten()
636
+ )
637
+ fp_misclf_examples = (
638
+ fp_misclf[fp_misclf[:, 2].astype(int) == label_idx][
639
+ :n_samples, [0, 1]
640
+ ]
641
+ .astype(int)
642
+ .flatten()
643
+ )
644
+ fp_halluc_examples = (
645
+ fp_halluc[fp_halluc[:, 2].astype(int) == label_idx][
646
+ :n_samples, [0, 1]
647
+ ]
648
+ .astype(int)
649
+ .flatten()
650
+ )
651
+ fn_misclf_examples = (
652
+ fn_misclf[fn_misclf[:, 2].astype(int) == label_idx][
653
+ :n_samples, [0, 1]
654
+ ]
655
+ .astype(int)
656
+ .flatten()
657
+ )
658
+ fn_misprd_examples = (
659
+ fn_misprd[fn_misprd[:, 2].astype(int) == label_idx][
660
+ :n_samples, [0, 1]
661
+ ]
662
+ .astype(int)
663
+ .flatten()
664
+ )
641
665
 
642
666
  detailed_pr_curve[
643
667
  iou_idx,
@@ -74,6 +74,10 @@ class Evaluator:
74
74
  self.uid_to_index: dict[str, int] = dict()
75
75
  self.index_to_uid: dict[int, str] = dict()
76
76
 
77
+ # annotation reference
78
+ self.groundtruth_examples: dict[int, NDArray[np.float16]] = dict()
79
+ self.prediction_examples: dict[int, NDArray[np.float16]] = dict()
80
+
77
81
  # label reference
78
82
  self.label_to_index: dict[tuple[str, str], int] = dict()
79
83
  self.index_to_label: dict[int, tuple[str, str]] = dict()
@@ -84,10 +88,10 @@ class Evaluator:
84
88
  self.label_index_to_label_key_index: dict[int, int] = dict()
85
89
 
86
90
  # computation caches
87
- self._detailed_pairs = np.array([])
88
- self._ranked_pairs = np.array([])
89
- self._label_metadata = np.array([])
90
- self._label_metadata_per_datum = np.array([])
91
+ self._detailed_pairs: NDArray[np.floating] = np.array([])
92
+ self._ranked_pairs: NDArray[np.floating] = np.array([])
93
+ self._label_metadata: NDArray[np.int32] = np.array([])
94
+ self._label_metadata_per_datum: NDArray[np.int32] = np.array([])
91
95
 
92
96
  @property
93
97
  def ignored_prediction_labels(self) -> list[tuple[str, str]]:
@@ -224,8 +228,10 @@ class Evaluator:
224
228
 
225
229
  def evaluate(
226
230
  self,
231
+ metrics_to_return: list[MetricType] = MetricType.base_metrics(),
227
232
  iou_thresholds: list[float] = [0.5, 0.75, 0.9],
228
233
  score_thresholds: list[float] = [0.5],
234
+ number_of_examples: int = 0,
229
235
  filter_: Filter | None = None,
230
236
  ) -> dict[MetricType, list]:
231
237
  """
@@ -233,10 +239,14 @@ class Evaluator:
233
239
 
234
240
  Parameters
235
241
  ----------
242
+ metrics_to_return : list[MetricType]
243
+ A list of metrics to return in the results.
236
244
  iou_thresholds : list[float]
237
245
  A list of IoU thresholds to compute metrics over.
238
246
  score_thresholds : list[float]
239
247
  A list of score thresholds to compute metrics over.
248
+ number_of_examples : int, default=0
249
+ Number of annotation examples to return in DetailedCounts.
240
250
  filter_ : Filter, optional
241
251
  An optional filter object.
242
252
 
@@ -422,16 +432,27 @@ class Evaluator:
422
432
  )
423
433
  )
424
434
 
435
+ if MetricType.DetailedCounts in metrics_to_return:
436
+ metrics[MetricType.DetailedCounts] = self._compute_detailed_counts(
437
+ iou_thresholds=iou_thresholds,
438
+ score_thresholds=score_thresholds,
439
+ n_samples=number_of_examples,
440
+ )
441
+
442
+ for metric in set(metrics.keys()):
443
+ if metric not in metrics_to_return:
444
+ del metrics[metric]
445
+
425
446
  return metrics
426
447
 
427
- def compute_detailed_counts(
448
+ def _compute_detailed_counts(
428
449
  self,
429
450
  iou_thresholds: list[float] = [0.5],
430
451
  score_thresholds: list[float] = [
431
452
  score / 10.0 for score in range(1, 11)
432
453
  ],
433
454
  n_samples: int = 0,
434
- ) -> list[list[DetailedCounts]]:
455
+ ) -> list[DetailedCounts]:
435
456
  """
436
457
  Computes detailed counting metrics.
437
458
 
@@ -462,95 +483,111 @@ class Evaluator:
462
483
  )
463
484
 
464
485
  tp_idx = 0
465
- fp_misclf_idx = tp_idx + n_samples + 1
466
- fp_halluc_idx = fp_misclf_idx + n_samples + 1
467
- fn_misclf_idx = fp_halluc_idx + n_samples + 1
468
- fn_misprd_idx = fn_misclf_idx + n_samples + 1
486
+ fp_misclf_idx = 2 * n_samples + 1
487
+ fp_halluc_idx = 4 * n_samples + 2
488
+ fn_misclf_idx = 6 * n_samples + 3
489
+ fn_misprd_idx = 8 * n_samples + 4
490
+
491
+ def _unpack_examples(
492
+ iou_idx: int,
493
+ label_idx: int,
494
+ type_idx: int,
495
+ example_source: dict[int, NDArray[np.float16]],
496
+ ) -> list[list[tuple[str, tuple[float, float, float, float]]]]:
497
+ """
498
+ Unpacks metric examples from computation.
499
+ """
500
+ type_idx += 1
501
+
502
+ results = list()
503
+ for score_idx in range(n_scores):
504
+ examples = list()
505
+ for example_idx in range(n_samples):
506
+ datum_idx = metrics[
507
+ iou_idx,
508
+ score_idx,
509
+ label_idx,
510
+ type_idx + example_idx * 2,
511
+ ]
512
+ annotation_idx = metrics[
513
+ iou_idx,
514
+ score_idx,
515
+ label_idx,
516
+ type_idx + example_idx * 2 + 1,
517
+ ]
518
+ if datum_idx >= 0:
519
+ examples.append(
520
+ (
521
+ self.index_to_uid[datum_idx],
522
+ tuple(
523
+ example_source[datum_idx][
524
+ annotation_idx
525
+ ].tolist()
526
+ ),
527
+ )
528
+ )
529
+ results.append(examples)
530
+
531
+ return results
469
532
 
470
533
  n_ious, n_scores, n_labels, _ = metrics.shape
471
534
  return [
472
- [
473
- DetailedCounts(
474
- iou_threshold=iou_thresholds[iou_idx],
475
- label=self.index_to_label[label_idx],
476
- score_thresholds=score_thresholds,
477
- tp=metrics[iou_idx, :, label_idx, tp_idx]
478
- .astype(int)
479
- .tolist(),
480
- tp_examples=[
481
- [
482
- self.index_to_uid[int(datum_idx)]
483
- for datum_idx in metrics[iou_idx][score_idx][
484
- label_idx
485
- ][tp_idx + 1 : fp_misclf_idx]
486
- if int(datum_idx) >= 0
487
- ]
488
- for score_idx in range(n_scores)
489
- ],
490
- fp_misclassification=metrics[
491
- iou_idx, :, label_idx, fp_misclf_idx
492
- ]
493
- .astype(int)
494
- .tolist(),
495
- fp_misclassification_examples=[
496
- [
497
- self.index_to_uid[int(datum_idx)]
498
- for datum_idx in metrics[iou_idx][score_idx][
499
- label_idx
500
- ][fp_misclf_idx + 1 : fp_halluc_idx]
501
- if int(datum_idx) >= 0
502
- ]
503
- for score_idx in range(n_scores)
504
- ],
505
- fp_hallucination=metrics[
506
- iou_idx, :, label_idx, fp_halluc_idx
507
- ]
508
- .astype(int)
509
- .tolist(),
510
- fp_hallucination_examples=[
511
- [
512
- self.index_to_uid[int(datum_idx)]
513
- for datum_idx in metrics[iou_idx][score_idx][
514
- label_idx
515
- ][fp_halluc_idx + 1 : fn_misclf_idx]
516
- if int(datum_idx) >= 0
517
- ]
518
- for score_idx in range(n_scores)
519
- ],
520
- fn_misclassification=metrics[
521
- iou_idx, :, label_idx, fn_misclf_idx
522
- ]
523
- .astype(int)
524
- .tolist(),
525
- fn_misclassification_examples=[
526
- [
527
- self.index_to_uid[int(datum_idx)]
528
- for datum_idx in metrics[iou_idx][score_idx][
529
- label_idx
530
- ][fn_misclf_idx + 1 : fn_misprd_idx]
531
- if int(datum_idx) >= 0
532
- ]
533
- for score_idx in range(n_scores)
534
- ],
535
- fn_missing_prediction=metrics[
536
- iou_idx, :, label_idx, fn_misprd_idx
537
- ]
538
- .astype(int)
539
- .tolist(),
540
- fn_missing_prediction_examples=[
541
- [
542
- self.index_to_uid[int(datum_idx)]
543
- for datum_idx in metrics[iou_idx][score_idx][
544
- label_idx
545
- ][fn_misprd_idx + 1 :]
546
- if int(datum_idx) >= 0
547
- ]
548
- for score_idx in range(n_scores)
549
- ],
550
- )
551
- for iou_idx in range(n_ious)
552
- ]
535
+ DetailedCounts(
536
+ iou_threshold=iou_thresholds[iou_idx],
537
+ label=self.index_to_label[label_idx],
538
+ score_thresholds=score_thresholds,
539
+ tp=metrics[iou_idx, :, label_idx, tp_idx].astype(int).tolist(),
540
+ fp_misclassification=metrics[
541
+ iou_idx, :, label_idx, fp_misclf_idx
542
+ ]
543
+ .astype(int)
544
+ .tolist(),
545
+ fp_hallucination=metrics[iou_idx, :, label_idx, fp_halluc_idx]
546
+ .astype(int)
547
+ .tolist(),
548
+ fn_misclassification=metrics[
549
+ iou_idx, :, label_idx, fn_misclf_idx
550
+ ]
551
+ .astype(int)
552
+ .tolist(),
553
+ fn_missing_prediction=metrics[
554
+ iou_idx, :, label_idx, fn_misprd_idx
555
+ ]
556
+ .astype(int)
557
+ .tolist(),
558
+ tp_examples=_unpack_examples(
559
+ iou_idx=iou_idx,
560
+ label_idx=label_idx,
561
+ type_idx=tp_idx,
562
+ example_source=self.prediction_examples,
563
+ ),
564
+ fp_misclassification_examples=_unpack_examples(
565
+ iou_idx=iou_idx,
566
+ label_idx=label_idx,
567
+ type_idx=fp_misclf_idx,
568
+ example_source=self.prediction_examples,
569
+ ),
570
+ fp_hallucination_examples=_unpack_examples(
571
+ iou_idx=iou_idx,
572
+ label_idx=label_idx,
573
+ type_idx=fp_halluc_idx,
574
+ example_source=self.prediction_examples,
575
+ ),
576
+ fn_misclassification_examples=_unpack_examples(
577
+ iou_idx=iou_idx,
578
+ label_idx=label_idx,
579
+ type_idx=fn_misclf_idx,
580
+ example_source=self.groundtruth_examples,
581
+ ),
582
+ fn_missing_prediction_examples=_unpack_examples(
583
+ iou_idx=iou_idx,
584
+ label_idx=label_idx,
585
+ type_idx=fn_misprd_idx,
586
+ example_source=self.groundtruth_examples,
587
+ ),
588
+ )
553
589
  for label_idx in range(n_labels)
590
+ for iou_idx in range(n_ious)
554
591
  ]
555
592
 
556
593
 
@@ -561,7 +598,7 @@ class DataLoader:
561
598
 
562
599
  def __init__(self):
563
600
  self._evaluator = Evaluator()
564
- self.pairs = list()
601
+ self.pairs: list[NDArray[np.floating]] = list()
565
602
  self.groundtruth_count = defaultdict(lambda: defaultdict(int))
566
603
  self.prediction_count = defaultdict(lambda: defaultdict(int))
567
604
 
@@ -624,6 +661,125 @@ class DataLoader:
624
661
  self._evaluator.label_key_to_index[label[0]],
625
662
  )
626
663
 
664
+ def _add_data(
665
+ self,
666
+ uid_index: int,
667
+ keyed_groundtruths: dict,
668
+ keyed_predictions: dict,
669
+ ):
670
+ gt_keys = set(keyed_groundtruths.keys())
671
+ pd_keys = set(keyed_predictions.keys())
672
+ joint_keys = gt_keys.intersection(pd_keys)
673
+ gt_unique_keys = gt_keys - pd_keys
674
+ pd_unique_keys = pd_keys - gt_keys
675
+
676
+ pairs = list()
677
+ for key in joint_keys:
678
+ n_predictions = len(keyed_predictions[key])
679
+ n_groundtruths = len(keyed_groundtruths[key])
680
+ boxes = np.array(
681
+ [
682
+ np.array([*gextrema, *pextrema])
683
+ for _, _, _, pextrema in keyed_predictions[key]
684
+ for _, _, gextrema in keyed_groundtruths[key]
685
+ ]
686
+ )
687
+ ious = compute_iou(boxes)
688
+ mask_nonzero_iou = (ious > 1e-9).reshape(
689
+ (n_predictions, n_groundtruths)
690
+ )
691
+ mask_ious_halluc = ~(mask_nonzero_iou.any(axis=1))
692
+ mask_ious_misprd = ~(mask_nonzero_iou.any(axis=0))
693
+
694
+ pairs.extend(
695
+ [
696
+ np.array(
697
+ [
698
+ float(uid_index),
699
+ float(gidx),
700
+ float(pidx),
701
+ ious[pidx * len(keyed_groundtruths[key]) + gidx],
702
+ float(glabel),
703
+ float(plabel),
704
+ float(score),
705
+ ]
706
+ )
707
+ for pidx, plabel, score, _ in keyed_predictions[key]
708
+ for gidx, glabel, _ in keyed_groundtruths[key]
709
+ if ious[pidx * len(keyed_groundtruths[key]) + gidx] > 1e-9
710
+ ]
711
+ )
712
+ pairs.extend(
713
+ [
714
+ np.array(
715
+ [
716
+ float(uid_index),
717
+ -1.0,
718
+ float(pidx),
719
+ 0.0,
720
+ -1.0,
721
+ float(plabel),
722
+ float(score),
723
+ ]
724
+ )
725
+ for pidx, plabel, score, _ in keyed_predictions[key]
726
+ if mask_ious_halluc[pidx]
727
+ ]
728
+ )
729
+ pairs.extend(
730
+ [
731
+ np.array(
732
+ [
733
+ float(uid_index),
734
+ float(gidx),
735
+ -1.0,
736
+ 0.0,
737
+ float(glabel),
738
+ -1.0,
739
+ -1.0,
740
+ ]
741
+ )
742
+ for gidx, glabel, _ in keyed_groundtruths[key]
743
+ if mask_ious_misprd[gidx]
744
+ ]
745
+ )
746
+ for key in gt_unique_keys:
747
+ pairs.extend(
748
+ [
749
+ np.array(
750
+ [
751
+ float(uid_index),
752
+ float(gidx),
753
+ -1.0,
754
+ 0.0,
755
+ float(glabel),
756
+ -1.0,
757
+ -1.0,
758
+ ]
759
+ )
760
+ for gidx, glabel, _ in keyed_groundtruths[key]
761
+ ]
762
+ )
763
+ for key in pd_unique_keys:
764
+ pairs.extend(
765
+ [
766
+ np.array(
767
+ [
768
+ float(uid_index),
769
+ -1.0,
770
+ float(pidx),
771
+ 0.0,
772
+ -1.0,
773
+ float(plabel),
774
+ float(score),
775
+ ]
776
+ )
777
+ for pidx, plabel, score, _ in keyed_predictions[key]
778
+ ]
779
+ )
780
+
781
+ self.pairs.append(np.array(pairs))
782
+
627
783
  def add_data(
628
784
  self,
629
785
  detections: list[Detection],
@@ -650,10 +806,21 @@ class DataLoader:
650
806
  # update datum uid index
651
807
  uid_index = self._add_datum(uid=detection.uid)
652
808
 
809
+ # initialize bounding box examples
810
+ self._evaluator.groundtruth_examples[uid_index] = np.zeros(
811
+ (len(detection.groundtruths), 4), dtype=np.float16
812
+ )
813
+ self._evaluator.prediction_examples[uid_index] = np.zeros(
814
+ (len(detection.predictions), 4), dtype=np.float16
815
+ )
816
+
653
817
  # cache labels and annotations
654
818
  keyed_groundtruths = defaultdict(list)
655
819
  keyed_predictions = defaultdict(list)
656
820
  for gidx, gann in enumerate(detection.groundtruths):
821
+ self._evaluator.groundtruth_examples[uid_index][
822
+ gidx
823
+ ] = np.array(gann.extrema)
657
824
  for glabel in gann.labels:
658
825
  label_idx, label_key_idx = self._add_label(glabel)
659
826
  self.groundtruth_count[label_idx][uid_index] += 1
@@ -665,6 +832,9 @@ class DataLoader:
665
832
  )
666
833
  )
667
834
  for pidx, pann in enumerate(detection.predictions):
835
+ self._evaluator.prediction_examples[uid_index][
836
+ pidx
837
+ ] = np.array(pann.extrema)
668
838
  for plabel, pscore in zip(pann.labels, pann.scores):
669
839
  label_idx, label_key_idx = self._add_label(plabel)
670
840
  self.prediction_count[label_idx][uid_index] += 1
@@ -677,77 +847,11 @@ class DataLoader:
677
847
  )
678
848
  )
679
849
 
680
- gt_keys = set(keyed_groundtruths.keys())
681
- pd_keys = set(keyed_predictions.keys())
682
- joint_keys = gt_keys.intersection(pd_keys)
683
- gt_unique_keys = gt_keys - pd_keys
684
- pd_unique_keys = pd_keys - gt_keys
685
-
686
- pairs = list()
687
- for key in joint_keys:
688
- boxes = np.array(
689
- [
690
- np.array([*gextrema, *pextrema])
691
- for _, _, _, pextrema in keyed_predictions[key]
692
- for _, _, gextrema in keyed_groundtruths[key]
693
- ]
694
- )
695
- ious = compute_iou(boxes)
696
- pairs.extend(
697
- [
698
- np.array(
699
- [
700
- float(uid_index),
701
- float(gidx),
702
- float(pidx),
703
- ious[
704
- pidx * len(keyed_groundtruths[key]) + gidx
705
- ],
706
- float(glabel),
707
- float(plabel),
708
- float(score),
709
- ]
710
- )
711
- for pidx, plabel, score, _ in keyed_predictions[key]
712
- for gidx, glabel, _ in keyed_groundtruths[key]
713
- ]
714
- )
715
- for key in gt_unique_keys:
716
- pairs.extend(
717
- [
718
- np.array(
719
- [
720
- float(uid_index),
721
- float(gidx),
722
- -1.0,
723
- 0.0,
724
- float(glabel),
725
- -1.0,
726
- -1.0,
727
- ]
728
- )
729
- for gidx, glabel, _ in keyed_groundtruths[key]
730
- ]
731
- )
732
- for key in pd_unique_keys:
733
- pairs.extend(
734
- [
735
- np.array(
736
- [
737
- float(uid_index),
738
- -1.0,
739
- float(pidx),
740
- 0.0,
741
- -1.0,
742
- float(plabel),
743
- float(score),
744
- ]
745
- )
746
- for pidx, plabel, score, _ in keyed_predictions[key]
747
- ]
748
- )
749
-
750
- self.pairs.append(np.array(pairs))
850
+ self._add_data(
851
+ uid_index=uid_index,
852
+ keyed_groundtruths=keyed_groundtruths,
853
+ keyed_predictions=keyed_predictions,
854
+ )
751
855
 
752
856
  def add_data_from_valor_dict(
753
857
  self,
@@ -783,10 +887,21 @@ class DataLoader:
783
887
  # update datum uid index
784
888
  uid_index = self._add_datum(uid=groundtruth["datum"]["uid"])
785
889
 
890
+ # initialize bounding box examples
891
+ self._evaluator.groundtruth_examples[uid_index] = np.zeros(
892
+ (len(groundtruth["annotations"]), 4), dtype=np.float16
893
+ )
894
+ self._evaluator.prediction_examples[uid_index] = np.zeros(
895
+ (len(prediction["annotations"]), 4), dtype=np.float16
896
+ )
897
+
786
898
  # cache labels and annotations
787
899
  keyed_groundtruths = defaultdict(list)
788
900
  keyed_predictions = defaultdict(list)
789
901
  for gidx, gann in enumerate(groundtruth["annotations"]):
902
+ self._evaluator.groundtruth_examples[uid_index][
903
+ gidx
904
+ ] = np.array(_get_bbox_extrema(gann["bounding_box"]))
790
905
  for valor_label in gann["labels"]:
791
906
  glabel = (valor_label["key"], valor_label["value"])
792
907
  label_idx, label_key_idx = self._add_label(glabel)
@@ -799,6 +914,9 @@ class DataLoader:
799
914
  )
800
915
  )
801
916
  for pidx, pann in enumerate(prediction["annotations"]):
917
+ self._evaluator.prediction_examples[uid_index][
918
+ pidx
919
+ ] = np.array(_get_bbox_extrema(pann["bounding_box"]))
802
920
  for valor_label in pann["labels"]:
803
921
  plabel = (valor_label["key"], valor_label["value"])
804
922
  pscore = valor_label["score"]
@@ -813,77 +931,11 @@ class DataLoader:
813
931
  )
814
932
  )
815
933
 
816
- gt_keys = set(keyed_groundtruths.keys())
817
- pd_keys = set(keyed_predictions.keys())
818
- joint_keys = gt_keys.intersection(pd_keys)
819
- gt_unique_keys = gt_keys - pd_keys
820
- pd_unique_keys = pd_keys - gt_keys
821
-
822
- pairs = list()
823
- for key in joint_keys:
824
- boxes = np.array(
825
- [
826
- np.array([*gextrema, *pextrema])
827
- for _, _, _, pextrema in keyed_predictions[key]
828
- for _, _, gextrema in keyed_groundtruths[key]
829
- ]
830
- )
831
- ious = compute_iou(boxes)
832
- pairs.extend(
833
- [
834
- np.array(
835
- [
836
- float(uid_index),
837
- float(gidx),
838
- float(pidx),
839
- ious[
840
- pidx * len(keyed_groundtruths[key]) + gidx
841
- ],
842
- float(glabel),
843
- float(plabel),
844
- float(score),
845
- ]
846
- )
847
- for pidx, plabel, score, _ in keyed_predictions[key]
848
- for gidx, glabel, _ in keyed_groundtruths[key]
849
- ]
850
- )
851
- for key in gt_unique_keys:
852
- pairs.extend(
853
- [
854
- np.array(
855
- [
856
- float(uid_index),
857
- float(gidx),
858
- -1.0,
859
- 0.0,
860
- float(glabel),
861
- -1.0,
862
- -1.0,
863
- ]
864
- )
865
- for gidx, glabel, _ in keyed_groundtruths[key]
866
- ]
867
- )
868
- for key in pd_unique_keys:
869
- pairs.extend(
870
- [
871
- np.array(
872
- [
873
- float(uid_index),
874
- -1.0,
875
- float(pidx),
876
- 0.0,
877
- -1.0,
878
- float(plabel),
879
- float(score),
880
- ]
881
- )
882
- for pidx, plabel, score, _ in keyed_predictions[key]
883
- ]
884
- )
885
-
886
- self.pairs.append(np.array(pairs))
934
+ self._add_data(
935
+ uid_index=uid_index,
936
+ keyed_groundtruths=keyed_groundtruths,
937
+ keyed_predictions=keyed_predictions,
938
+ )
887
939
 
888
940
  def finalize(self) -> Evaluator:
889
941
  """
@@ -21,6 +21,25 @@ class MetricType(str, Enum):
21
21
  PrecisionRecallCurve = "PrecisionRecallCurve"
22
22
  DetailedCounts = "DetailedCounts"
23
23
 
24
+ @classmethod
25
+ def base_metrics(cls):
26
+ return [
27
+ cls.Counts,
28
+ cls.Accuracy,
29
+ cls.Precision,
30
+ cls.Recall,
31
+ cls.F1,
32
+ cls.AP,
33
+ cls.AR,
34
+ cls.mAP,
35
+ cls.mAR,
36
+ cls.APAveragedOverIOUs,
37
+ cls.mAPAveragedOverIOUs,
38
+ cls.ARAveragedOverScores,
39
+ cls.mARAveragedOverScores,
40
+ cls.PrecisionRecallCurve,
41
+ ]
42
+
24
43
 
25
44
  @dataclass
26
45
  class Counts:
@@ -316,11 +335,19 @@ class DetailedCounts:
316
335
  fp_hallucination: list[int]
317
336
  fn_misclassification: list[int]
318
337
  fn_missing_prediction: list[int]
319
- tp_examples: list[list[str]]
320
- fp_misclassification_examples: list[list[str]]
321
- fp_hallucination_examples: list[list[str]]
322
- fn_misclassification_examples: list[list[str]]
323
- fn_missing_prediction_examples: list[list[str]]
338
+ tp_examples: list[list[tuple[str, tuple[float, float, float, float]]]]
339
+ fp_misclassification_examples: list[
340
+ list[tuple[str, tuple[float, float, float, float]]]
341
+ ]
342
+ fp_hallucination_examples: list[
343
+ list[tuple[str, tuple[float, float, float, float]]]
344
+ ]
345
+ fn_misclassification_examples: list[
346
+ list[tuple[str, tuple[float, float, float, float]]]
347
+ ]
348
+ fn_missing_prediction_examples: list[
349
+ list[tuple[str, tuple[float, float, float, float]]]
350
+ ]
324
351
  score_thresholds: list[float]
325
352
  iou_threshold: float
326
353
  label: tuple[str, str]
@@ -335,13 +362,11 @@ class DetailedCounts:
335
362
  "fp_hallucination": self.fp_hallucination,
336
363
  "fn_misclassification": self.fn_misclassification,
337
364
  "fn_missing_prediction": self.fn_missing_prediction,
338
- "tn": None,
339
365
  "tp_examples": self.tp_examples,
340
366
  "fp_misclassification_examples": self.fp_misclassification_examples,
341
367
  "fp_hallucination_examples": self.fp_hallucination_examples,
342
368
  "fn_misclassification_examples": self.fn_misclassification_examples,
343
369
  "fn_missing_prediction_examples": self.fn_missing_prediction_examples,
344
- "tn_examples": None,
345
370
  },
346
371
  parameters={
347
372
  "score_thresholds": self.score_thresholds,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: valor-lite
3
- Version: 0.33.2
3
+ Version: 0.33.3
4
4
  Summary: Compute valor metrics directly in your client.
5
5
  License: MIT License
6
6
 
@@ -0,0 +1,12 @@
1
+ valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ valor_lite/schemas.py,sha256=r4cC10w1xYsA785KmGE4ePeOX3wzEs846vT7QAiVg_I,293
3
+ valor_lite/detection/__init__.py,sha256=WHLHwHoKzXTBjkjC6E1_lhqB7gRWkiGWVWPqkKn-yK8,997
4
+ valor_lite/detection/annotation.py,sha256=c45pZD1Pp2vf5GeyW_6Kl9JCx5FoaaktCaaa4q3QDUo,1758
5
+ valor_lite/detection/computation.py,sha256=7PttK0VuOWlhRN92wpLVrGzB7RAdfdZyT3b1aTm_WaI,23214
6
+ valor_lite/detection/manager.py,sha256=ziVnukGs-WrkyBEBBO3LVSv4LTbaWFaWqLWarVosj2c,35807
7
+ valor_lite/detection/metric.py,sha256=DLqpODJZOG7SCqt7TCgR4am68PQORRCIQW_SXiTb1IA,9473
8
+ valor_lite-0.33.3.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
9
+ valor_lite-0.33.3.dist-info/METADATA,sha256=FBpd-wMWv-m37EK5BfFuiVmnJXg4GNzCaJrTDHv4-gE,1842
10
+ valor_lite-0.33.3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
11
+ valor_lite-0.33.3.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
12
+ valor_lite-0.33.3.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- valor_lite/schemas.py,sha256=r4cC10w1xYsA785KmGE4ePeOX3wzEs846vT7QAiVg_I,293
3
- valor_lite/detection/__init__.py,sha256=WHLHwHoKzXTBjkjC6E1_lhqB7gRWkiGWVWPqkKn-yK8,997
4
- valor_lite/detection/annotation.py,sha256=ON9iVa33pxysUmZVTCb0wNz-eFX6MDOqDhGDz-ouymc,1466
5
- valor_lite/detection/computation.py,sha256=L8FIwZ-qxOQnoT7mxgNzLyNyI-Bvga0i-gtbow3hN-o,22575
6
- valor_lite/detection/manager.py,sha256=Y45Wy3PWi7dQ0VnDERdtpOixUbKVXTZxBcCR92ny0QY,34278
7
- valor_lite/detection/metric.py,sha256=hHqClS7c71ztoUnfoaW3T7RmGYaVNU1SlM6vUs1P08I,8809
8
- valor_lite-0.33.2.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
9
- valor_lite-0.33.2.dist-info/METADATA,sha256=fe-Sj568DB-E9cyC5P8GA_lLjmM1t3MZUHj1f0JF6fM,1842
10
- valor_lite-0.33.2.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
11
- valor_lite-0.33.2.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
12
- valor_lite-0.33.2.dist-info/RECORD,,