valor-lite 0.33.13__py3-none-any.whl → 0.33.14__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.

Potentially problematic release: this version of valor-lite might be problematic.

@@ -16,27 +16,14 @@ from valor_lite.object_detection.computation import (
  compute_bbox_iou,
  compute_bitmask_iou,
  compute_confusion_matrix,
- compute_metrics,
  compute_polygon_iou,
+ compute_precion_recall,
  compute_ranked_pairs,
  )
- from valor_lite.object_detection.metric import (
- AP,
- AR,
- F1,
- Accuracy,
- APAveragedOverIOUs,
- ARAveragedOverScores,
- ConfusionMatrix,
- Counts,
- MetricType,
- Precision,
- PrecisionRecallCurve,
- Recall,
- mAP,
- mAPAveragedOverIOUs,
- mAR,
- mARAveragedOverScores,
+ from valor_lite.object_detection.metric import Metric, MetricType
+ from valor_lite.object_detection.utilities import (
+ unpack_confusion_matrix_into_metric_list,
+ unpack_precision_recall_into_metric_lists,
  )

  """
@@ -212,274 +199,23 @@ class Evaluator:
  label_metadata=label_metadata,
  )

- def _convert_example_to_dict(
- self, box: NDArray[np.float16]
- ) -> dict[str, float]:
- """
- Converts a cached bounding box example to dictionary format.
- """
- return {
- "xmin": float(box[0]),
- "xmax": float(box[1]),
- "ymin": float(box[2]),
- "ymax": float(box[3]),
- }
-
- def _unpack_confusion_matrix(
- self,
- confusion_matrix: NDArray[np.float64],
- number_of_labels: int,
- number_of_examples: int,
- ) -> dict[
- str,
- dict[
- str,
- dict[
- str,
- int
- | list[
- dict[
- str,
- str | dict[str, float] | float,
- ]
- ],
- ],
- ],
- ]:
- """
- Unpacks a numpy array of confusion matrix counts and examples.
- """
-
- datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
- confusion_matrix[
- gt_label_idx,
- pd_label_idx,
- example_idx * 4 + 1,
- ]
- )
-
- groundtruth_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
- confusion_matrix[
- gt_label_idx,
- pd_label_idx,
- example_idx * 4 + 2,
- ]
- )
-
- prediction_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
- confusion_matrix[
- gt_label_idx,
- pd_label_idx,
- example_idx * 4 + 3,
- ]
- )
-
- score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
- confusion_matrix[
- gt_label_idx,
- pd_label_idx,
- example_idx * 4 + 4,
- ]
- )
-
- return {
- self.index_to_label[gt_label_idx]: {
- self.index_to_label[pd_label_idx]: {
- "count": max(
- int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
- 0,
- ),
- "examples": [
- {
- "datum": self.index_to_uid[
- datum_idx(
- gt_label_idx, pd_label_idx, example_idx
- )
- ],
- "groundtruth": self._convert_example_to_dict(
- self.groundtruth_examples[
- datum_idx(
- gt_label_idx,
- pd_label_idx,
- example_idx,
- )
- ][
- groundtruth_idx(
- gt_label_idx,
- pd_label_idx,
- example_idx,
- )
- ]
- ),
- "prediction": self._convert_example_to_dict(
- self.prediction_examples[
- datum_idx(
- gt_label_idx,
- pd_label_idx,
- example_idx,
- )
- ][
- prediction_idx(
- gt_label_idx,
- pd_label_idx,
- example_idx,
- )
- ]
- ),
- "score": score_idx(
- gt_label_idx, pd_label_idx, example_idx
- ),
- }
- for example_idx in range(number_of_examples)
- if datum_idx(gt_label_idx, pd_label_idx, example_idx)
- >= 0
- ],
- }
- for pd_label_idx in range(number_of_labels)
- }
- for gt_label_idx in range(number_of_labels)
- }
-
- def _unpack_hallucinations(
- self,
- hallucinations: NDArray[np.float64],
- number_of_labels: int,
- number_of_examples: int,
- ) -> dict[
- str,
- dict[
- str,
- int | list[dict[str, str | float | dict[str, float]]],
- ],
- ]:
- """
- Unpacks a numpy array of hallucination counts and examples.
- """
-
- datum_idx = (
- lambda pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
- hallucinations[
- pd_label_idx,
- example_idx * 3 + 1,
- ]
- )
- )
-
- prediction_idx = (
- lambda pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
- hallucinations[
- pd_label_idx,
- example_idx * 3 + 2,
- ]
- )
- )
-
- score_idx = (
- lambda pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
- hallucinations[
- pd_label_idx,
- example_idx * 3 + 3,
- ]
- )
- )
-
- return {
- self.index_to_label[pd_label_idx]: {
- "count": max(
- int(hallucinations[pd_label_idx, 0]),
- 0,
- ),
- "examples": [
- {
- "datum": self.index_to_uid[
- datum_idx(pd_label_idx, example_idx)
- ],
- "prediction": self._convert_example_to_dict(
- self.prediction_examples[
- datum_idx(pd_label_idx, example_idx)
- ][prediction_idx(pd_label_idx, example_idx)]
- ),
- "score": score_idx(pd_label_idx, example_idx),
- }
- for example_idx in range(number_of_examples)
- if datum_idx(pd_label_idx, example_idx) >= 0
- ],
- }
- for pd_label_idx in range(number_of_labels)
- }
-
- def _unpack_missing_predictions(
- self,
- missing_predictions: NDArray[np.int32],
- number_of_labels: int,
- number_of_examples: int,
- ) -> dict[str, dict[str, int | list[dict[str, str | dict[str, float]]]]]:
- """
- Unpacks a numpy array of missing prediction counts and examples.
- """
-
- datum_idx = (
- lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn
- missing_predictions[
- gt_label_idx,
- example_idx * 2 + 1,
- ]
- )
- )
-
- groundtruth_idx = (
- lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn
- missing_predictions[
- gt_label_idx,
- example_idx * 2 + 2,
- ]
- )
- )
-
- return {
- self.index_to_label[gt_label_idx]: {
- "count": max(
- int(missing_predictions[gt_label_idx, 0]),
- 0,
- ),
- "examples": [
- {
- "datum": self.index_to_uid[
- datum_idx(gt_label_idx, example_idx)
- ],
- "groundtruth": self._convert_example_to_dict(
- self.groundtruth_examples[
- datum_idx(gt_label_idx, example_idx)
- ][groundtruth_idx(gt_label_idx, example_idx)]
- ),
- }
- for example_idx in range(number_of_examples)
- if datum_idx(gt_label_idx, example_idx) >= 0
- ],
- }
- for gt_label_idx in range(number_of_labels)
- }
-
  def compute_precision_recall(
  self,
  iou_thresholds: list[float] = [0.5, 0.75, 0.9],
  score_thresholds: list[float] = [0.5],
  filter_: Filter | None = None,
- as_dict: bool = False,
- ) -> dict[MetricType, list]:
+ ) -> dict[MetricType, list[Metric]]:
  """
  Computes all metrics except for ConfusionMatrix

  Parameters
  ----------
  iou_thresholds : list[float]
- A list of IoU thresholds to compute metrics over.
+ A list of IOU thresholds to compute metrics over.
  score_thresholds : list[float]
  A list of score thresholds to compute metrics over.
  filter_ : Filter, optional
  An optional filter object.
- as_dict : bool, default=False
- An option to return metrics as dictionaries.

  Returns
  -------
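For reference, the helpers removed above serialized each cached bounding box as xmin/xmax/ymin/ymax floats and nested them under per-label counts and examples. A sketch of one confusion-matrix cell in that removed format, with placeholder labels and values; whether the new unpack_confusion_matrix_into_metric_list utility reproduces this exact layout inside its Metric objects is an assumption:

    # Illustrative shape only; "dog", "cat", "uid1", and all numbers are placeholders.
    cell = {
        "dog": {  # ground-truth label
            "cat": {  # predicted label
                "count": 1,
                "examples": [
                    {
                        "datum": "uid1",
                        "groundtruth": {"xmin": 0.0, "xmax": 10.0, "ymin": 0.0, "ymax": 10.0},
                        "prediction": {"xmin": 1.0, "xmax": 11.0, "ymin": 0.5, "ymax": 9.5},
                        "score": 0.87,
                    }
                ],
            }
        }
    }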
@@ -493,182 +229,20 @@ class Evaluator:
  ranked_pairs = ranked_pairs[filter_.ranked_indices]
  label_metadata = filter_.label_metadata

- (
- (
- average_precision,
- mean_average_precision,
- average_precision_average_over_ious,
- mean_average_precision_average_over_ious,
- ),
- (
- average_recall,
- mean_average_recall,
- average_recall_averaged_over_scores,
- mean_average_recall_averaged_over_scores,
- ),
- accuracy,
- precision_recall,
- pr_curves,
- ) = compute_metrics(
+ results = compute_precion_recall(
  data=ranked_pairs,
  label_metadata=label_metadata,
  iou_thresholds=np.array(iou_thresholds),
  score_thresholds=np.array(score_thresholds),
  )

- metrics = defaultdict(list)
-
- metrics[MetricType.AP] = [
- AP(
- value=float(average_precision[iou_idx][label_idx]),
- iou_threshold=iou_thresholds[iou_idx],
- label=self.index_to_label[label_idx],
- )
- for iou_idx in range(average_precision.shape[0])
- for label_idx in range(average_precision.shape[1])
- if int(label_metadata[label_idx, 0]) > 0
- ]
-
- metrics[MetricType.mAP] = [
- mAP(
- value=float(mean_average_precision[iou_idx]),
- iou_threshold=iou_thresholds[iou_idx],
- )
- for iou_idx in range(mean_average_precision.shape[0])
- ]
-
- metrics[MetricType.APAveragedOverIOUs] = [
- APAveragedOverIOUs(
- value=float(average_precision_average_over_ious[label_idx]),
- iou_thresholds=iou_thresholds,
- label=self.index_to_label[label_idx],
- )
- for label_idx in range(self.n_labels)
- if int(label_metadata[label_idx, 0]) > 0
- ]
-
- metrics[MetricType.mAPAveragedOverIOUs] = [
- mAPAveragedOverIOUs(
- value=float(mean_average_precision_average_over_ious),
- iou_thresholds=iou_thresholds,
- )
- ]
-
- metrics[MetricType.AR] = [
- AR(
- value=float(average_recall[score_idx][label_idx]),
- iou_thresholds=iou_thresholds,
- score_threshold=score_thresholds[score_idx],
- label=self.index_to_label[label_idx],
- )
- for score_idx in range(average_recall.shape[0])
- for label_idx in range(average_recall.shape[1])
- if int(label_metadata[label_idx, 0]) > 0
- ]
-
- metrics[MetricType.mAR] = [
- mAR(
- value=float(mean_average_recall[score_idx]),
- iou_thresholds=iou_thresholds,
- score_threshold=score_thresholds[score_idx],
- )
- for score_idx in range(mean_average_recall.shape[0])
- ]
-
- metrics[MetricType.ARAveragedOverScores] = [
- ARAveragedOverScores(
- value=float(average_recall_averaged_over_scores[label_idx]),
- score_thresholds=score_thresholds,
- iou_thresholds=iou_thresholds,
- label=self.index_to_label[label_idx],
- )
- for label_idx in range(self.n_labels)
- if int(label_metadata[label_idx, 0]) > 0
- ]
-
- metrics[MetricType.mARAveragedOverScores] = [
- mARAveragedOverScores(
- value=float(mean_average_recall_averaged_over_scores),
- score_thresholds=score_thresholds,
- iou_thresholds=iou_thresholds,
- )
- ]
-
- metrics[MetricType.Accuracy] = [
- Accuracy(
- value=float(accuracy[iou_idx, score_idx]),
- iou_threshold=iou_thresholds[iou_idx],
- score_threshold=score_thresholds[score_idx],
- )
- for iou_idx in range(accuracy.shape[0])
- for score_idx in range(accuracy.shape[1])
- ]
-
- metrics[MetricType.PrecisionRecallCurve] = [
- PrecisionRecallCurve(
- precisions=pr_curves[iou_idx, label_idx, :, 0]
- .astype(float)
- .tolist(),
- scores=pr_curves[iou_idx, label_idx, :, 1]
- .astype(float)
- .tolist(),
- iou_threshold=iou_threshold,
- label=label,
- )
- for iou_idx, iou_threshold in enumerate(iou_thresholds)
- for label_idx, label in self.index_to_label.items()
- if int(label_metadata[label_idx, 0]) > 0
- ]
-
- for label_idx, label in self.index_to_label.items():
-
- if label_metadata[label_idx, 0] == 0:
- continue
-
- for score_idx, score_threshold in enumerate(score_thresholds):
- for iou_idx, iou_threshold in enumerate(iou_thresholds):
-
- row = precision_recall[iou_idx][score_idx][label_idx]
- kwargs = {
- "label": label,
- "iou_threshold": iou_threshold,
- "score_threshold": score_threshold,
- }
- metrics[MetricType.Counts].append(
- Counts(
- tp=int(row[0]),
- fp=int(row[1]),
- fn=int(row[2]),
- **kwargs,
- )
- )
-
- metrics[MetricType.Precision].append(
- Precision(
- value=float(row[3]),
- **kwargs,
- )
- )
- metrics[MetricType.Recall].append(
- Recall(
- value=float(row[4]),
- **kwargs,
- )
- )
- metrics[MetricType.F1].append(
- F1(
- value=float(row[5]),
- **kwargs,
- )
- )
-
- if as_dict:
- return {
- mtype: [metric.to_dict() for metric in mvalues]
- for mtype, mvalues in metrics.items()
- }
-
- return metrics
+ return unpack_precision_recall_into_metric_lists(
+ results=results,
+ label_metadata=label_metadata,
+ iou_thresholds=iou_thresholds,
+ score_thresholds=score_thresholds,
+ index_to_label=self.index_to_label,
+ )

  def compute_confusion_matrix(
  self,
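Note that the as_dict flag removed above (and from compute_confusion_matrix and evaluate in the later hunks) has no direct replacement in this diff. Callers that passed as_dict=True can rebuild the old behavior themselves; a minimal migration sketch mirroring the removed conversion block, assuming the new Metric objects still expose to_dict():

    metrics = evaluator.compute_precision_recall(
        iou_thresholds=[0.5],
        score_thresholds=[0.5],
    )
    metrics_as_dicts = {
        mtype: [m.to_dict() for m in mlist]  # replaces the removed as_dict=True path
        for mtype, mlist in metrics.items()
    }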
@@ -676,27 +250,24 @@ class Evaluator:
  score_thresholds: list[float] = [0.5],
  number_of_examples: int = 0,
  filter_: Filter | None = None,
- as_dict: bool = False,
- ) -> list:
+ ) -> list[Metric]:
  """
  Computes confusion matrices at various thresholds.

  Parameters
  ----------
  iou_thresholds : list[float]
- A list of IoU thresholds to compute metrics over.
+ A list of IOU thresholds to compute metrics over.
  score_thresholds : list[float]
  A list of score thresholds to compute metrics over.
  number_of_examples : int, default=0
  Maximum number of annotation examples to return in ConfusionMatrix.
  filter_ : Filter, optional
  An optional filter object.
- as_dict : bool, default=False
- An option to return metrics as dictionaries.

  Returns
  -------
- list[ConfusionMatrix] | list[dict]
+ list[Metric]
  List of confusion matrices per threshold pair.
  """

@@ -709,11 +280,7 @@ class Evaluator:
  if detailed_pairs.size == 0:
  return list()

- (
- confusion_matrix,
- hallucinations,
- missing_predictions,
- ) = compute_confusion_matrix(
+ results = compute_confusion_matrix(
  data=detailed_pairs,
  label_metadata=label_metadata,
  iou_thresholds=np.array(iou_thresholds),
@@ -721,39 +288,16 @@ class Evaluator:
  n_examples=number_of_examples,
  )

- n_ious, n_scores, n_labels, _, _ = confusion_matrix.shape
- matrices = [
- ConfusionMatrix(
- iou_threshold=iou_thresholds[iou_idx],
- score_threshold=score_thresholds[score_idx],
- number_of_examples=number_of_examples,
- confusion_matrix=self._unpack_confusion_matrix(
- confusion_matrix=confusion_matrix[
- iou_idx, score_idx, :, :, :
- ],
- number_of_labels=n_labels,
- number_of_examples=number_of_examples,
- ),
- hallucinations=self._unpack_hallucinations(
- hallucinations=hallucinations[iou_idx, score_idx, :, :],
- number_of_labels=n_labels,
- number_of_examples=number_of_examples,
- ),
- missing_predictions=self._unpack_missing_predictions(
- missing_predictions=missing_predictions[
- iou_idx, score_idx, :, :
- ],
- number_of_labels=n_labels,
- number_of_examples=number_of_examples,
- ),
- )
- for iou_idx in range(n_ious)
- for score_idx in range(n_scores)
- ]
-
- if as_dict:
- return [m.to_dict() for m in matrices]
- return matrices
+ return unpack_confusion_matrix_into_metric_list(
+ results=results,
+ iou_thresholds=iou_thresholds,
+ score_thresholds=score_thresholds,
+ number_of_examples=number_of_examples,
+ index_to_uid=self.index_to_uid,
+ index_to_label=self.index_to_label,
+ groundtruth_examples=self.groundtruth_examples,
+ prediction_examples=self.prediction_examples,
+ )

  def evaluate(
  self,
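With this hunk, compute_confusion_matrix returns a flat list[Metric] instead of ConfusionMatrix objects or, via the removed as_dict=True, a list of dictionaries. A hypothetical caller-side equivalent of the old dictionary output (again assuming Metric.to_dict() exists):

    cm_metrics = evaluator.compute_confusion_matrix(
        iou_thresholds=[0.5],
        score_thresholds=[0.5],
        number_of_examples=2,
    )
    cm_dicts = [m.to_dict() for m in cm_metrics]  # stands in for the removed as_dict=True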
@@ -761,43 +305,40 @@ class Evaluator:
  score_thresholds: list[float] = [0.5],
  number_of_examples: int = 0,
  filter_: Filter | None = None,
- as_dict: bool = False,
- ) -> dict[MetricType, list]:
+ ) -> dict[MetricType, list[Metric]]:
  """
  Computes all avaiable metrics.

  Parameters
  ----------
  iou_thresholds : list[float]
- A list of IoU thresholds to compute metrics over.
+ A list of IOU thresholds to compute metrics over.
  score_thresholds : list[float]
  A list of score thresholds to compute metrics over.
  number_of_examples : int, default=0
  Maximum number of annotation examples to return in ConfusionMatrix.
  filter_ : Filter, optional
  An optional filter object.
- as_dict : bool, default=False
- An option to return metrics as dictionaries.

  Returns
  -------
- dict[MetricType, list]
- A dictionary mapping metric type to a list of metrics.
+ dict[MetricType, list[Metric]]
+ Lists of metrics organized by metric type.
  """
- results = self.compute_precision_recall(
+ metrics = self.compute_precision_recall(
  iou_thresholds=iou_thresholds,
  score_thresholds=score_thresholds,
  filter_=filter_,
- as_dict=as_dict,
  )
- results[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
+
+ metrics[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
  iou_thresholds=iou_thresholds,
  score_thresholds=score_thresholds,
  number_of_examples=number_of_examples,
  filter_=filter_,
- as_dict=as_dict,
  )
- return results
+
+ return metrics


  class DataLoader: