valor-lite 0.33.0__py3-none-any.whl → 0.33.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ from numpy.typing import NDArray
  from tqdm import tqdm
  from valor_lite.detection.annotation import Detection
  from valor_lite.detection.computation import (
- compute_detailed_pr_curve,
+ compute_detailed_counts,
  compute_iou,
  compute_metrics,
  compute_ranked_pairs,
@@ -19,8 +19,7 @@ from valor_lite.detection.metric import (
  APAveragedOverIOUs,
  ARAveragedOverScores,
  Counts,
- DetailedPrecisionRecallCurve,
- DetailedPrecisionRecallPoint,
+ DetailedCounts,
  MetricType,
  Precision,
  PrecisionRecallCurve,
@@ -35,12 +34,12 @@ from valor_lite.detection.metric import (
  Usage
  -----

- manager = DataLoader()
- manager.add_data(
+ loader = DataLoader()
+ loader.add_data(
  groundtruths=groundtruths,
  predictions=predictions,
  )
- evaluator = manager.finalize()
+ evaluator = loader.finalize()

  metrics = evaluator.evaluate(iou_thresholds=[0.5])

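Note: the Usage docstring above renames the loader object from manager to loader. A minimal sketch of the workflow, using the add_data(detections, show_progress=...) signature shown further down in this diff; the import path and the pre-built detections list are assumptions, not confirmed by the diff:

    from valor_lite.detection import DataLoader, Detection  # import path assumed

    detections: list[Detection] = []  # placeholder: fill with Detection objects before running

    loader = DataLoader()
    loader.add_data(detections, show_progress=True)
    evaluator = loader.finalize()

    metrics = evaluator.evaluate(iou_thresholds=[0.5])
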
@@ -59,6 +58,10 @@ class Filter:


  class Evaluator:
+ """
+ Object Detection Evaluator
+ """
+
  def __init__(self):

  # metadata
@@ -88,6 +91,9 @@ class Evaluator:

  @property
  def ignored_prediction_labels(self) -> list[tuple[str, str]]:
+ """
+ Prediction labels that are not present in the ground truth set.
+ """
  glabels = set(np.where(self._label_metadata[:, 0] > 0)[0])
  plabels = set(np.where(self._label_metadata[:, 1] > 0)[0])
  return [
@@ -96,6 +102,9 @@ class Evaluator:

  @property
  def missing_prediction_labels(self) -> list[tuple[str, str]]:
+ """
+ Ground truth labels that are not present in the prediction set.
+ """
  glabels = set(np.where(self._label_metadata[:, 0] > 0)[0])
  plabels = set(np.where(self._label_metadata[:, 1] > 0)[0])
  return [
@@ -104,6 +113,9 @@ class Evaluator:

  @property
  def metadata(self) -> dict:
+ """
+ Evaluation metadata.
+ """
  return {
  "n_datums": self.n_datums,
  "n_groundtruths": self.n_groundtruths,
@@ -120,7 +132,7 @@ class Evaluator:
  label_keys: list[str] | NDArray[np.int32] | None = None,
  ) -> Filter:
  """
- Creates a boolean mask that can be passed to an evaluation.
+ Creates a filter that can be passed to an evaluation.

  Parameters
  ----------
@@ -208,9 +220,6 @@ class Evaluator:
  return Filter(
  indices=np.where(mask_pairs)[0],
  label_metadata=label_metadata,
- # uids=datum_uids,
- # labels=labels,
- # label_keys=label_keys,
  )

  def evaluate(
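Note: the dropped commented-out keywords above suggest the Filter object now carries only pair indices and label metadata. A sketch of building and applying a filter, continuing the workflow sketch earlier; the datum_uids keyword is an assumption inferred from the removed comments:

    # keyword name inferred from the removed "# uids=datum_uids" comment; treat it as an assumption
    filter_ = evaluator.create_filter(datum_uids=["uid0", "uid1"])

    # the filter is passed through evaluate()'s filter_ parameter (documented in the next hunk)
    metrics = evaluator.evaluate(
        iou_thresholds=[0.5, 0.75],
        score_thresholds=[0.5],
        filter_=filter_,
    )
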
@@ -220,16 +229,21 @@ class Evaluator:
  filter_: Filter | None = None,
  ) -> dict[MetricType, list]:
  """
- Runs evaluation over cached data.
+ Performs an evaluation and returns metrics.

  Parameters
  ----------
  iou_thresholds : list[float]
- A list of iou thresholds to compute over.
+ A list of IoU thresholds to compute metrics over.
  score_thresholds : list[float]
- A list of score thresholds to compute over.
- filter_mask : NDArray[bool], optional
- A boolean mask that filters the cached data.
+ A list of score thresholds to compute metrics over.
+ filter_ : Filter, optional
+ An optional filter object.
+
+ Returns
+ -------
+ dict[MetricType, list]
+ A dictionary mapping MetricType enumerations to lists of computed metrics.
  """

  data = self._ranked_pairs
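Note: per the documented return type, results come back keyed by MetricType. A small sketch of reading them, continuing the workflow sketch above and assuming the metric objects expose their constructor arguments as attributes:

    # MetricType is imported from valor_lite.detection.metric in this module
    metrics = evaluator.evaluate(iou_thresholds=[0.5, 0.75])

    # dict[MetricType, list]: each key maps to a list of metric objects
    for ap in metrics[MetricType.AP]:
        print(ap.label, ap.iou_threshold, ap.value)  # attribute names assumed from the kwargs below
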
@@ -255,7 +269,7 @@ class Evaluator:
  pr_curves,
  ) = compute_metrics(
  data=data,
- label_counts=label_metadata,
+ label_metadata=label_metadata,
  iou_thresholds=np.array(iou_thresholds),
  score_thresholds=np.array(score_thresholds),
  )
@@ -265,7 +279,7 @@ class Evaluator:
  metrics[MetricType.AP] = [
  AP(
  value=average_precision[iou_idx][label_idx],
- iou=iou_thresholds[iou_idx],
+ iou_threshold=iou_thresholds[iou_idx],
  label=self.index_to_label[label_idx],
  )
  for iou_idx in range(average_precision.shape[0])
@@ -276,7 +290,7 @@ class Evaluator:
  metrics[MetricType.mAP] = [
  mAP(
  value=mean_average_precision[iou_idx][label_key_idx],
- iou=iou_thresholds[iou_idx],
+ iou_threshold=iou_thresholds[iou_idx],
  label_key=self.index_to_label_key[label_key_idx],
  )
  for iou_idx in range(mean_average_precision.shape[0])
@@ -286,7 +300,7 @@ class Evaluator:
  metrics[MetricType.APAveragedOverIOUs] = [
  APAveragedOverIOUs(
  value=average_precision_average_over_ious[label_idx],
- ious=iou_thresholds,
+ iou_thresholds=iou_thresholds,
  label=self.index_to_label[label_idx],
  )
  for label_idx in range(self.n_labels)
@@ -296,7 +310,7 @@ class Evaluator:
  metrics[MetricType.mAPAveragedOverIOUs] = [
  mAPAveragedOverIOUs(
  value=mean_average_precision_average_over_ious[label_key_idx],
- ious=iou_thresholds,
+ iou_thresholds=iou_thresholds,
  label_key=self.index_to_label_key[label_key_idx],
  )
  for label_key_idx in range(
@@ -307,8 +321,8 @@ class Evaluator:
  metrics[MetricType.AR] = [
  AR(
  value=average_recall[score_idx][label_idx],
- ious=iou_thresholds,
- score=score_thresholds[score_idx],
+ iou_thresholds=iou_thresholds,
+ score_threshold=score_thresholds[score_idx],
  label=self.index_to_label[label_idx],
  )
  for score_idx in range(average_recall.shape[0])
@@ -319,8 +333,8 @@ class Evaluator:
  metrics[MetricType.mAR] = [
  mAR(
  value=mean_average_recall[score_idx][label_key_idx],
- ious=iou_thresholds,
- score=score_thresholds[score_idx],
+ iou_thresholds=iou_thresholds,
+ score_threshold=score_thresholds[score_idx],
  label_key=self.index_to_label_key[label_key_idx],
  )
  for score_idx in range(mean_average_recall.shape[0])
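Note: the same rename pattern runs through the AP/AR family above and below: iou becomes iou_threshold, ious becomes iou_thresholds, score becomes score_threshold, scores becomes score_thresholds. Code that reads these fields needs the new names, e.g. (attribute access assumed to mirror the constructor keywords):

    for ar in metrics[MetricType.AR]:
        # prior to 0.33.2 these fields were named ar.ious and ar.score
        print(ar.label, ar.iou_thresholds, ar.score_threshold, ar.value)
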
@@ -330,8 +344,8 @@ class Evaluator:
  metrics[MetricType.ARAveragedOverScores] = [
  ARAveragedOverScores(
  value=average_recall_averaged_over_scores[label_idx],
- scores=score_thresholds,
- ious=iou_thresholds,
+ score_thresholds=score_thresholds,
+ iou_thresholds=iou_thresholds,
  label=self.index_to_label[label_idx],
  )
  for label_idx in range(self.n_labels)
@@ -341,8 +355,8 @@ class Evaluator:
  metrics[MetricType.mARAveragedOverScores] = [
  mARAveragedOverScores(
  value=mean_average_recall_averaged_over_scores[label_key_idx],
- scores=score_thresholds,
- ious=iou_thresholds,
+ score_thresholds=score_thresholds,
+ iou_thresholds=iou_thresholds,
  label_key=self.index_to_label_key[label_key_idx],
  )
  for label_key_idx in range(
@@ -353,7 +367,7 @@ class Evaluator:
  metrics[MetricType.PrecisionRecallCurve] = [
  PrecisionRecallCurve(
  precision=list(pr_curves[iou_idx][label_idx]),
- iou=iou_threshold,
+ iou_threshold=iou_threshold,
  label=label,
  )
  for iou_idx, iou_threshold in enumerate(iou_thresholds)
@@ -361,14 +375,18 @@ class Evaluator:
  if int(label_metadata[label_idx][0]) > 0
  ]

- for iou_idx, iou_threshold in enumerate(iou_thresholds):
+ for label_idx, label in self.index_to_label.items():
  for score_idx, score_threshold in enumerate(score_thresholds):
- for label_idx, label in self.index_to_label.items():
+ for iou_idx, iou_threshold in enumerate(iou_thresholds):
+
+ if label_metadata[label_idx, 0] == 0:
+ continue
+
  row = precision_recall[iou_idx][score_idx][label_idx]
  kwargs = {
  "label": label,
- "iou": iou_threshold,
- "score": score_threshold,
+ "iou_threshold": iou_threshold,
+ "score_threshold": score_threshold,
  }
  metrics[MetricType.Counts].append(
  Counts(
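Note: the reordered loops above now also skip labels that have no ground truths before building Counts/Precision/Recall entries. The check reads column 0 of the label metadata table, which (per the ignored_prediction_labels and missing_prediction_labels properties) counts ground truths per label while column 1 counts predictions. A toy illustration of the skip, with a made-up label_metadata array:

    import numpy as np

    # column 0: ground truth count, column 1: prediction count (layout inferred from the properties above)
    label_metadata = np.array([[10, 12], [0, 3]])

    for label_idx in range(label_metadata.shape[0]):
        if label_metadata[label_idx, 0] == 0:
            continue  # label 1 has no ground truths, so it produces no precision/recall rows
        ...
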
@@ -378,6 +396,7 @@ class Evaluator:
  **kwargs,
  )
  )
+
  metrics[MetricType.Precision].append(
  Precision(
  value=row[3],
@@ -405,21 +424,38 @@ class Evaluator:

  return metrics

- def compute_detailed_pr_curve(
+ def compute_detailed_counts(
  self,
  iou_thresholds: list[float] = [0.5],
  score_thresholds: list[float] = [
  score / 10.0 for score in range(1, 11)
  ],
  n_samples: int = 0,
- ) -> list[DetailedPrecisionRecallCurve]:
+ ) -> list[list[DetailedCounts]]:
+ """
+ Computes detailed counting metrics.
+
+ Parameters
+ ----------
+ iou_thresholds : list[float], default=[0.5]
+ List of IoU thresholds to compute metrics for.
+ score_thresholds : list[float], default=[0.1,0.2,...,1.0]
+ List of confidence thresholds to compute metrics for.
+ n_samples : int, default=0
+ Number of datum samples to return per metric.
+
+ Returns
+ -------
+ list[list[DetailedCounts]]
+ Outer list is indexed by label, inner list is by IoU.
+ """

  if self._detailed_pairs.size == 0:
  return list()

- metrics = compute_detailed_pr_curve(
+ metrics = compute_detailed_counts(
  self._detailed_pairs,
- label_counts=self._label_metadata,
+ label_metadata=self._label_metadata,
  iou_thresholds=np.array(iou_thresholds),
  score_thresholds=np.array(score_thresholds),
  n_samples=n_samples,
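Note: per the new docstring, the return value is indexed by label first and IoU threshold second. A usage sketch, continuing the workflow sketch above:

    detailed = evaluator.compute_detailed_counts(
        iou_thresholds=[0.5, 0.75],
        score_thresholds=[0.25, 0.5, 0.75],
        n_samples=1,
    )

    # outer index: label, inner index: IoU threshold
    first_label_at_iou50 = detailed[0][0]
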
@@ -431,74 +467,98 @@ class Evaluator:
  fn_misclf_idx = fp_halluc_idx + n_samples + 1
  fn_misprd_idx = fn_misclf_idx + n_samples + 1

- results = list()
- for label_idx in range(len(metrics)):
- n_ious, n_scores, _, _ = metrics.shape
- for iou_idx in range(n_ious):
- curve = DetailedPrecisionRecallCurve(
- iou=iou_thresholds[iou_idx],
- value=list(),
+ n_ious, n_scores, n_labels, _ = metrics.shape
+ return [
+ [
+ DetailedCounts(
+ iou_threshold=iou_thresholds[iou_idx],
  label=self.index_to_label[label_idx],
- )
- for score_idx in range(n_scores):
- curve.value.append(
- DetailedPrecisionRecallPoint(
- score=score_thresholds[score_idx],
- tp=metrics[iou_idx][score_idx][label_idx][tp_idx],
- tp_examples=[
- self.index_to_uid[int(datum_idx)]
- for datum_idx in metrics[iou_idx][score_idx][
- label_idx
- ][tp_idx + 1 : fp_misclf_idx]
- if int(datum_idx) >= 0
- ],
- fp_misclassification=metrics[iou_idx][score_idx][
+ score_thresholds=score_thresholds,
+ tp=metrics[iou_idx, :, label_idx, tp_idx]
+ .astype(int)
+ .tolist(),
+ tp_examples=[
+ [
+ self.index_to_uid[int(datum_idx)]
+ for datum_idx in metrics[iou_idx][score_idx][
+ label_idx
+ ][tp_idx + 1 : fp_misclf_idx]
+ if int(datum_idx) >= 0
+ ]
+ for score_idx in range(n_scores)
+ ],
+ fp_misclassification=metrics[
+ iou_idx, :, label_idx, fp_misclf_idx
+ ]
+ .astype(int)
+ .tolist(),
+ fp_misclassification_examples=[
+ [
+ self.index_to_uid[int(datum_idx)]
+ for datum_idx in metrics[iou_idx][score_idx][
  label_idx
- ][fp_misclf_idx],
- fp_misclassification_examples=[
- self.index_to_uid[int(datum_idx)]
- for datum_idx in metrics[iou_idx][score_idx][
- label_idx
- ][fp_misclf_idx + 1 : fp_halluc_idx]
- if int(datum_idx) >= 0
- ],
- fp_hallucination=metrics[iou_idx][score_idx][
+ ][fp_misclf_idx + 1 : fp_halluc_idx]
+ if int(datum_idx) >= 0
+ ]
+ for score_idx in range(n_scores)
+ ],
+ fp_hallucination=metrics[
+ iou_idx, :, label_idx, fp_halluc_idx
+ ]
+ .astype(int)
+ .tolist(),
+ fp_hallucination_examples=[
+ [
+ self.index_to_uid[int(datum_idx)]
+ for datum_idx in metrics[iou_idx][score_idx][
  label_idx
- ][fp_halluc_idx],
- fp_hallucination_examples=[
- self.index_to_uid[int(datum_idx)]
- for datum_idx in metrics[iou_idx][score_idx][
- label_idx
- ][fp_halluc_idx + 1 : fn_misclf_idx]
- if int(datum_idx) >= 0
- ],
- fn_misclassification=metrics[iou_idx][score_idx][
+ ][fp_halluc_idx + 1 : fn_misclf_idx]
+ if int(datum_idx) >= 0
+ ]
+ for score_idx in range(n_scores)
+ ],
+ fn_misclassification=metrics[
+ iou_idx, :, label_idx, fn_misclf_idx
+ ]
+ .astype(int)
+ .tolist(),
+ fn_misclassification_examples=[
+ [
+ self.index_to_uid[int(datum_idx)]
+ for datum_idx in metrics[iou_idx][score_idx][
  label_idx
- ][fn_misclf_idx],
- fn_misclassification_examples=[
- self.index_to_uid[int(datum_idx)]
- for datum_idx in metrics[iou_idx][score_idx][
- label_idx
- ][fn_misclf_idx + 1 : fn_misprd_idx]
- if int(datum_idx) >= 0
- ],
- fn_missing_prediction=metrics[iou_idx][score_idx][
+ ][fn_misclf_idx + 1 : fn_misprd_idx]
+ if int(datum_idx) >= 0
+ ]
+ for score_idx in range(n_scores)
+ ],
+ fn_missing_prediction=metrics[
+ iou_idx, :, label_idx, fn_misprd_idx
+ ]
+ .astype(int)
+ .tolist(),
+ fn_missing_prediction_examples=[
+ [
+ self.index_to_uid[int(datum_idx)]
+ for datum_idx in metrics[iou_idx][score_idx][
  label_idx
- ][fn_misprd_idx],
- fn_missing_prediction_examples=[
- self.index_to_uid[int(datum_idx)]
- for datum_idx in metrics[iou_idx][score_idx][
- label_idx
- ][fn_misprd_idx + 1 :]
- if int(datum_idx) >= 0
- ],
- )
- )
- results.append(curve)
- return results
+ ][fn_misprd_idx + 1 :]
+ if int(datum_idx) >= 0
+ ]
+ for score_idx in range(n_scores)
+ ],
+ )
+ for iou_idx in range(n_ious)
+ ]
+ for label_idx in range(n_labels)
+ ]


  class DataLoader:
+ """
+ Object Detection DataLoader
+ """
+
  def __init__(self):
  self._evaluator = Evaluator()
  self.pairs = list()
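Note: in the rewritten compute_detailed_counts return above, each DetailedCounts bundles one label and one IoU threshold and stores its counts as lists aligned with score_thresholds, with the matching *_examples lists holding up to n_samples datum uids per threshold. Reading it might look like this, continuing the earlier sketch (attribute names assumed from the constructor keywords):

    dc = detailed[0][0]  # one label at one IoU threshold
    for i, score in enumerate(dc.score_thresholds):
        print(
            score,
            dc.tp[i],                    # true positives at this score threshold
            dc.fp_misclassification[i],  # false positives attributed to label confusion
            dc.fp_hallucination[i],      # false positives without a matching ground truth
            dc.fn_misclassification[i],
            dc.fn_missing_prediction[i],
            dc.tp_examples[i],           # up to n_samples example datum uids
        )
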
@@ -506,6 +566,19 @@ class DataLoader:
  self.prediction_count = defaultdict(lambda: defaultdict(int))

  def _add_datum(self, uid: str) -> int:
+ """
+ Helper function for adding a datum to the cache.
+
+ Parameters
+ ----------
+ uid : str
+ The datum uid.
+
+ Returns
+ -------
+ int
+ The datum index.
+ """
  if uid not in self._evaluator.uid_to_index:
  index = len(self._evaluator.uid_to_index)
  self._evaluator.uid_to_index[uid] = index
@@ -513,6 +586,22 @@ class DataLoader:
  return self._evaluator.uid_to_index[uid]

  def _add_label(self, label: tuple[str, str]) -> tuple[int, int]:
+ """
+ Helper function for adding a label to the cache.
+
+ Parameters
+ ----------
+ label : tuple[str, str]
+ The label as a tuple in format (key, value).
+
+ Returns
+ -------
+ int
+ Label index.
+ int
+ Label key index.
+ """
+
  label_id = len(self._evaluator.index_to_label)
  label_key_id = len(self._evaluator.index_to_label_key)
  if label not in self._evaluator.label_to_index:
@@ -540,6 +629,16 @@ class DataLoader:
  detections: list[Detection],
  show_progress: bool = False,
  ):
+ """
+ Adds detections to the cache.
+
+ Parameters
+ ----------
+ detections : list[Detection]
+ A list of Detection objects.
+ show_progress : bool, default=False
+ Toggle for tqdm progress bar.
+ """
  disable_tqdm = not show_progress
  for detection in tqdm(detections, disable=disable_tqdm):

@@ -655,6 +754,17 @@ class DataLoader:
  detections: list[tuple[dict, dict]],
  show_progress: bool = False,
  ):
+ """
+ Adds Valor-format detections to the cache.
+
+ Parameters
+ ----------
+ detections : list[tuple[dict, dict]]
+ A list of groundtruth, prediction pairs in Valor-format dictionaries.
+ show_progress : bool, default=False
+ Toggle for tqdm progress bar.
+ """
+
  def _get_bbox_extrema(
  data: list[list[list[float]]],
  ) -> tuple[float, float, float, float]:
@@ -776,6 +886,14 @@ class DataLoader:
  self.pairs.append(np.array(pairs))

  def finalize(self) -> Evaluator:
+ """
+ Performs data finalization and some preprocessing steps.
+
+ Returns
+ -------
+ Evaluator
+ A ready-to-use evaluator object.
+ """

  self.pairs = [pair for pair in self.pairs if pair.size > 0]
  if len(self.pairs) == 0:
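Note: since add_data appends each batch of pairs to the loader's cache and finalize drops empty batches before ranking, the loader can plausibly be fed incrementally and finalized once at the end. A sketch under that assumption (the batch variables are placeholder lists of Detection objects):

    loader = DataLoader()
    loader.add_data(first_batch, show_progress=True)  # placeholder list[Detection]
    loader.add_data(second_batch)                     # batches accumulate until finalize()
    evaluator = loader.finalize()                     # returns a ready-to-use Evaluator
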
@@ -839,7 +957,7 @@ class DataLoader:

  self._evaluator._ranked_pairs = compute_ranked_pairs(
  self.pairs,
- label_counts=self._evaluator._label_metadata,
+ label_metadata=self._evaluator._label_metadata,
  )

  return self._evaluator