valor-lite 0.33.6.tar.gz → 0.33.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {valor_lite-0.33.6 → valor_lite-0.33.7}/PKG-INFO +1 -1
  2. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_average_precision.py +48 -32
  3. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_average_recall.py +17 -20
  4. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_confusion_matrix.py +10 -7
  5. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_counts.py +40 -15
  6. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_pr_curve.py +2 -3
  7. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_precision.py +36 -13
  8. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_recall.py +36 -13
  9. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/manager.py +2 -0
  10. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/manager.py +9 -0
  11. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/PKG-INFO +1 -1
  12. {valor_lite-0.33.6 → valor_lite-0.33.7}/LICENSE +0 -0
  13. {valor_lite-0.33.6 → valor_lite-0.33.7}/README.md +0 -0
  14. {valor_lite-0.33.6 → valor_lite-0.33.7}/benchmarks/.gitignore +0 -0
  15. {valor_lite-0.33.6 → valor_lite-0.33.7}/benchmarks/benchmark_classification.py +0 -0
  16. {valor_lite-0.33.6 → valor_lite-0.33.7}/benchmarks/benchmark_objdet.py +0 -0
  17. {valor_lite-0.33.6 → valor_lite-0.33.7}/examples/.gitignore +0 -0
  18. {valor_lite-0.33.6 → valor_lite-0.33.7}/examples/object-detection.ipynb +0 -0
  19. {valor_lite-0.33.6 → valor_lite-0.33.7}/examples/tabular_classification.ipynb +0 -0
  20. {valor_lite-0.33.6 → valor_lite-0.33.7}/pyproject.toml +0 -0
  21. {valor_lite-0.33.6 → valor_lite-0.33.7}/setup.cfg +0 -0
  22. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/conftest.py +0 -0
  23. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_accuracy.py +0 -0
  24. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_confusion_matrix.py +0 -0
  25. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_counts.py +0 -0
  26. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_dataloader.py +0 -0
  27. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_evaluator.py +0 -0
  28. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_f1.py +0 -0
  29. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_filtering.py +0 -0
  30. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_precision.py +0 -0
  31. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_recall.py +0 -0
  32. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_rocauc.py +0 -0
  33. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_schemas.py +0 -0
  34. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_stability.py +0 -0
  35. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/__init__.py +0 -0
  36. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/conftest.py +0 -0
  37. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_dataloader.py +0 -0
  38. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_evaluator.py +0 -0
  39. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_filtering.py +0 -0
  40. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_iou.py +0 -0
  41. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_schemas.py +0 -0
  42. {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_stability.py +0 -0
  43. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/__init__.py +0 -0
  44. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/__init__.py +0 -0
  45. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/annotation.py +0 -0
  46. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/computation.py +0 -0
  47. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/metric.py +0 -0
  48. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/__init__.py +0 -0
  49. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/annotation.py +0 -0
  50. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/computation.py +0 -0
  51. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/metric.py +0 -0
  52. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/schemas.py +0 -0
  53. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/SOURCES.txt +0 -0
  54. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/dependency_links.txt +0 -0
  55. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/requires.txt +0 -0
  56. {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/top_level.txt +0 -0

{valor_lite-0.33.6 → valor_lite-0.33.7}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: valor-lite
-Version: 0.33.6
+Version: 0.33.7
 Summary: Compute valor metrics directly in your client.
 License: MIT License


{valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_average_precision.py

@@ -98,6 +98,7 @@ def test_ap_metrics(

     metrics = evaluator.evaluate(
         iou_thresholds=[0.1, 0.6],
+        as_dict=True,
     )

     assert evaluator.ignored_prediction_labels == []
@@ -108,7 +109,7 @@ def test_ap_metrics(
     assert evaluator.n_predictions == 2

     # test AP
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -149,7 +150,7 @@ def test_ap_metrics(
         assert m in actual_metrics

     # test mAP
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAP]]
+    actual_metrics = [m for m in metrics[MetricType.mAP]]
     expected_metrics = [
         {
             "type": "mAP",
@@ -190,9 +191,7 @@ def test_ap_metrics(
         assert m in actual_metrics

     # test AP Averaged Over IoUs
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.APAveragedOverIOUs]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.APAveragedOverIOUs]]
     expected_metrics = [
         {
             "type": "APAveragedOverIOUs",
@@ -217,9 +216,7 @@ def test_ap_metrics(
         assert m in actual_metrics

     # test mAP Averaged Over IoUs
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.mAPAveragedOverIOUs]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.mAPAveragedOverIOUs]]
     expected_metrics = [
         {
             "type": "mAPAveragedOverIOUs",
@@ -265,10 +262,11 @@ def test_ap_using_torch_metrics_example(

     metrics = evaluator.evaluate(
         iou_thresholds=[0.5, 0.75],
+        as_dict=True,
     )

     # test AP
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -357,7 +355,7 @@ def test_ap_using_torch_metrics_example(
         assert m in actual_metrics

     # test mAP
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAP]]
+    actual_metrics = [m for m in metrics[MetricType.mAP]]
     expected_metrics = [
         {
             "type": "mAP",
@@ -393,9 +391,12 @@ def test_ap_false_negatives_single_datum_baseline(
     loader = DataLoader()
     loader.add_bounding_boxes(false_negatives_single_datum_baseline_detections)
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -426,9 +427,12 @@ def test_ap_false_negatives_single_datum(
     loader = DataLoader()
     loader.add_bounding_boxes(false_negatives_single_datum_detections)
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -467,9 +471,12 @@ def test_ap_false_negatives_two_datums_one_empty_low_confidence_of_fp(
         false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -507,9 +514,12 @@ def test_ap_false_negatives_two_datums_one_empty_high_confidence_of_fp(
         false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -547,9 +557,12 @@ def test_ap_false_negatives_two_datums_one_only_with_different_class_low_confide
         false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -598,9 +611,12 @@ def test_ap_false_negatives_two_datums_one_only_with_different_class_high_confid
         false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -662,9 +678,12 @@ def test_ap_ranked_pair_ordering(
         "n_predictions": 4,
     }

-    metrics = evaluator.evaluate(iou_thresholds=[0.5, 0.75])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5, 0.75],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "parameters": {
@@ -720,7 +739,7 @@ def test_ap_ranked_pair_ordering(
     for m in expected_metrics:
         assert m in actual_metrics

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAP]]
+    actual_metrics = [m for m in metrics[MetricType.mAP]]
     expected_metrics = [
         {
             "parameters": {"label_key": "class", "iou_threshold": 0.5},
@@ -738,9 +757,7 @@ def test_ap_ranked_pair_ordering(
     for m in expected_metrics:
         assert m in actual_metrics

-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.APAveragedOverIOUs]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.APAveragedOverIOUs]]
     expected_metrics = [
         {
             "parameters": {
@@ -772,9 +789,7 @@ def test_ap_ranked_pair_ordering(
     for m in expected_metrics:
         assert m in actual_metrics

-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.mAPAveragedOverIOUs]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.mAPAveragedOverIOUs]]
     expected_metrics = [
         {
             "parameters": {
@@ -812,12 +827,13 @@ def test_ap_true_positive_deassignment(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.5],
         score_thresholds=[0.5],
+        as_dict=True,
     )

     assert len(metrics) == 14

     # test AP
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",

{valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_average_recall.py

@@ -102,6 +102,7 @@ def test_ar_metrics(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.1, 0.6],
         score_thresholds=[0.0],
+        as_dict=True,
     )

     assert evaluator.ignored_prediction_labels == []
@@ -112,7 +113,7 @@ def test_ar_metrics(
     assert evaluator.n_predictions == 2

     # test AR
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]]
+    actual_metrics = [m for m in metrics[MetricType.AR]]
     expected_metrics = [
         {
             "type": "AR",
@@ -139,7 +140,7 @@ def test_ar_metrics(
         assert m in actual_metrics

     # test mAR
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAR]]
+    actual_metrics = [m for m in metrics[MetricType.mAR]]
     expected_metrics = [
         {
             "type": "mAR",
@@ -166,9 +167,7 @@ def test_ar_metrics(
         assert m in actual_metrics

     # test AR Averaged Over IoUs
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.ARAveragedOverScores]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.ARAveragedOverScores]]
     expected_metrics = [
         {
             "type": "ARAveragedOverScores",
@@ -195,9 +194,7 @@ def test_ar_metrics(
         assert m in actual_metrics

     # test mAR Averaged Over IoUs
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.mARAveragedOverScores]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.mARAveragedOverScores]]
     expected_metrics = [
         {
             "type": "mARAveragedOverScores",
@@ -249,10 +246,11 @@ def test_ar_using_torch_metrics_example(
     metrics = evaluator.evaluate(
         iou_thresholds=iou_thresholds,
         score_thresholds=score_thresholds,
+        as_dict=True,
     )

     # test AR
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]]
+    actual_metrics = [m for m in metrics[MetricType.AR]]
     expected_metrics = [
         {
             "type": "AR",
@@ -306,7 +304,7 @@ def test_ar_using_torch_metrics_example(
         assert m in actual_metrics

     # test mAR
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAR]]
+    actual_metrics = [m for m in metrics[MetricType.mAR]]
     expected_metrics = [
         {
             "type": "mAR",
@@ -324,9 +322,7 @@ def test_ar_using_torch_metrics_example(
         assert m in actual_metrics

     # test ARAveragedOverScores
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.ARAveragedOverScores]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.ARAveragedOverScores]]
     expected_metrics = [
         {
             "type": "ARAveragedOverScores",
@@ -380,9 +376,7 @@ def test_ar_using_torch_metrics_example(
         assert m in actual_metrics

     # test mARAveragedOverScores
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.mARAveragedOverScores]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.mARAveragedOverScores]]
     expected_metrics = [
         {
             "type": "mARAveragedOverScores",
@@ -418,12 +412,13 @@ def test_ar_true_positive_deassignment(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.5],
         score_thresholds=[0.5],
+        as_dict=True,
     )

     assert len(metrics) == 14

     # test AR
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]]
+    actual_metrics = [m for m in metrics[MetricType.AR]]
     expected_metrics = [
         {
             "type": "AR",
@@ -474,10 +469,12 @@ def test_ar_ranked_pair_ordering(
     }

     metrics = evaluator.evaluate(
-        iou_thresholds=[0.5, 0.75], score_thresholds=[0.0]
+        iou_thresholds=[0.5, 0.75],
+        score_thresholds=[0.0],
+        as_dict=True,
     )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]]
+    actual_metrics = [m for m in metrics[MetricType.AR]]
     expected_metrics = expected_metrics = [
         {
             "type": "AR",
@@ -512,7 +509,7 @@ def test_ar_ranked_pair_ordering(
     for m in expected_metrics:
         assert m in actual_metrics

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAR]]
+    actual_metrics = [m for m in metrics[MetricType.mAR]]
     expected_metrics = expected_metrics = [
         {
             "type": "mAR",

{valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_confusion_matrix.py

@@ -441,9 +441,10 @@ def test_confusion_matrix(
         score_thresholds=[0.05, 0.3, 0.35, 0.45, 0.55, 0.95],
         number_of_examples=1,
         metrics_to_return=[MetricType.ConfusionMatrix],
+        as_dict=True,
     )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.ConfusionMatrix]]
+    actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
     expected_metrics = [
         {
             "type": "ConfusionMatrix",
@@ -757,9 +758,10 @@ def test_confusion_matrix(
         score_thresholds=[0.05, 0.3, 0.35, 0.45, 0.55, 0.95],
         number_of_examples=1,
         metrics_to_return=[MetricType.ConfusionMatrix],
+        as_dict=True,
     )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.ConfusionMatrix]]
+    actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
     expected_metrics = [
         {
             "type": "ConfusionMatrix",
@@ -1129,11 +1131,12 @@ def test_confusion_matrix_using_torch_metrics_example(
         score_thresholds=[0.05, 0.25, 0.35, 0.55, 0.75, 0.8, 0.85, 0.95],
         number_of_examples=0,
         metrics_to_return=[MetricType.ConfusionMatrix],
+        as_dict=True,
     )

     assert len(metrics[MetricType.ConfusionMatrix]) == 16

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.ConfusionMatrix]]
+    actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
     expected_metrics = [
         {
             "type": "ConfusionMatrix",
@@ -1540,11 +1543,12 @@ def test_confusion_matrix_fp_hallucination_edge_case(
         score_thresholds=[0.5, 0.85],
         number_of_examples=1,
         metrics_to_return=[MetricType.ConfusionMatrix],
+        as_dict=True,
     )

     assert len(metrics[MetricType.ConfusionMatrix]) == 2

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.ConfusionMatrix]]
+    actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
     expected_metrics = [
         {
             "type": "ConfusionMatrix",
@@ -1667,11 +1671,10 @@ def test_confusion_matrix_ranked_pair_ordering(
         score_thresholds=[0.0],
         number_of_examples=0,
         metrics_to_return=[MetricType.ConfusionMatrix],
+        as_dict=True,
     )

-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.ConfusionMatrix]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
     expected_metrics = [
         {
             "type": "ConfusionMatrix",

{valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_counts.py

@@ -33,6 +33,7 @@ def test_counts_metrics(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.1, 0.6],
         score_thresholds=[0.0, 0.5],
+        as_dict=True,
     )

     assert evaluator.ignored_prediction_labels == []
@@ -43,7 +44,7 @@ def test_counts_metrics(
     assert evaluator.n_predictions == 2

     # test Counts
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -169,10 +170,12 @@ def test_counts_false_negatives_single_datum_baseline(
     evaluator = loader.finalize()

     metrics = evaluator.evaluate(
-        iou_thresholds=[0.5], score_thresholds=[0.0, 0.9]
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0, 0.9],
+        as_dict=True,
     )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -224,9 +227,13 @@ def test_counts_false_negatives_single_datum(
     loader = DataLoader()
     loader.add_bounding_boxes(false_negatives_single_datum_detections)
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -270,9 +277,13 @@ def test_counts_false_negatives_two_datums_one_empty_low_confidence_of_fp(
         false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -315,9 +326,13 @@ def test_counts_false_negatives_two_datums_one_empty_high_confidence_of_fp(
         false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -360,9 +375,13 @@ def test_counts_false_negatives_two_datums_one_only_with_different_class_low_con
         false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -421,9 +440,13 @@ def test_counts_false_negatives_two_datums_one_only_with_different_class_high_co
         false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -497,10 +520,12 @@ def test_counts_ranked_pair_ordering(
     }

     metrics = evaluator.evaluate(
-        iou_thresholds=[0.5, 0.75], score_thresholds=[0.0]
+        iou_thresholds=[0.5, 0.75],
+        score_thresholds=[0.0],
+        as_dict=True,
     )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",

{valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_pr_curve.py

@@ -56,6 +56,7 @@ def test_pr_curve_using_torch_metrics_example(

     metrics = evaluator.evaluate(
         iou_thresholds=[0.5, 0.75],
+        as_dict=True,
     )

     # AP = 1.0
@@ -86,9 +87,7 @@ def test_pr_curve_using_torch_metrics_example(
     )

     # test PrecisionRecallCurve
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.PrecisionRecallCurve]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.PrecisionRecallCurve]]
     expected_metrics = [
         {
             "type": "PrecisionRecallCurve",

{valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_precision.py

@@ -32,6 +32,7 @@ def test_precision_metrics(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.1, 0.6],
         score_thresholds=[0.0, 0.5],
+        as_dict=True,
     )

     assert evaluator.ignored_prediction_labels == []
@@ -42,7 +43,7 @@ def test_precision_metrics(
     assert evaluator.n_predictions == 2

     # test Precision
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -136,10 +137,12 @@ def test_precision_false_negatives_single_datum_baseline(
     evaluator = loader.finalize()

     metrics = evaluator.evaluate(
-        iou_thresholds=[0.5], score_thresholds=[0.0, 0.9]
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0, 0.9],
+        as_dict=True,
     )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -183,9 +186,13 @@ def test_precision_false_negatives_single_datum(
     loader = DataLoader()
     loader.add_bounding_boxes(false_negatives_single_datum_detections)
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -225,9 +232,13 @@ def test_precision_false_negatives_two_datums_one_empty_low_confidence_of_fp(
         false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -266,9 +277,13 @@ def test_precision_false_negatives_two_datums_one_empty_high_confidence_of_fp(
         false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -307,9 +322,13 @@ def test_precision_false_negatives_two_datums_one_only_with_different_class_low_
         false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -360,9 +379,13 @@ def test_precision_false_negatives_two_datums_one_only_with_different_class_high
         false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",

{valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_recall.py

@@ -32,6 +32,7 @@ def test_recall_metrics(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.1, 0.6],
         score_thresholds=[0.0, 0.5],
+        as_dict=True,
     )

     assert evaluator.ignored_prediction_labels == []
@@ -42,7 +43,7 @@ def test_recall_metrics(
     assert evaluator.n_predictions == 2

     # test Recall
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -136,10 +137,12 @@ def test_recall_false_negatives_single_datum_baseline(
     evaluator = loader.finalize()

     metrics = evaluator.evaluate(
-        iou_thresholds=[0.5], score_thresholds=[0.0, 0.9]
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0, 0.9],
+        as_dict=True,
     )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -183,9 +186,13 @@ def test_recall_false_negatives_single_datum(
     loader = DataLoader()
     loader.add_bounding_boxes(false_negatives_single_datum_detections)
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -225,9 +232,13 @@ def test_recall_false_negatives_two_datums_one_empty_low_confidence_of_fp(
         false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -266,9 +277,13 @@ def test_recall_false_negatives_two_datums_one_empty_high_confidence_of_fp(
         false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -307,9 +322,13 @@ def test_recall_false_negatives_two_datums_one_only_with_different_class_low_con
         false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -360,9 +379,13 @@ def test_recall_false_negatives_two_datums_one_only_with_different_class_high_co
         false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.0])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )

-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",

{valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/manager.py

@@ -241,6 +241,8 @@ class Evaluator:
             Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.

         Returns
         -------

{valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/manager.py

@@ -342,6 +342,7 @@ class Evaluator:
         score_thresholds: list[float] = [0.5],
         number_of_examples: int = 0,
         filter_: Filter | None = None,
+        as_dict: bool = False,
     ) -> dict[MetricType, list]:
         """
         Performs an evaluation and returns metrics.
@@ -358,6 +359,8 @@ class Evaluator:
             Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.

         Returns
         -------
@@ -559,6 +562,12 @@ class Evaluator:
             if metric not in metrics_to_return:
                 del metrics[metric]

+        if as_dict:
+            return {
+                mtype: [metric.to_dict() for metric in mvalues]
+                for mtype, mvalues in metrics.items()
+            }
+
         return metrics

     def _unpack_confusion_matrix(
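
Taken together, these three hunks are the substance of the release: the detection `Evaluator.evaluate()` gains an `as_dict` keyword and, when it is set, converts each metric with `to_dict()` before returning, still keyed by `MetricType`. A rough usage sketch, assuming the import path `valor_lite.detection` and eliding the data-loading calls (the placeholder comment stands in for real ground truths and predictions, which are not part of this diff):

from valor_lite.detection import DataLoader, MetricType

loader = DataLoader()
# loader.add_bounding_boxes(...)  # populate with ground-truth and predicted boxes
evaluator = loader.finalize()

metrics = evaluator.evaluate(
    iou_thresholds=[0.5, 0.75],
    score_thresholds=[0.5],
    as_dict=True,  # new in 0.33.7: metrics come back as plain dictionaries
)
for m in metrics[MetricType.AP]:
    print(m)  # dicts keyed by "type", "parameters", etc., as in the tests above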

{valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: valor-lite
-Version: 0.33.6
+Version: 0.33.7
 Summary: Compute valor metrics directly in your client.
 License: MIT License
