valor-lite 0.33.6__tar.gz → 0.33.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {valor_lite-0.33.6 → valor_lite-0.33.7}/PKG-INFO +1 -1
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_average_precision.py +48 -32
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_average_recall.py +17 -20
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_confusion_matrix.py +10 -7
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_counts.py +40 -15
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_pr_curve.py +2 -3
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_precision.py +36 -13
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_recall.py +36 -13
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/manager.py +2 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/manager.py +9 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/PKG-INFO +1 -1
- {valor_lite-0.33.6 → valor_lite-0.33.7}/LICENSE +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/README.md +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/benchmarks/.gitignore +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/benchmarks/benchmark_classification.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/benchmarks/benchmark_objdet.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/examples/.gitignore +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/examples/object-detection.ipynb +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/examples/tabular_classification.ipynb +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/pyproject.toml +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/setup.cfg +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/conftest.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_accuracy.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_confusion_matrix.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_counts.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_dataloader.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_evaluator.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_f1.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_filtering.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_precision.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_recall.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_rocauc.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_schemas.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/classification/test_stability.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/__init__.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/conftest.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_dataloader.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_evaluator.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_filtering.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_iou.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_schemas.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/tests/detection/test_stability.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/__init__.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/__init__.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/annotation.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/computation.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/classification/metric.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/__init__.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/annotation.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/computation.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/detection/metric.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite/schemas.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/SOURCES.txt +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/dependency_links.txt +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/requires.txt +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.7}/valor_lite.egg-info/top_level.txt +0 -0
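
In this release, `Evaluator.evaluate()` in the detection manager gains an `as_dict` flag (the classification manager's docstring gains the matching entry): when `as_dict=True`, `evaluate()` converts every metric to a dictionary via `to_dict()` before returning. The detection tests are updated to pass `as_dict=True`, in several cases alongside explicit `iou_thresholds` and `score_thresholds`, and to drop their per-metric `m.to_dict()` list comprehensions.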
tests/detection/test_average_precision.py

```diff
@@ -98,6 +98,7 @@ def test_ap_metrics(
 
     metrics = evaluator.evaluate(
         iou_thresholds=[0.1, 0.6],
+        as_dict=True,
     )
 
     assert evaluator.ignored_prediction_labels == []
@@ -108,7 +109,7 @@ def test_ap_metrics(
     assert evaluator.n_predictions == 2
 
     # test AP
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -149,7 +150,7 @@ def test_ap_metrics(
         assert m in actual_metrics
 
     # test mAP
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAP]]
+    actual_metrics = [m for m in metrics[MetricType.mAP]]
     expected_metrics = [
         {
             "type": "mAP",
@@ -190,9 +191,7 @@ def test_ap_metrics(
         assert m in actual_metrics
 
     # test AP Averaged Over IoUs
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.APAveragedOverIOUs]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.APAveragedOverIOUs]]
     expected_metrics = [
         {
             "type": "APAveragedOverIOUs",
@@ -217,9 +216,7 @@ def test_ap_metrics(
         assert m in actual_metrics
 
     # test mAP Averaged Over IoUs
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.mAPAveragedOverIOUs]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.mAPAveragedOverIOUs]]
    expected_metrics = [
         {
             "type": "mAPAveragedOverIOUs",
@@ -265,10 +262,11 @@ def test_ap_using_torch_metrics_example(
 
     metrics = evaluator.evaluate(
         iou_thresholds=[0.5, 0.75],
+        as_dict=True,
     )
 
     # test AP
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -357,7 +355,7 @@ def test_ap_using_torch_metrics_example(
         assert m in actual_metrics
 
     # test mAP
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAP]]
+    actual_metrics = [m for m in metrics[MetricType.mAP]]
     expected_metrics = [
         {
             "type": "mAP",
@@ -393,9 +391,12 @@ def test_ap_false_negatives_single_datum_baseline(
     loader = DataLoader()
     loader.add_bounding_boxes(false_negatives_single_datum_baseline_detections)
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -426,9 +427,12 @@ def test_ap_false_negatives_single_datum(
     loader = DataLoader()
     loader.add_bounding_boxes(false_negatives_single_datum_detections)
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -467,9 +471,12 @@ def test_ap_false_negatives_two_datums_one_empty_low_confidence_of_fp(
         false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -507,9 +514,12 @@ def test_ap_false_negatives_two_datums_one_empty_high_confidence_of_fp(
         false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -547,9 +557,12 @@ def test_ap_false_negatives_two_datums_one_only_with_different_class_low_confide
         false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -598,9 +611,12 @@ def test_ap_false_negatives_two_datums_one_only_with_different_class_high_confid
         false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
@@ -662,9 +678,12 @@ def test_ap_ranked_pair_ordering(
         "n_predictions": 4,
     }
 
-    metrics = evaluator.evaluate(iou_thresholds=[0.5, 0.75])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5, 0.75],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "parameters": {
@@ -720,7 +739,7 @@ def test_ap_ranked_pair_ordering(
     for m in expected_metrics:
         assert m in actual_metrics
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAP]]
+    actual_metrics = [m for m in metrics[MetricType.mAP]]
     expected_metrics = [
         {
             "parameters": {"label_key": "class", "iou_threshold": 0.5},
@@ -738,9 +757,7 @@ def test_ap_ranked_pair_ordering(
     for m in expected_metrics:
         assert m in actual_metrics
 
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.APAveragedOverIOUs]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.APAveragedOverIOUs]]
     expected_metrics = [
         {
             "parameters": {
@@ -772,9 +789,7 @@ def test_ap_ranked_pair_ordering(
     for m in expected_metrics:
         assert m in actual_metrics
 
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.mAPAveragedOverIOUs]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.mAPAveragedOverIOUs]]
     expected_metrics = [
         {
             "parameters": {
@@ -812,12 +827,13 @@ def test_ap_true_positive_deassignment(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.5],
         score_thresholds=[0.5],
+        as_dict=True,
     )
 
     assert len(metrics) == 14
 
     # test AP
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
+    actual_metrics = [m for m in metrics[MetricType.AP]]
     expected_metrics = [
         {
             "type": "AP",
```
tests/detection/test_average_recall.py

```diff
@@ -102,6 +102,7 @@ def test_ar_metrics(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.1, 0.6],
         score_thresholds=[0.0],
+        as_dict=True,
     )
 
     assert evaluator.ignored_prediction_labels == []
@@ -112,7 +113,7 @@ def test_ar_metrics(
     assert evaluator.n_predictions == 2
 
     # test AR
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]]
+    actual_metrics = [m for m in metrics[MetricType.AR]]
     expected_metrics = [
         {
             "type": "AR",
@@ -139,7 +140,7 @@ def test_ar_metrics(
         assert m in actual_metrics
 
     # test mAR
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAR]]
+    actual_metrics = [m for m in metrics[MetricType.mAR]]
     expected_metrics = [
         {
             "type": "mAR",
@@ -166,9 +167,7 @@ def test_ar_metrics(
         assert m in actual_metrics
 
     # test AR Averaged Over IoUs
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.ARAveragedOverScores]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.ARAveragedOverScores]]
     expected_metrics = [
         {
             "type": "ARAveragedOverScores",
@@ -195,9 +194,7 @@ def test_ar_metrics(
         assert m in actual_metrics
 
     # test mAR Averaged Over IoUs
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.mARAveragedOverScores]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.mARAveragedOverScores]]
     expected_metrics = [
         {
             "type": "mARAveragedOverScores",
@@ -249,10 +246,11 @@ def test_ar_using_torch_metrics_example(
     metrics = evaluator.evaluate(
         iou_thresholds=iou_thresholds,
         score_thresholds=score_thresholds,
+        as_dict=True,
     )
 
     # test AR
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]]
+    actual_metrics = [m for m in metrics[MetricType.AR]]
     expected_metrics = [
         {
             "type": "AR",
@@ -306,7 +304,7 @@ def test_ar_using_torch_metrics_example(
         assert m in actual_metrics
 
     # test mAR
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAR]]
+    actual_metrics = [m for m in metrics[MetricType.mAR]]
     expected_metrics = [
         {
             "type": "mAR",
@@ -324,9 +322,7 @@ def test_ar_using_torch_metrics_example(
         assert m in actual_metrics
 
     # test ARAveragedOverScores
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.ARAveragedOverScores]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.ARAveragedOverScores]]
     expected_metrics = [
         {
             "type": "ARAveragedOverScores",
@@ -380,9 +376,7 @@ def test_ar_using_torch_metrics_example(
         assert m in actual_metrics
 
     # test mARAveragedOverScores
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.mARAveragedOverScores]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.mARAveragedOverScores]]
     expected_metrics = [
         {
             "type": "mARAveragedOverScores",
@@ -418,12 +412,13 @@ def test_ar_true_positive_deassignment(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.5],
         score_thresholds=[0.5],
+        as_dict=True,
     )
 
     assert len(metrics) == 14
 
     # test AR
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]]
+    actual_metrics = [m for m in metrics[MetricType.AR]]
     expected_metrics = [
         {
             "type": "AR",
@@ -474,10 +469,12 @@ def test_ar_ranked_pair_ordering(
     }
 
     metrics = evaluator.evaluate(
-        iou_thresholds=[0.5, 0.75],
+        iou_thresholds=[0.5, 0.75],
+        score_thresholds=[0.0],
+        as_dict=True,
     )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]]
+    actual_metrics = [m for m in metrics[MetricType.AR]]
     expected_metrics = expected_metrics = [
         {
             "type": "AR",
@@ -512,7 +509,7 @@ def test_ar_ranked_pair_ordering(
     for m in expected_metrics:
         assert m in actual_metrics
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAR]]
+    actual_metrics = [m for m in metrics[MetricType.mAR]]
     expected_metrics = expected_metrics = [
         {
             "type": "mAR",
```
tests/detection/test_confusion_matrix.py

```diff
@@ -441,9 +441,10 @@ def test_confusion_matrix(
         score_thresholds=[0.05, 0.3, 0.35, 0.45, 0.55, 0.95],
         number_of_examples=1,
         metrics_to_return=[MetricType.ConfusionMatrix],
+        as_dict=True,
     )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.ConfusionMatrix]]
+    actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
     expected_metrics = [
         {
             "type": "ConfusionMatrix",
@@ -757,9 +758,10 @@ def test_confusion_matrix(
         score_thresholds=[0.05, 0.3, 0.35, 0.45, 0.55, 0.95],
         number_of_examples=1,
         metrics_to_return=[MetricType.ConfusionMatrix],
+        as_dict=True,
     )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.ConfusionMatrix]]
+    actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
     expected_metrics = [
         {
             "type": "ConfusionMatrix",
@@ -1129,11 +1131,12 @@ def test_confusion_matrix_using_torch_metrics_example(
         score_thresholds=[0.05, 0.25, 0.35, 0.55, 0.75, 0.8, 0.85, 0.95],
         number_of_examples=0,
         metrics_to_return=[MetricType.ConfusionMatrix],
+        as_dict=True,
     )
 
     assert len(metrics[MetricType.ConfusionMatrix]) == 16
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.ConfusionMatrix]]
+    actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
     expected_metrics = [
         {
             "type": "ConfusionMatrix",
@@ -1540,11 +1543,12 @@ def test_confusion_matrix_fp_hallucination_edge_case(
         score_thresholds=[0.5, 0.85],
         number_of_examples=1,
         metrics_to_return=[MetricType.ConfusionMatrix],
+        as_dict=True,
     )
 
     assert len(metrics[MetricType.ConfusionMatrix]) == 2
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.ConfusionMatrix]]
+    actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
     expected_metrics = [
         {
             "type": "ConfusionMatrix",
@@ -1667,11 +1671,10 @@ def test_confusion_matrix_ranked_pair_ordering(
         score_thresholds=[0.0],
         number_of_examples=0,
         metrics_to_return=[MetricType.ConfusionMatrix],
+        as_dict=True,
     )
 
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.ConfusionMatrix]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.ConfusionMatrix]]
     expected_metrics = [
         {
             "type": "ConfusionMatrix",
```
tests/detection/test_counts.py

```diff
@@ -33,6 +33,7 @@ def test_counts_metrics(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.1, 0.6],
         score_thresholds=[0.0, 0.5],
+        as_dict=True,
     )
 
     assert evaluator.ignored_prediction_labels == []
@@ -43,7 +44,7 @@ def test_counts_metrics(
     assert evaluator.n_predictions == 2
 
     # test Counts
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -169,10 +170,12 @@ def test_counts_false_negatives_single_datum_baseline(
     evaluator = loader.finalize()
 
     metrics = evaluator.evaluate(
-        iou_thresholds=[0.5],
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0, 0.9],
+        as_dict=True,
     )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -224,9 +227,13 @@ def test_counts_false_negatives_single_datum(
     loader = DataLoader()
     loader.add_bounding_boxes(false_negatives_single_datum_detections)
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -270,9 +277,13 @@ def test_counts_false_negatives_two_datums_one_empty_low_confidence_of_fp(
         false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -315,9 +326,13 @@ def test_counts_false_negatives_two_datums_one_empty_high_confidence_of_fp(
         false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -360,9 +375,13 @@ def test_counts_false_negatives_two_datums_one_only_with_different_class_low_con
         false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -421,9 +440,13 @@ def test_counts_false_negatives_two_datums_one_only_with_different_class_high_co
         false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
@@ -497,10 +520,12 @@ def test_counts_ranked_pair_ordering(
     }
 
     metrics = evaluator.evaluate(
-        iou_thresholds=[0.5, 0.75],
+        iou_thresholds=[0.5, 0.75],
+        score_thresholds=[0.0],
+        as_dict=True,
     )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    actual_metrics = [m for m in metrics[MetricType.Counts]]
     expected_metrics = [
         {
             "type": "Counts",
```
tests/detection/test_pr_curve.py

```diff
@@ -56,6 +56,7 @@ def test_pr_curve_using_torch_metrics_example(
 
     metrics = evaluator.evaluate(
         iou_thresholds=[0.5, 0.75],
+        as_dict=True,
     )
 
     # AP = 1.0
@@ -86,9 +87,7 @@ def test_pr_curve_using_torch_metrics_example(
     )
 
     # test PrecisionRecallCurve
-    actual_metrics = [
-        m.to_dict() for m in metrics[MetricType.PrecisionRecallCurve]
-    ]
+    actual_metrics = [m for m in metrics[MetricType.PrecisionRecallCurve]]
     expected_metrics = [
         {
             "type": "PrecisionRecallCurve",
```
tests/detection/test_precision.py

```diff
@@ -32,6 +32,7 @@ def test_precision_metrics(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.1, 0.6],
         score_thresholds=[0.0, 0.5],
+        as_dict=True,
     )
 
     assert evaluator.ignored_prediction_labels == []
@@ -42,7 +43,7 @@ def test_precision_metrics(
     assert evaluator.n_predictions == 2
 
     # test Precision
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -136,10 +137,12 @@ def test_precision_false_negatives_single_datum_baseline(
     evaluator = loader.finalize()
 
     metrics = evaluator.evaluate(
-        iou_thresholds=[0.5],
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0, 0.9],
+        as_dict=True,
     )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -183,9 +186,13 @@ def test_precision_false_negatives_single_datum(
     loader = DataLoader()
     loader.add_bounding_boxes(false_negatives_single_datum_detections)
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -225,9 +232,13 @@ def test_precision_false_negatives_two_datums_one_empty_low_confidence_of_fp(
         false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -266,9 +277,13 @@ def test_precision_false_negatives_two_datums_one_empty_high_confidence_of_fp(
         false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -307,9 +322,13 @@ def test_precision_false_negatives_two_datums_one_only_with_different_class_low_
         false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
@@ -360,9 +379,13 @@ def test_precision_false_negatives_two_datums_one_only_with_different_class_high
         false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]]
+    actual_metrics = [m for m in metrics[MetricType.Precision]]
     expected_metrics = [
         {
             "type": "Precision",
```
tests/detection/test_recall.py

```diff
@@ -32,6 +32,7 @@ def test_recall_metrics(
     metrics = evaluator.evaluate(
         iou_thresholds=[0.1, 0.6],
         score_thresholds=[0.0, 0.5],
+        as_dict=True,
     )
 
     assert evaluator.ignored_prediction_labels == []
@@ -42,7 +43,7 @@ def test_recall_metrics(
     assert evaluator.n_predictions == 2
 
     # test Recall
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -136,10 +137,12 @@ def test_recall_false_negatives_single_datum_baseline(
     evaluator = loader.finalize()
 
     metrics = evaluator.evaluate(
-        iou_thresholds=[0.5],
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0, 0.9],
+        as_dict=True,
     )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -183,9 +186,13 @@ def test_recall_false_negatives_single_datum(
     loader = DataLoader()
     loader.add_bounding_boxes(false_negatives_single_datum_detections)
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -225,9 +232,13 @@ def test_recall_false_negatives_two_datums_one_empty_low_confidence_of_fp(
         false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -266,9 +277,13 @@ def test_recall_false_negatives_two_datums_one_empty_high_confidence_of_fp(
         false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -307,9 +322,13 @@ def test_recall_false_negatives_two_datums_one_only_with_different_class_low_con
         false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
@@ -360,9 +379,13 @@ def test_recall_false_negatives_two_datums_one_only_with_different_class_high_co
         false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
     )
     evaluator = loader.finalize()
-    metrics = evaluator.evaluate(iou_thresholds=[0.5])
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
 
-    actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]]
+    actual_metrics = [m for m in metrics[MetricType.Recall]]
     expected_metrics = [
         {
             "type": "Recall",
```
valor_lite/classification/manager.py

```diff
@@ -241,6 +241,8 @@ class Evaluator:
             Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
 
         Returns
         -------
```
valor_lite/detection/manager.py

```diff
@@ -342,6 +342,7 @@ class Evaluator:
         score_thresholds: list[float] = [0.5],
         number_of_examples: int = 0,
         filter_: Filter | None = None,
+        as_dict: bool = False,
     ) -> dict[MetricType, list]:
         """
         Performs an evaluation and returns metrics.
@@ -358,6 +359,8 @@ class Evaluator:
             Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
 
         Returns
         -------
@@ -559,6 +562,12 @@ class Evaluator:
             if metric not in metrics_to_return:
                 del metrics[metric]
 
+        if as_dict:
+            return {
+                mtype: [metric.to_dict() for metric in mvalues]
+                for mtype, mvalues in metrics.items()
+            }
+
         return metrics
 
     def _unpack_confusion_matrix(
```
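
As a quick illustration of the new flag, here is a minimal sketch that mirrors the updated tests. The import path and the `detections` input are assumptions made for illustration; neither appears in this diff.

```python
from valor_lite.detection import DataLoader, MetricType

# Hypothetical input: `detections` stands in for the annotated
# bounding-box fixtures that the tests above pull from their conftest.
loader = DataLoader()
loader.add_bounding_boxes(detections)
evaluator = loader.finalize()

# New in 0.33.7: with as_dict=True, evaluate() calls to_dict() on each
# metric before returning, so callers no longer convert metrics themselves.
metrics = evaluator.evaluate(
    iou_thresholds=[0.5, 0.75],
    score_thresholds=[0.0],
    as_dict=True,
)

# Each entry is now a plain dict with keys such as "type" and "parameters".
for m in metrics[MetricType.AP]:
    print(m["type"], m["parameters"])
```

Under 0.33.6 the same result required `[m.to_dict() for m in metrics[MetricType.AP]]` on the caller's side, which is exactly the pattern the test diffs above remove.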