valor-lite 0.33.1.tar.gz → 0.33.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of valor-lite has been flagged as possibly problematic.
- {valor_lite-0.33.1 → valor_lite-0.33.3}/PKG-INFO +1 -1
- {valor_lite-0.33.1 → valor_lite-0.33.3}/benchmarks/benchmark_objdet.py +37 -13
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_average_precision.py +11 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_average_recall.py +74 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_counts.py +106 -20
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_dataloader.py +1 -1
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_detailed_counts.py +492 -531
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_evaluator.py +22 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_schemas.py +3 -2
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite/detection/annotation.py +14 -2
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite/detection/computation.py +225 -80
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite/detection/manager.py +376 -239
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite/detection/metric.py +32 -7
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite.egg-info/PKG-INFO +1 -1
- {valor_lite-0.33.1 → valor_lite-0.33.3}/LICENSE +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/README.md +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/benchmarks/.gitignore +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/examples/.gitignore +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/examples/coco-yolo.ipynb +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/pyproject.toml +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/setup.cfg +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/__init__.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/conftest.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_filtering.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_iou.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_pr_curve.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_precision.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_recall.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_stability.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite/__init__.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite/detection/__init__.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite/schemas.py +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite.egg-info/SOURCES.txt +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite.egg-info/dependency_links.txt +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite.egg-info/requires.txt +0 -0
- {valor_lite-0.33.1 → valor_lite-0.33.3}/valor_lite.egg-info/top_level.txt +0 -0
{valor_lite-0.33.1 → valor_lite-0.33.3}/benchmarks/benchmark_objdet.py:

```diff
@@ -8,7 +8,7 @@ from time import time

 import requests
 from tqdm import tqdm
-from valor_lite.detection import DataLoader
+from valor_lite.detection import DataLoader, MetricType


 class AnnotationType(str, Enum):
@@ -258,24 +258,48 @@ def run_benchmarking_analysis(
                 f"Base precomputation timed out with limit of {limit}."
             )

-        #
-        detailed_counts_time_no_samples, _ = time_it(
-            evaluator.compute_detailed_counts
-        )()
-
-        # test detailed counts with 3 samples
-        detailed_counts_time_three_samples, _ = time_it(
-            evaluator.compute_detailed_counts
-        )(n_samples=3)
-
-        # evaluate
+        # evaluate - base metrics only
         eval_time, metrics = time_it(evaluator.evaluate)()
-        # print(metrics)
         if eval_time > evaluation_timeout and evaluation_timeout != -1:
             raise TimeoutError(
                 f"Base evaluation timed out with {evaluator.n_datums} datums."
             )

+        # evaluate - base metrics + detailed counts with no samples
+        detailed_counts_time_no_samples, metrics = time_it(
+            evaluator.evaluate
+        )(
+            [
+                MetricType.DetailedCounts,
+                *MetricType.base_metrics(),
+            ]
+        )
+        if (
+            detailed_counts_time_no_samples > evaluation_timeout
+            and evaluation_timeout != -1
+        ):
+            raise TimeoutError(
+                f"Detailed evaluation w/ no samples timed out with {evaluator.n_datums} datums."
+            )
+
+        # evaluate - base metrics + detailed counts with 3 samples
+        detailed_counts_time_three_samples, metrics = time_it(
+            evaluator.evaluate
+        )(
+            [
+                MetricType.DetailedCounts,
+                *MetricType.base_metrics(),
+            ],
+            number_of_examples=3,
+        )
+        if (
+            detailed_counts_time_three_samples > evaluation_timeout
+            and evaluation_timeout != -1
+        ):
+            raise TimeoutError(
+                f"Detailed w/ 3 samples evaluation timed out with {evaluator.n_datums} datums."
+            )
+
         results.append(
             Benchmark(
                 limit=limit,
```
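The benchmark change above tracks an API shift between 0.33.1 and 0.33.3: detailed counts are no longer timed through a standalone `evaluator.compute_detailed_counts()` call but are requested from `evaluator.evaluate()` together with the base metrics, with the example count passed as `number_of_examples` rather than `n_samples`. Below is a minimal sketch of the new call pattern. It assumes an `evaluator` obtained from a finalized `DataLoader`; `time_it` is a stand-in for the benchmark's own timing wrapper (its implementation is not shown in this diff), and `time_detailed_evaluation` is a hypothetical helper, not part of valor-lite.

```python
from time import perf_counter

from valor_lite.detection import MetricType


def time_it(fn):
    """Stand-in timing wrapper: returns (elapsed_seconds, result)."""
    def wrapper(*args, **kwargs):
        start = perf_counter()
        result = fn(*args, **kwargs)
        return perf_counter() - start, result
    return wrapper


def time_detailed_evaluation(evaluator, number_of_examples: int = 3):
    """Time a single evaluate() call that computes the base metrics plus
    DetailedCounts, mirroring the updated benchmark loop."""
    return time_it(evaluator.evaluate)(
        [
            MetricType.DetailedCounts,
            *MetricType.base_metrics(),
        ],
        number_of_examples=number_of_examples,
    )
```

Calling `evaluator.evaluate(...)` without `number_of_examples` corresponds to the benchmark's "no samples" case; passing `number_of_examples=3` corresponds to the "3 samples" case.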
{valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_average_precision.py:

```diff
@@ -629,6 +629,17 @@ def test_ap_ranked_pair_ordering(detection_ranked_pair_ordering: Detection):
     loader.add_data(detections=[detection_ranked_pair_ordering])
     evaluator = loader.finalize()

+    assert evaluator.metadata == {
+        "ignored_prediction_labels": [
+            ("class", "label4"),
+        ],
+        "missing_prediction_labels": [],
+        "n_datums": 1,
+        "n_groundtruths": 3,
+        "n_labels": 4,
+        "n_predictions": 4,
+    }
+
     metrics = evaluator.evaluate(iou_thresholds=[0.5, 0.75])

     actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]]
```
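The new assertion exercises the evaluator's `metadata` attribute, which summarizes what was ingested at finalization; here the prediction-only label `("class", "label4")` is reported under `ignored_prediction_labels`. A small sketch of the same inspection outside the test, assuming `detections` is a list of `valor_lite.detection.Detection` objects (in the test it is the `detection_ranked_pair_ordering` fixture); `summarize_evaluator` is an illustrative helper, not part of valor-lite:

```python
from valor_lite.detection import DataLoader


def summarize_evaluator(detections) -> dict:
    """Build an evaluator and return its bookkeeping metadata."""
    loader = DataLoader()
    loader.add_data(detections=detections)
    evaluator = loader.finalize()

    meta = evaluator.metadata
    # Example contents (from the test above):
    #   n_datums=1, n_groundtruths=3, n_predictions=4, n_labels=4,
    #   ignored_prediction_labels=[("class", "label4")]
    print(
        f"{meta['n_datums']} datum(s), "
        f"{meta['n_groundtruths']} ground truths, "
        f"{meta['n_predictions']} predictions, "
        f"{meta['n_labels']} labels"
    )
    return meta
```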
{valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_average_recall.py:

```diff
@@ -432,3 +432,77 @@ def test_ar_true_positive_deassignment(
         assert m in expected_metrics
     for m in expected_metrics:
         assert m in actual_metrics
+
+
+def test_ar_ranked_pair_ordering(detection_ranked_pair_ordering: Detection):
+
+    loader = DataLoader()
+    loader.add_data(detections=[detection_ranked_pair_ordering])
+    evaluator = loader.finalize()
+
+    assert evaluator.metadata == {
+        "ignored_prediction_labels": [
+            ("class", "label4"),
+        ],
+        "missing_prediction_labels": [],
+        "n_datums": 1,
+        "n_groundtruths": 3,
+        "n_labels": 4,
+        "n_predictions": 4,
+    }
+
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5, 0.75], score_thresholds=[0.0]
+    )
+
+    actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]]
+    expected_metrics = expected_metrics = [
+        {
+            "type": "AR",
+            "value": 1.0,
+            "parameters": {
+                "score_threshold": 0.0,
+                "iou_thresholds": [0.5, 0.75],
+                "label": {"key": "class", "value": "label1"},
+            },
+        },
+        {
+            "type": "AR",
+            "value": 1.0,
+            "parameters": {
+                "score_threshold": 0.0,
+                "iou_thresholds": [0.5, 0.75],
+                "label": {"key": "class", "value": "label2"},
+            },
+        },
+        {
+            "type": "AR",
+            "value": 0.0,
+            "parameters": {
+                "score_threshold": 0.0,
+                "iou_thresholds": [0.5, 0.75],
+                "label": {"key": "class", "value": "label3"},
+            },
+        },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+    actual_metrics = [m.to_dict() for m in metrics[MetricType.mAR]]
+    expected_metrics = expected_metrics = [
+        {
+            "type": "mAR",
+            "value": 0.6666666666666666,
+            "parameters": {
+                "score_threshold": 0.0,
+                "iou_thresholds": [0.5, 0.75],
+                "label_key": "class",
+            },
+        },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
```
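For context on the expected `mAR` value in the new test: the three per-label `AR` values under the label key `"class"` are 1.0, 1.0, and 0.0, and their mean is 2/3 ≈ 0.6666666666666666, which appears to be how the single `mAR` entry is derived. A quick cross-check using only the numbers from the test:

```python
# Cross-check of the expected mAR value from the per-label AR values above
# (label1, label2, label3 under the label key "class").
per_label_ar = [1.0, 1.0, 0.0]
mar = sum(per_label_ar) / len(per_label_ar)
assert abs(mar - 0.6666666666666666) < 1e-12
```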
{valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_counts.py:

```diff
@@ -41,52 +41,52 @@ def test_counts_metrics(basic_detections: list[Detection]):
         {
             "type": "Counts",
             "value": {
-                "tp":
-                "fp":
+                "tp": 1,
+                "fp": 0,
                 "fn": 1,
             },
             "parameters": {
                 "iou_threshold": 0.1,
                 "score_threshold": 0.0,
-                "label": {"key": "
+                "label": {"key": "k1", "value": "v1"},
             },
         },
         {
             "type": "Counts",
             "value": {
-                "tp":
-                "fp":
+                "tp": 1,
+                "fp": 0,
                 "fn": 1,
             },
             "parameters": {
                 "iou_threshold": 0.6,
                 "score_threshold": 0.0,
-                "label": {"key": "
+                "label": {"key": "k1", "value": "v1"},
             },
         },
         {
             "type": "Counts",
             "value": {
-                "tp":
+                "tp": 0,
                 "fp": 0,
-                "fn":
+                "fn": 2,
             },
             "parameters": {
                 "iou_threshold": 0.1,
-                "score_threshold": 0.
+                "score_threshold": 0.5,
                 "label": {"key": "k1", "value": "v1"},
             },
         },
         {
             "type": "Counts",
             "value": {
-                "tp":
+                "tp": 0,
                 "fp": 0,
-                "fn":
+                "fn": 2,
             },
             "parameters": {
                 "iou_threshold": 0.6,
-                "score_threshold": 0.
+                "score_threshold": 0.5,
                 "label": {"key": "k1", "value": "v1"},
             },
         },
@@ -99,7 +99,7 @@ def test_counts_metrics(basic_detections: list[Detection]):
             },
             "parameters": {
                 "iou_threshold": 0.1,
-                "score_threshold": 0.
+                "score_threshold": 0.0,
                 "label": {"key": "k2", "value": "v2"},
             },
         },
@@ -112,7 +112,7 @@ def test_counts_metrics(basic_detections: list[Detection]):
             },
             "parameters": {
                 "iou_threshold": 0.6,
-                "score_threshold": 0.
+                "score_threshold": 0.0,
                 "label": {"key": "k2", "value": "v2"},
             },
         },
@@ -120,26 +120,26 @@ def test_counts_metrics(basic_detections: list[Detection]):
             "type": "Counts",
             "value": {
                 "tp": 0,
-                "fp":
-                "fn":
+                "fp": 1,
+                "fn": 1,
             },
             "parameters": {
                 "iou_threshold": 0.1,
                 "score_threshold": 0.5,
-                "label": {"key": "
+                "label": {"key": "k2", "value": "v2"},
             },
         },
         {
             "type": "Counts",
             "value": {
                 "tp": 0,
-                "fp":
-                "fn":
+                "fp": 1,
+                "fn": 1,
             },
             "parameters": {
                 "iou_threshold": 0.6,
                 "score_threshold": 0.5,
-                "label": {"key": "
+                "label": {"key": "k2", "value": "v2"},
             },
         },
     ]
@@ -455,3 +455,89 @@ def test_counts_false_negatives_two_datums_one_only_with_different_class_high_co
         assert m in actual_metrics
     for m in actual_metrics:
         assert m in expected_metrics
+
+
+def test_counts_ranked_pair_ordering(
+    detection_ranked_pair_ordering: Detection,
+):
+
+    loader = DataLoader()
+    loader.add_data(detections=[detection_ranked_pair_ordering])
+    evaluator = loader.finalize()
+
+    assert evaluator.metadata == {
+        "ignored_prediction_labels": [
+            ("class", "label4"),
+        ],
+        "missing_prediction_labels": [],
+        "n_datums": 1,
+        "n_groundtruths": 3,
+        "n_labels": 4,
+        "n_predictions": 4,
+    }
+
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5, 0.75], score_thresholds=[0.0]
+    )
+
+    actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]]
+    expected_metrics = [
+        {
+            "type": "Counts",
+            "value": {"tp": 1, "fp": 0, "fn": 0},
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": {"key": "class", "value": "label1"},
+            },
+        },
+        {
+            "type": "Counts",
+            "value": {"tp": 1, "fp": 0, "fn": 0},
+            "parameters": {
+                "iou_threshold": 0.75,
+                "score_threshold": 0.0,
+                "label": {"key": "class", "value": "label1"},
+            },
+        },
+        {
+            "type": "Counts",
+            "value": {"tp": 1, "fp": 0, "fn": 0},
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": {"key": "class", "value": "label2"},
+            },
+        },
+        {
+            "type": "Counts",
+            "value": {"tp": 1, "fp": 0, "fn": 0},
+            "parameters": {
+                "iou_threshold": 0.75,
+                "score_threshold": 0.0,
+                "label": {"key": "class", "value": "label2"},
+            },
+        },
+        {
+            "type": "Counts",
+            "value": {"tp": 0, "fp": 1, "fn": 1},
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": {"key": "class", "value": "label3"},
+            },
+        },
+        {
+            "type": "Counts",
+            "value": {"tp": 0, "fp": 1, "fn": 1},
+            "parameters": {
+                "iou_threshold": 0.75,
+                "score_threshold": 0.0,
+                "label": {"key": "class", "value": "label3"},
+            },
+        },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
```
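The new `test_counts_ranked_pair_ordering` test pins the per-label `Counts` output of `evaluate()` at two IoU thresholds. If an overall tally is wanted, the per-label dictionaries can simply be summed; a minimal sketch follows (the `total_counts` helper is illustrative, not part of valor-lite), using the `to_dict()` format shown above:

```python
def total_counts(count_metrics: list[dict], iou_threshold: float) -> dict[str, int]:
    """Sum per-label Counts metrics at a single IoU threshold."""
    totals = {"tp": 0, "fp": 0, "fn": 0}
    for m in count_metrics:
        if m["parameters"]["iou_threshold"] == iou_threshold:
            for key in totals:
                totals[key] += m["value"][key]
    return totals


# With the expected metrics from test_counts_ranked_pair_ordering at IoU 0.5,
# this yields {"tp": 2, "fp": 1, "fn": 1}.
```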
{valor_lite-0.33.1 → valor_lite-0.33.3}/tests/detection/test_dataloader.py:

```diff
@@ -22,7 +22,7 @@ def test_valor_integration():
     loader.add_data_from_valor_dict([(gt, pd)])

     assert len(loader.pairs) == 1
-    assert loader.pairs[0].shape == (
+    assert loader.pairs[0].shape == (71, 7)

     assert set(loader._evaluator.label_key_to_index.keys()) == {
         "iscrowd",
```