valor-lite 0.33.11__tar.gz → 0.33.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {valor_lite-0.33.11/valor_lite.egg-info → valor_lite-0.33.13}/PKG-INFO +1 -1
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_accuracy.py +7 -90
- valor_lite-0.33.13/tests/object_detection/test_accuracy.py +492 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_average_precision.py +1 -1
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_average_recall.py +1 -1
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_pr_curve.py +1 -1
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/computation.py +2 -2
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/manager.py +8 -6
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/metric.py +29 -17
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/computation.py +21 -17
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/manager.py +11 -6
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/metric.py +23 -10
- {valor_lite-0.33.11 → valor_lite-0.33.13/valor_lite.egg-info}/PKG-INFO +1 -1
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite.egg-info/SOURCES.txt +1 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/LICENSE +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/README.md +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/benchmarks/.gitignore +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/benchmarks/benchmark_classification.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/benchmarks/benchmark_objdet.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/examples/.gitignore +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/examples/object-detection.ipynb +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/examples/tabular_classification.ipynb +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/pyproject.toml +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/setup.cfg +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/__init__.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/__init__.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/conftest.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_confusion_matrix.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_counts.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_dataloader.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_evaluator.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_f1.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_filtering.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_precision.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_recall.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_rocauc.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_schemas.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_stability.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/__init__.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/conftest.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_confusion_matrix.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_counts.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_dataloader.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_evaluator.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_filtering.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_iou.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_precision.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_recall.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_schemas.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_stability.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/__init__.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/conftest.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_accuracy.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_annotation.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_confusion_matrix.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_dataloader.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_evaluator.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_f1.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_filtering.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_iou.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_precision.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_recall.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_stability.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/text_generation/__init__.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/LICENSE +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/__init__.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/__init__.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/annotation.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/__init__.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/annotation.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/schemas.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/semantic_segmentation/__init__.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/semantic_segmentation/annotation.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/semantic_segmentation/computation.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/semantic_segmentation/manager.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/semantic_segmentation/metric.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/text_generation/__init__.py +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite.egg-info/dependency_links.txt +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite.egg-info/requires.txt +0 -0
- {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite.egg-info/top_level.txt +0 -0
{valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_accuracy.py

@@ -53,18 +53,12 @@ def test_accuracy_computation():
     )

     # score threshold, label, count metric
-    assert accuracy.shape == (2,
+    assert accuracy.shape == (2,)

     # score >= 0.25
-    assert accuracy[0]
-    assert accuracy[0][1] == 1.0
-    assert accuracy[0][2] == 2 / 3
-    assert accuracy[0][3] == 1.0
+    assert accuracy[0] == 2 / 3
     # score >= 0.75
-    assert accuracy[1]
-    assert accuracy[1][1] == 1.0
-    assert accuracy[1][2] == 2 / 3
-    assert accuracy[1][3] == 2 / 3
+    assert accuracy[1] == 1 / 3


 def test_accuracy_basic(basic_classifications: list[Classification]):

@@ -87,20 +81,10 @@ def test_accuracy_basic(basic_classifications: list[Classification]):
     expected_metrics = [
         {
             "type": "Accuracy",
-            "value": [2 / 3,
+            "value": [2 / 3, 1 / 3],
             "parameters": {
                 "score_thresholds": [0.25, 0.75],
                 "hardmax": True,
-                "label": "0",
-            },
-        },
-        {
-            "type": "Accuracy",
-            "value": [1.0, 2 / 3],
-            "parameters": {
-                "score_thresholds": [0.25, 0.75],
-                "hardmax": True,
-                "label": "3",
             },
         },
     ]

@@ -124,29 +108,10 @@ def test_accuracy_with_animal_example(
     expected_metrics = [
         {
             "type": "Accuracy",
-            "value": [2.0 /
-            "parameters": {
-                "score_thresholds": [0.5],
-                "hardmax": True,
-                "label": "bird",
-            },
-        },
-        {
-            "type": "Accuracy",
-            "value": [0.5],
+            "value": [2.0 / 6.0],
             "parameters": {
                 "score_thresholds": [0.5],
                 "hardmax": True,
-                "label": "dog",
-            },
-        },
-        {
-            "type": "Accuracy",
-            "value": [2 / 3],
-            "parameters": {
-                "score_thresholds": [0.5],
-                "hardmax": True,
-                "label": "cat",
             },
         },
     ]

@@ -170,38 +135,10 @@ def test_accuracy_color_example(
     expected_metrics = [
         {
             "type": "Accuracy",
-            "value": [2 /
-            "parameters": {
-                "score_thresholds": [0.5],
-                "hardmax": True,
-                "label": "white",
-            },
-        },
-        {
-            "type": "Accuracy",
-            "value": [2 / 3],
+            "value": [2 / 6],
             "parameters": {
                 "score_thresholds": [0.5],
                 "hardmax": True,
-                "label": "red",
-            },
-        },
-        {
-            "type": "Accuracy",
-            "value": [2 / 3],
-            "parameters": {
-                "score_thresholds": [0.5],
-                "hardmax": True,
-                "label": "blue",
-            },
-        },
-        {
-            "type": "Accuracy",
-            "value": [5 / 6],
-            "parameters": {
-                "score_thresholds": [0.5],
-                "hardmax": True,
-                "label": "black",
             },
         },
     ]

@@ -237,7 +174,6 @@ def test_accuracy_with_image_example(
             "parameters": {
                 "score_thresholds": [0.0],
                 "hardmax": True,
-                "label": "v4",
             },
         },
     ]

@@ -269,29 +205,10 @@ def test_accuracy_with_tabular_example(
     expected_metrics = [
         {
             "type": "Accuracy",
-            "value": [
-            "parameters": {
-                "score_thresholds": [0.0],
-                "hardmax": True,
-                "label": "0",
-            },
-        },
-        {
-            "type": "Accuracy",
-            "value": [0.5],
-            "parameters": {
-                "score_thresholds": [0.0],
-                "hardmax": True,
-                "label": "1",
-            },
-        },
-        {
-            "type": "Accuracy",
-            "value": [0.8],
+            "value": [5 / 10],
             "parameters": {
                 "score_thresholds": [0.0],
                 "hardmax": True,
-                "label": "2",
             },
         },
     ]
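The rewritten assertions show that classification accuracy is now a single multiclass value per score threshold instead of a per-label vector. A minimal sketch of that definition, consistent with the computation.py change further down; the arrays here are hypothetical and are not the package's test fixture:

import numpy as np

# Hypothetical 3-datum example: hardmax predictions, their confidence
# scores, and the groundtruth labels.
groundtruth = np.array([0, 1, 2])
prediction = np.array([0, 1, 1])   # the third prediction is wrong
scores = np.array([0.9, 0.5, 0.3])

for threshold in (0.25, 0.75):
    # A hardmax prediction only counts toward accuracy if it is correct
    # and its score clears the threshold.
    correct = (prediction == groundtruth) & (scores >= threshold)
    print(threshold, correct.sum() / len(groundtruth))
# 0.25 -> 2/3, 0.75 -> 1/3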
valor_lite-0.33.13/tests/object_detection/test_accuracy.py (new file)

@@ -0,0 +1,492 @@
+import numpy as np
+from valor_lite.object_detection import DataLoader, Detection, MetricType
+from valor_lite.object_detection.computation import compute_metrics
+
+
+def test__compute_average_precision():
+
+    sorted_pairs = np.array(
+        [
+            # dt, gt, pd, iou, gl, pl, score,
+            [0.0, 0.0, 2.0, 0.25, 0.0, 0.0, 0.95],
+            [0.0, 0.0, 3.0, 0.33333, 0.0, 0.0, 0.9],
+            [0.0, 0.0, 4.0, 0.66667, 0.0, 0.0, 0.65],
+            [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.1],
+            [0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.01],
+        ]
+    )
+
+    label_metadata = np.array([[1, 5, 0]])
+    iou_thresholds = np.array([0.1, 0.6])
+    score_thresholds = np.array([0.0])
+
+    (_, _, accuracy, _, _) = compute_metrics(
+        sorted_pairs,
+        label_metadata=label_metadata,
+        iou_thresholds=iou_thresholds,
+        score_thresholds=score_thresholds,
+    )
+
+    expected = np.array(
+        [
+            [0.2],  # iou = 0.1
+            [0.2],  # iou = 0.6
+        ]
+    )
+    assert (accuracy == expected).all()
+
+
+def test_ap_using_torch_metrics_example(
+    torchmetrics_detections: list[Detection],
+):
+    """
+    cf with torch metrics/pycocotools results listed here:
+    https://github.com/Lightning-AI/metrics/blob/107dbfd5fb158b7ae6d76281df44bd94c836bfce/tests/unittests/detection/test_map.py#L231
+    """
+
+    loader = DataLoader()
+    loader.add_bounding_boxes(torchmetrics_detections)
+    evaluator = loader.finalize()
+
+    assert evaluator.ignored_prediction_labels == ["3"]
+    assert evaluator.missing_prediction_labels == []
+    assert evaluator.n_datums == 4
+    assert evaluator.n_labels == 6
+    assert evaluator.n_groundtruths == 20
+    assert evaluator.n_predictions == 19
+
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5, 0.75],
+        as_dict=True,
+    )
+
+    # test Accuracy
+    actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+    expected_metrics = [
+        {
+            "type": "Accuracy",
+            "value": 9 / 19,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.5,
+            },
+        },
+        {
+            "type": "Accuracy",
+            "value": 8 / 19,
+            "parameters": {
+                "iou_threshold": 0.75,
+                "score_threshold": 0.5,
+            },
+        },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_accuracy_metrics_first_class(
+    basic_detections_first_class: list[Detection],
+    basic_rotated_detections_first_class: list[Detection],
+):
+    """
+    Basic object detection test.
+
+    groundtruths
+        datum uid1
+            box 1 - label v1 - tp
+            box 3 - label v2 - fn missing prediction
+        datum uid2
+            box 2 - label v1 - fn missing prediction
+
+    predictions
+        datum uid1
+            box 1 - label v1 - score 0.3 - tp
+        datum uid2
+            box 2 - label v2 - score 0.98 - fp
+    """
+    for input_, method in [
+        (basic_detections_first_class, DataLoader.add_bounding_boxes),
+        (basic_rotated_detections_first_class, DataLoader.add_polygons),
+    ]:
+        loader = DataLoader()
+        method(loader, input_)
+        evaluator = loader.finalize()
+
+        metrics = evaluator.evaluate(
+            iou_thresholds=[0.1, 0.6],
+            score_thresholds=[0.0, 0.5],
+            as_dict=True,
+        )
+
+        assert evaluator.ignored_prediction_labels == []
+        assert evaluator.missing_prediction_labels == []
+        assert evaluator.n_datums == 2
+        assert evaluator.n_labels == 1
+        assert evaluator.n_groundtruths == 2
+        assert evaluator.n_predictions == 1
+
+        # test Accuracy
+        actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+        expected_metrics = [
+            {
+                "type": "Accuracy",
+                "value": 1.0,
+                "parameters": {
+                    "iou_threshold": 0.1,
+                    "score_threshold": 0.0,
+                },
+            },
+            {
+                "type": "Accuracy",
+                "value": 1.0,
+                "parameters": {
+                    "iou_threshold": 0.6,
+                    "score_threshold": 0.0,
+                },
+            },
+            {
+                "type": "Accuracy",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.1,
+                    "score_threshold": 0.5,
+                },
+            },
+            {
+                "type": "Accuracy",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.6,
+                    "score_threshold": 0.5,
+                },
+            },
+        ]
+        for m in actual_metrics:
+            assert m in expected_metrics
+        for m in expected_metrics:
+            assert m in actual_metrics
+
+
+def test_accuracy_metrics_second_class(
+    basic_detections_second_class: list[Detection],
+    basic_rotated_detections_second_class: list[Detection],
+):
+    """
+    Basic object detection test.
+
+    groundtruths
+        datum uid1
+            box 3 - label v2 - fn missing prediction
+        datum uid2
+            none
+    predictions
+        datum uid1
+            none
+        datum uid2
+            box 2 - label v2 - score 0.98 - fp
+    """
+    for input_, method in [
+        (basic_detections_second_class, DataLoader.add_bounding_boxes),
+        (basic_rotated_detections_second_class, DataLoader.add_polygons),
+    ]:
+        loader = DataLoader()
+        method(loader, input_)
+        evaluator = loader.finalize()
+
+        metrics = evaluator.evaluate(
+            iou_thresholds=[0.1, 0.6],
+            score_thresholds=[0.0, 0.5],
+            as_dict=True,
+        )
+
+        assert evaluator.ignored_prediction_labels == []
+        assert evaluator.missing_prediction_labels == []
+        assert evaluator.n_datums == 2
+        assert evaluator.n_labels == 1
+        assert evaluator.n_groundtruths == 1
+        assert evaluator.n_predictions == 1
+
+        # test Accuracy
+        actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+        expected_metrics = [
+            {
+                "type": "Accuracy",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.1,
+                    "score_threshold": 0.0,
+                },
+            },
+            {
+                "type": "Accuracy",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.6,
+                    "score_threshold": 0.0,
+                },
+            },
+            {
+                "type": "Accuracy",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.1,
+                    "score_threshold": 0.5,
+                },
+            },
+            {
+                "type": "Accuracy",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.6,
+                    "score_threshold": 0.5,
+                },
+            },
+        ]
+        for m in actual_metrics:
+            assert m in expected_metrics
+        for m in expected_metrics:
+            assert m in actual_metrics
+
+
+def test_accuracy_false_negatives_single_datum_baseline(
+    false_negatives_single_datum_baseline_detections: list[Detection],
+):
+    """This is the baseline for the below test. In this case there are two predictions and
+    one groundtruth, but the highest confident prediction overlaps sufficiently with the groundtruth
+    so there is not a penalty for the false negative so the Accuracy is 1
+    """
+
+    loader = DataLoader()
+    loader.add_bounding_boxes(false_negatives_single_datum_baseline_detections)
+    evaluator = loader.finalize()
+
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0, 0.9],
+        as_dict=True,
+    )
+
+    actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+    expected_metrics = [
+        {
+            "type": "Accuracy",
+            "value": 0.5,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+            },
+        },
+        {
+            "type": "Accuracy",
+            "value": 0.0,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.9,
+            },
+        },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_accuracy_false_negatives_single_datum(
+    false_negatives_single_datum_detections: list[Detection],
+):
+    """Tests where high confidence false negative was not being penalized. The
+    difference between this test and the above is that here the prediction with higher confidence
+    does not sufficiently overlap the groundtruth and so is penalized and we get an Accuracy of 0.5
+    """
+
+    loader = DataLoader()
+    loader.add_bounding_boxes(false_negatives_single_datum_detections)
+    evaluator = loader.finalize()
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
+
+    actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+    expected_metrics = [
+        {
+            "type": "Accuracy",
+            "value": 0.5,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+            },
+        }
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_accuracy_false_negatives_two_datums_one_empty_low_confidence_of_fp(
+    false_negatives_two_datums_one_empty_low_confidence_of_fp_detections: list[
+        Detection
+    ],
+):
+    """In this test we have
+    1. An image with a matching groundtruth and prediction (same class and high IOU)
+    2. A second image with empty groundtruth annotation but a prediction with lower confidence
+    then the prediction on the first image.
+
+    In this case, the Accuracy should be 1.0 since the false positive has lower confidence than the true positive
+
+    """
+
+    loader = DataLoader()
+    loader.add_bounding_boxes(
+        false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
+    )
+    evaluator = loader.finalize()
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
+
+    actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+    expected_metrics = [
+        {
+            "type": "Accuracy",
+            "value": 0.5,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+            },
+        }
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_accuracy_false_negatives_two_datums_one_empty_high_confidence_of_fp(
+    false_negatives_two_datums_one_empty_high_confidence_of_fp_detections: list[
+        Detection
+    ],
+):
+    """In this test we have
+    1. An image with a matching groundtruth and prediction (same class and high IOU)
+    2. A second image with empty groundtruth annotation and a prediction with higher confidence
+    then the prediction on the first image.
+
+    In this case, the Accuracy should be 0.5 since the false positive has higher confidence than the true positive
+    """
+
+    loader = DataLoader()
+    loader.add_bounding_boxes(
+        false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
+    )
+    evaluator = loader.finalize()
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
+
+    actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+    expected_metrics = [
+        {
+            "type": "Accuracy",
+            "value": 0.5,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+            },
+        }
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_accuracy_false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp(
+    false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections: list[
+        Detection
+    ],
+):
+    """In this test we have
+    1. An image with a matching groundtruth and prediction (same class, `"value"`, and high IOU)
+    2. A second image with a groundtruth annotation with class `"other value"` and a prediction with lower confidence
+    then the prediction on the first image.
+
+    In this case, the Accuracy for class `"value"` should be 1 since the false positive has lower confidence than the true positive.
+    Accuracy for class `"other value"` should be 0 since there is no prediction for the `"other value"` groundtruth
+    """
+    loader = DataLoader()
+    loader.add_bounding_boxes(
+        false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
+    )
+    evaluator = loader.finalize()
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
+
+    actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+    expected_metrics = [
+        {
+            "type": "Accuracy",
+            "value": 0.5,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+            },
+        },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_accuracy_false_negatives_two_datums_one_only_with_different_class_high_confidence_of_fp(
+    false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections: list[
+        Detection
+    ],
+):
+    """In this test we have
+    1. An image with a matching groundtruth and prediction (same class, `"value"`, and high IOU)
+    2. A second image with a groundtruth annotation with class `"other value"` and a prediction with higher confidence
+    then the prediction on the first image.
+
+    In this case, the Accuracy for class `"value"` should be 0.5 since the false positive has higher confidence than the true positive.
+    Accuracy for class `"other value"` should be 0 since there is no prediction for the `"other value"` groundtruth
+    """
+    loader = DataLoader()
+    loader.add_bounding_boxes(
+        false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
+    )
+    evaluator = loader.finalize()
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+        as_dict=True,
+    )
+
+    actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+    expected_metrics = [
+        {
+            "type": "Accuracy",
+            "value": 0.5,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+            },
+        },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
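The fixtures behind these tests are defined in conftest.py, which is unchanged in this diff, so the following is only a hedged arithmetic reading of the baseline false-negative test above under the accuracy definition introduced in computation.py (true positives over total predictions):

# Back-of-the-envelope check of the baseline false-negative expectations:
# one groundtruth, two predictions, and the higher-scoring prediction matches.
n_predictions = 2
tp_at_score_0_0 = 1  # the matching prediction survives the 0.0 threshold
tp_at_score_0_9 = 0  # assumption: no true positive remains at the 0.9 threshold

assert tp_at_score_0_0 / n_predictions == 0.5
assert tp_at_score_0_9 / n_predictions == 0.0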
{valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_average_precision.py

@@ -24,7 +24,7 @@ def test__compute_average_precision():
     iou_thresholds = np.array([0.1, 0.6])
     score_thresholds = np.array([0.0])

-    (results, _, _, _,) = compute_metrics(
+    (results, _, _, _, _) = compute_metrics(
         sorted_pairs,
         label_metadata=label_metadata,
         iou_thresholds=iou_thresholds,
{valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_average_recall.py

@@ -25,7 +25,7 @@ def test__compute_average_recall():
     iou_thresholds = np.array([0.1, 0.6])
     score_thresholds = np.array([0.5, 0.93, 0.98])

-    (_, results, _, _,) = compute_metrics(
+    (_, results, _, _, _,) = compute_metrics(
         sorted_pairs,
         label_metadata=label_metadata,
         iou_thresholds=iou_thresholds,
{valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_pr_curve.py

@@ -24,7 +24,7 @@ def test_pr_curve_simple():
     iou_thresholds = np.array([0.1, 0.6])
     score_thresholds = np.array([0.0])

-    (_, _, _, pr_curve) = compute_metrics(
+    (_, _, _, _, pr_curve) = compute_metrics(
         sorted_pairs,
         label_metadata=label_metadata,
         iou_thresholds=iou_thresholds,
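These three one-line fixes follow from compute_metrics now returning five arrays instead of four, with the new accuracy array inserted between the average-recall results and the per-label counts. A hedged sketch of the new unpacking order, reusing the tiny input from the new test file above (assumes valor-lite 0.33.13 is installed):

import numpy as np
from valor_lite.object_detection.computation import compute_metrics

# Same minimal input as tests/object_detection/test_accuracy.py above.
sorted_pairs = np.array(
    [
        [0.0, 0.0, 2.0, 0.25, 0.0, 0.0, 0.95],
        [0.0, 0.0, 3.0, 0.33333, 0.0, 0.0, 0.9],
        [0.0, 0.0, 4.0, 0.66667, 0.0, 0.0, 0.65],
        [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.1],
        [0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.01],
    ]
)

# The return tuple now has five members; accuracy sits third.
ap_results, ar_results, accuracy, counts, pr_curve = compute_metrics(
    sorted_pairs,
    label_metadata=np.array([[1, 5, 0]]),
    iou_thresholds=np.array([0.1, 0.6]),
    score_thresholds=np.array([0.0]),
)
assert accuracy.shape == (2, 1)  # (n_ious, n_scores)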
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/computation.py

@@ -182,9 +182,9 @@ def compute_metrics(
         out=precision,
     )

-    accuracy = np.
+    accuracy = np.zeros(n_scores, dtype=np.float64)
     np.divide(
-
+        counts[:, :, 0].sum(axis=1),
         float(n_datums),
         out=accuracy,
     )
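The rewritten block computes one multiclass accuracy per score threshold: the true-positive counts in counts[:, :, 0] are summed over labels and divided by the number of datums. A standalone sketch of that arithmetic with a hypothetical counts array (the real array carries more columns than shown here; only column 0, the true positives, matters for accuracy):

import numpy as np

# Hypothetical counts: counts[score_idx, label_idx, 0] = true positives
# for that label at that score threshold (3 datums, 3 labels).
counts = np.array(
    [
        [[2.0], [0.0], [0.0]],  # score >= 0.25
        [[1.0], [0.0], [0.0]],  # score >= 0.75
    ]
)
n_datums = 3
n_scores = counts.shape[0]

accuracy = np.zeros(n_scores, dtype=np.float64)
np.divide(counts[:, :, 0].sum(axis=1), float(n_datums), out=accuracy)
print(accuracy)  # [0.66666667 0.33333333]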
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/manager.py

@@ -367,6 +367,14 @@ class Evaluator:
             )
         ]

+        metrics[MetricType.Accuracy] = [
+            Accuracy(
+                value=accuracy.astype(float).tolist(),
+                score_thresholds=score_thresholds,
+                hardmax=hardmax,
+            )
+        ]
+
         for label_idx, label in self.index_to_label.items():

             kwargs = {

@@ -401,12 +409,6 @@ class Evaluator:
                     **kwargs,
                 )
             )
-            metrics[MetricType.Accuracy].append(
-                Accuracy(
-                    value=accuracy[:, label_idx].astype(float).tolist(),
-                    **kwargs,
-                )
-            )
             metrics[MetricType.F1].append(
                 F1(
                     value=f1_score[:, label_idx].astype(float).tolist(),
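The net effect on the as_dict=True output is a single Accuracy entry for the whole evaluation, emitted outside the per-label loop and carrying no "label" parameter, which is exactly what the updated test expectations above assert. For example:

# Serialized Accuracy metric produced by Evaluator.evaluate(..., as_dict=True)
# in 0.33.13, as asserted in tests/classification/test_accuracy.py above.
accuracy_metric = {
    "type": "Accuracy",
    "value": [2 / 3, 1 / 3],  # one value per score threshold
    "parameters": {
        "score_thresholds": [0.25, 0.75],
        "hardmax": True,  # note: no "label" key anymore
    },
}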
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/metric.py

@@ -158,24 +158,23 @@ class Recall(_ThresholdValue):
     pass


-class
+class F1(_ThresholdValue):
     """
-
+    F1 score for a specific class label.

-    This class calculates the
-    classification task.
-    true negatives over all predictions.
+    This class calculates the F1 score at various score thresholds for a binary
+    classification task.

     Attributes
     ----------
     value : list[float]
-
+        F1 scores computed at each score threshold.
     score_thresholds : list[float]
-        Score thresholds at which the
+        Score thresholds at which the F1 scores are computed.
     hardmax : bool
         Indicates whether hardmax thresholding was used.
     label : str
-        The class label for which the
+        The class label for which the F1 score is computed.

     Methods
     -------

@@ -188,23 +187,21 @@ class Accuracy(_ThresholdValue):
     pass


-
+@dataclass
+class Accuracy:
     """
-
+    Multiclass accuracy metric.

-    This class calculates the
-    classification task.
+    This class calculates the accuracy at various score thresholds.

     Attributes
     ----------
     value : list[float]
-
+        Accuracy values computed at each score threshold.
     score_thresholds : list[float]
-        Score thresholds at which the
+        Score thresholds at which the accuracy values are computed.
     hardmax : bool
         Indicates whether hardmax thresholding was used.
-    label : str
-        The class label for which the F1 score is computed.

     Methods
     -------

@@ -214,7 +211,22 @@ class F1(_ThresholdValue):
         Converts the instance to a dictionary representation.
     """

-
+    value: list[float]
+    score_thresholds: list[float]
+    hardmax: bool
+
+    def to_metric(self) -> Metric:
+        return Metric(
+            type=type(self).__name__,
+            value=self.value,
+            parameters={
+                "score_thresholds": self.score_thresholds,
+                "hardmax": self.hardmax,
+            },
+        )
+
+    def to_dict(self) -> dict:
+        return self.to_metric().to_dict()


 @dataclass
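Accuracy is no longer a _ThresholdValue subclass carrying a label; it is a standalone dataclass serialized through to_metric()/to_dict(). The sketch below mirrors the new class with a stand-in Metric dataclass, since the package's real Metric schema is not shown in this diff and its exact definition is an assumption here:

from dataclasses import asdict, dataclass


@dataclass
class Metric:  # stand-in for the package's Metric schema (assumed shape)
    type: str
    value: list[float]
    parameters: dict

    def to_dict(self) -> dict:
        return asdict(self)


@dataclass
class Accuracy:  # mirrors the new class added in this diff
    value: list[float]
    score_thresholds: list[float]
    hardmax: bool

    def to_metric(self) -> Metric:
        return Metric(
            type=type(self).__name__,
            value=self.value,
            parameters={
                "score_thresholds": self.score_thresholds,
                "hardmax": self.hardmax,
            },
        )

    def to_dict(self) -> dict:
        return self.to_metric().to_dict()


print(Accuracy(value=[2 / 3, 1 / 3], score_thresholds=[0.25, 0.75], hardmax=True).to_dict())
# {'type': 'Accuracy', 'value': [...], 'parameters': {'score_thresholds': [...], 'hardmax': True}}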
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/computation.py

@@ -282,6 +282,7 @@ def compute_metrics(
     ],
     NDArray[np.float64],
     NDArray[np.float64],
+    NDArray[np.float64],
 ]:
     """
     Computes Object Detection metrics.

@@ -309,13 +310,15 @@ def compute_metrics(

     Returns
     -------
-    tuple[NDArray, NDArray, NDArray, float]
+    tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64], float]
         Average Precision results.
-    tuple[NDArray, NDArray, NDArray, float]
+    tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64], float]
         Average Recall results.
-    np.
-
-    np.
+    NDArray[np.float64]
+        Accuracy.
+    NDArray[np.float64]
+        Precision, Recall, TP, FP, FN, F1 Score.
+    NDArray[np.float64]
         Interpolated Precision-Recall Curves.
     """

@@ -329,9 +332,10 @@ def compute_metrics(
     elif n_scores == 0:
         raise ValueError("At least one score threshold must be passed.")

-    average_precision = np.zeros((n_ious, n_labels))
-    average_recall = np.zeros((n_scores, n_labels))
-
+    average_precision = np.zeros((n_ious, n_labels), dtype=np.float64)
+    average_recall = np.zeros((n_scores, n_labels), dtype=np.float64)
+    accuracy = np.zeros((n_ious, n_scores), dtype=np.float64)
+    counts = np.zeros((n_ious, n_scores, n_labels, 6), dtype=np.float64)

     pd_labels = data[:, 5].astype(np.int32)
     scores = data[:, 6]

@@ -417,14 +421,6 @@ def compute_metrics(
                 out=f1_score,
             )

-            accuracy = np.zeros_like(tp_count)
-            np.divide(
-                tp_count,
-                (gt_count + pd_count),
-                where=(gt_count + pd_count) > 1e-9,
-                out=accuracy,
-            )
-
             counts[iou_idx][score_idx] = np.concatenate(
                 (
                     tp_count[:, np.newaxis],

@@ -433,11 +429,18 @@ def compute_metrics(
                     precision[:, np.newaxis],
                     recall[:, np.newaxis],
                     f1_score[:, np.newaxis],
-                    accuracy[:, np.newaxis],
                 ),
                 axis=1,
             )

+            # caluculate accuracy
+            total_pd_count = label_metadata[:, 1].sum()
+            accuracy[iou_idx, score_idx] = (
+                (tp_count.sum() / total_pd_count)
+                if total_pd_count > 1e-9
+                else 0.0
+            )
+
             # calculate recall for AR
             average_recall[score_idx] += recall

@@ -552,6 +555,7 @@ def compute_metrics(
     return (
         ap_results,
         ar_results,
+        accuracy,
         counts,
         pr_curve,
     )
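The new detection accuracy is computed once per (IoU threshold, score threshold) cell as total true positives across all labels divided by the total prediction count from label_metadata[:, 1], replacing the old per-label tp / (gt + pd) column in counts. A standalone sketch with hypothetical numbers chosen to reproduce the 9 / 19 value from the torchmetrics example above:

import numpy as np

# Hypothetical per-label true-positive counts at one (iou_idx, score_idx) cell.
tp_count = np.array([5.0, 3.0, 1.0])

# Hypothetical label_metadata rows: [n_groundtruths, n_predictions, ...] per label.
label_metadata = np.array(
    [
        [7, 8, 0],
        [8, 7, 0],
        [5, 4, 0],
    ]
)

total_pd_count = label_metadata[:, 1].sum()  # 19 predictions overall
accuracy_cell = (tp_count.sum() / total_pd_count) if total_pd_count > 1e-9 else 0.0
print(accuracy_cell)  # 9 / 19 ≈ 0.4737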
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/manager.py

@@ -506,6 +506,7 @@ class Evaluator:
                 average_recall_averaged_over_scores,
                 mean_average_recall_averaged_over_scores,
             ),
+            accuracy,
             precision_recall,
             pr_curves,
         ) = compute_metrics(

@@ -593,6 +594,16 @@ class Evaluator:
             )
         ]

+        metrics[MetricType.Accuracy] = [
+            Accuracy(
+                value=float(accuracy[iou_idx, score_idx]),
+                iou_threshold=iou_thresholds[iou_idx],
+                score_threshold=score_thresholds[score_idx],
+            )
+            for iou_idx in range(accuracy.shape[0])
+            for score_idx in range(accuracy.shape[1])
+        ]
+
         metrics[MetricType.PrecisionRecallCurve] = [
             PrecisionRecallCurve(
                 precisions=pr_curves[iou_idx, label_idx, :, 0]

@@ -650,12 +661,6 @@ class Evaluator:
                         **kwargs,
                     )
                 )
-                metrics[MetricType.Accuracy].append(
-                    Accuracy(
-                        value=float(row[6]),
-                        **kwargs,
-                    )
-                )

         if as_dict:
             return {
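As a result, evaluate(..., as_dict=True) now yields one Accuracy entry per (iou_threshold, score_threshold) pair, with no label parameter and no dependence on column 6 of the counts array. For example, the baseline false-negative test above expects:

# Serialized detection Accuracy entries for
# evaluate(iou_thresholds=[0.5], score_thresholds=[0.0, 0.9], as_dict=True).
expected = [
    {
        "type": "Accuracy",
        "value": 0.5,
        "parameters": {"iou_threshold": 0.5, "score_threshold": 0.0},
    },
    {
        "type": "Accuracy",
        "value": 0.0,
        "parameters": {"iou_threshold": 0.5, "score_threshold": 0.9},
    },
]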
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/metric.py

@@ -160,9 +160,9 @@ class Recall(_ClassMetric):
     pass


-class
+class F1(_ClassMetric):
     """
-
+    F1 score for a specific class label in object detection.

     This class encapsulates a metric value for a particular class label,
     along with the associated Intersection over Union (IoU) threshold and

@@ -190,20 +190,18 @@ class Accuracy(_ClassMetric):
     pass


-
+@dataclass
+class Accuracy:
     """
-
+    Accuracy metric for the object detection task type.

-    This class encapsulates a metric value
-
-    confidence score threshold.
+    This class encapsulates a metric value at a specific Intersection
+    over Union (IoU) threshold and confidence score threshold.

     Attributes
     ----------
     value : float
         The metric value.
-    label : str
-        The class label for which the metric is calculated.
     iou_threshold : float
         The IoU threshold used to determine matches between predicted and ground truth boxes.
     score_threshold : float

@@ -217,7 +215,22 @@ class F1(_ClassMetric):
         Converts the instance to a dictionary representation.
     """

-
+    value: float
+    iou_threshold: float
+    score_threshold: float
+
+    def to_metric(self) -> Metric:
+        return Metric(
+            type=type(self).__name__,
+            value=self.value,
+            parameters={
+                "iou_threshold": self.iou_threshold,
+                "score_threshold": self.score_threshold,
+            },
+        )
+
+    def to_dict(self) -> dict:
+        return self.to_metric().to_dict()


 @dataclass
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite.egg-info/SOURCES.txt

@@ -24,6 +24,7 @@ tests/classification/test_schemas.py
 tests/classification/test_stability.py
 tests/object_detection/__init__.py
 tests/object_detection/conftest.py
+tests/object_detection/test_accuracy.py
 tests/object_detection/test_average_precision.py
 tests/object_detection/test_average_recall.py
 tests/object_detection/test_confusion_matrix.py