valor-lite 0.33.14.tar.gz → 0.33.15.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of valor-lite might be problematic.
- {valor_lite-0.33.14/valor_lite.egg-info → valor_lite-0.33.15}/PKG-INFO +1 -1
- valor_lite-0.33.15/tests/object_detection/test_f1.py +470 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_precision.py +38 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_recall.py +38 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/computation.py +5 -2
- {valor_lite-0.33.14 → valor_lite-0.33.15/valor_lite.egg-info}/PKG-INFO +1 -1
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite.egg-info/SOURCES.txt +1 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/LICENSE +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/README.md +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/benchmarks/.gitignore +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/benchmarks/benchmark_classification.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/benchmarks/benchmark_objdet.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/examples/.gitignore +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/examples/object-detection.ipynb +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/examples/tabular_classification.ipynb +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/pyproject.toml +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/setup.cfg +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/__init__.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/__init__.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/conftest.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_accuracy.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_confusion_matrix.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_counts.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_dataloader.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_evaluator.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_f1.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_filtering.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_precision.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_recall.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_rocauc.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_schemas.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_stability.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/__init__.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/conftest.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_accuracy.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_average_precision.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_average_recall.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_confusion_matrix.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_counts.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_dataloader.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_evaluator.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_filtering.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_iou.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_pr_curve.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_schemas.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_stability.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/__init__.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/conftest.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_accuracy.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_annotation.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_confusion_matrix.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_dataloader.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_evaluator.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_f1.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_filtering.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_iou.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_precision.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_recall.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_stability.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/text_generation/__init__.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/LICENSE +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/__init__.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/__init__.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/annotation.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/computation.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/manager.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/metric.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/utilities.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/__init__.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/annotation.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/manager.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/metric.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/utilities.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/schemas.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/__init__.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/annotation.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/computation.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/manager.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/metric.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/utilities.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/text_generation/__init__.py +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite.egg-info/dependency_links.txt +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite.egg-info/requires.txt +0 -0
- {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite.egg-info/top_level.txt +0 -0
valor_lite-0.33.15/tests/object_detection/test_f1.py ADDED
@@ -0,0 +1,470 @@
+import numpy as np
+from valor_lite.object_detection import DataLoader, Detection, MetricType
+from valor_lite.object_detection.computation import compute_precion_recall
+
+
+def test__compute_f1():
+
+    sorted_pairs = np.array(
+        [
+            # dt, gt, pd, iou, gl, pl, score,
+            [0.0, 0.0, 2.0, 0.25, 0.0, 0.0, 0.95],
+            [0.0, 0.0, 3.0, 0.33333, 0.0, 0.0, 0.9],
+            [0.0, 0.0, 4.0, 0.66667, 0.0, 0.0, 0.65],
+            [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.1],
+            [0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.01],
+        ]
+    )
+
+    label_metadata = np.array([[1, 5, 0]])
+    iou_thresholds = np.array([0.1, 0.6])
+    score_thresholds = np.array([0.0])
+
+    (_, _, _, counts, _) = compute_precion_recall(
+        sorted_pairs,
+        label_metadata=label_metadata,
+        iou_thresholds=iou_thresholds,
+        score_thresholds=score_thresholds,
+    )
+
+    f1 = counts[:, :, :, 5]
+
+    # f1
+    expected = np.array(
+        [
+            [[1 / 3]],  # iou = 0.1
+            [[1 / 3]],  # iou = 0.6
+        ]
+    )
+    assert np.isclose(f1, expected).all()
+
+
+def test_f1_metrics_first_class(
+    basic_detections_first_class: list[Detection],
+    basic_rotated_detections_first_class: list[Detection],
+):
+    """
+    Basic object detection test.
+
+    groundtruths
+        datum uid1
+            box 1 - label v1 - tp
+            box 3 - label v2 - fn missing prediction
+        datum uid2
+            box 2 - label v1 - fn missing prediction
+
+    predictions
+        datum uid1
+            box 1 - label v1 - score 0.3 - tp
+        datum uid2
+            box 2 - label v2 - score 0.98 - fp
+    """
+    for input_, method in [
+        (basic_detections_first_class, DataLoader.add_bounding_boxes),
+        (basic_rotated_detections_first_class, DataLoader.add_polygons),
+    ]:
+        loader = DataLoader()
+        method(loader, input_)
+        evaluator = loader.finalize()
+
+        metrics = evaluator.evaluate(
+            iou_thresholds=[0.1, 0.6],
+            score_thresholds=[0.0, 0.5],
+        )
+
+        assert evaluator.ignored_prediction_labels == []
+        assert evaluator.missing_prediction_labels == []
+        assert evaluator.n_datums == 2
+        assert evaluator.n_labels == 1
+        assert evaluator.n_groundtruths == 2
+        assert evaluator.n_predictions == 1
+
+        # test F1
+        actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+        expected_metrics = [
+            {
+                "type": "F1",
+                "value": 2 / 3,
+                "parameters": {
+                    "iou_threshold": 0.1,
+                    "score_threshold": 0.0,
+                    "label": "v1",
+                },
+            },
+            {
+                "type": "F1",
+                "value": 2 / 3,
+                "parameters": {
+                    "iou_threshold": 0.6,
+                    "score_threshold": 0.0,
+                    "label": "v1",
+                },
+            },
+            {
+                "type": "F1",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.1,
+                    "score_threshold": 0.5,
+                    "label": "v1",
+                },
+            },
+            {
+                "type": "F1",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.6,
+                    "score_threshold": 0.5,
+                    "label": "v1",
+                },
+            },
+        ]
+        for m in actual_metrics:
+            assert m in expected_metrics
+        for m in expected_metrics:
+            assert m in actual_metrics
+
+
+def test_f1_metrics_second_class(
+    basic_detections_second_class: list[Detection],
+    basic_rotated_detections_second_class: list[Detection],
+):
+    """
+    Basic object detection test.
+
+    groundtruths
+        datum uid1
+            box 3 - label v2 - fn missing prediction
+        datum uid2
+            none
+    predictions
+        datum uid1
+            none
+        datum uid2
+            box 2 - label v2 - score 0.98 - fp
+    """
+    for input_, method in [
+        (basic_detections_second_class, DataLoader.add_bounding_boxes),
+        (basic_rotated_detections_second_class, DataLoader.add_polygons),
+    ]:
+        loader = DataLoader()
+        method(loader, input_)
+        evaluator = loader.finalize()
+
+        metrics = evaluator.evaluate(
+            iou_thresholds=[0.1, 0.6],
+            score_thresholds=[0.0, 0.5],
+        )
+
+        assert evaluator.ignored_prediction_labels == []
+        assert evaluator.missing_prediction_labels == []
+        assert evaluator.n_datums == 2
+        assert evaluator.n_labels == 1
+        assert evaluator.n_groundtruths == 1
+        assert evaluator.n_predictions == 1
+
+        # test F1
+        actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+        expected_metrics = [
+            {
+                "type": "F1",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.1,
+                    "score_threshold": 0.0,
+                    "label": "v2",
+                },
+            },
+            {
+                "type": "F1",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.6,
+                    "score_threshold": 0.0,
+                    "label": "v2",
+                },
+            },
+            {
+                "type": "F1",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.1,
+                    "score_threshold": 0.5,
+                    "label": "v2",
+                },
+            },
+            {
+                "type": "F1",
+                "value": 0.0,
+                "parameters": {
+                    "iou_threshold": 0.6,
+                    "score_threshold": 0.5,
+                    "label": "v2",
+                },
+            },
+        ]
+        for m in actual_metrics:
+            assert m in expected_metrics
+        for m in expected_metrics:
+            assert m in actual_metrics
+
+
+def test_f1_false_negatives_single_datum_baseline(
+    false_negatives_single_datum_baseline_detections: list[Detection],
+):
+    """This is the baseline for the below test. In this case there are two predictions and
+    one groundtruth, but the highest confident prediction overlaps sufficiently with the groundtruth
+    so there is not a penalty for the false negative so the AP is 1
+    """
+
+    loader = DataLoader()
+    loader.add_bounding_boxes(false_negatives_single_datum_baseline_detections)
+    evaluator = loader.finalize()
+
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0, 0.9],
+    )
+
+    actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+    expected_metrics = [
+        {
+            "type": "F1",
+            "value": 2 / 3,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": "value",
+            },
+        },
+        {
+            "type": "F1",
+            "value": 0.0,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.9,
+                "label": "value",
+            },
+        },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_f1_false_negatives_single_datum(
+    false_negatives_single_datum_detections: list[Detection],
+):
+    """Tests where high confidence false negative was not being penalized. The
+    difference between this test and the above is that here the prediction with higher confidence
+    does not sufficiently overlap the groundtruth and so is penalized and we get an AP of 0.5
+    """
+
+    loader = DataLoader()
+    loader.add_bounding_boxes(false_negatives_single_datum_detections)
+    evaluator = loader.finalize()
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+    )
+
+    actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+    expected_metrics = [
+        {
+            "type": "F1",
+            "value": 2 / 3,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": "value",
+            },
+        }
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_f1_false_negatives_two_datums_one_empty_low_confidence_of_fp(
+    false_negatives_two_datums_one_empty_low_confidence_of_fp_detections: list[
+        Detection
+    ],
+):
+    """In this test we have
+    1. An image with a matching groundtruth and prediction (same class and high IOU)
+    2. A second image with empty groundtruth annotation but a prediction with lower confidence
+    then the prediction on the first image.
+
+    In this case, the AP should be 1.0 since the false positive has lower confidence than the true positive
+
+    """
+
+    loader = DataLoader()
+    loader.add_bounding_boxes(
+        false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
+    )
+    evaluator = loader.finalize()
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+    )
+
+    actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+    expected_metrics = [
+        {
+            "type": "F1",
+            "value": 2 / 3,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": "value",
+            },
+        }
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_f1_false_negatives_two_datums_one_empty_high_confidence_of_fp(
+    false_negatives_two_datums_one_empty_high_confidence_of_fp_detections: list[
+        Detection
+    ],
+):
+    """In this test we have
+    1. An image with a matching groundtruth and prediction (same class and high IOU)
+    2. A second image with empty groundtruth annotation and a prediction with higher confidence
+    then the prediction on the first image.
+
+    In this case, the AP should be 0.5 since the false positive has higher confidence than the true positive
+    """
+
+    loader = DataLoader()
+    loader.add_bounding_boxes(
+        false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
+    )
+    evaluator = loader.finalize()
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+    )
+
+    actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+    expected_metrics = [
+        {
+            "type": "F1",
+            "value": 2 / 3,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": "value",
+            },
+        }
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_f1_false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp(
+    false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections: list[
+        Detection
+    ],
+):
+    """In this test we have
+    1. An image with a matching groundtruth and prediction (same class, `"value"`, and high IOU)
+    2. A second image with a groundtruth annotation with class `"other value"` and a prediction with lower confidence
+    then the prediction on the first image.
+
+    In this case, the AP for class `"value"` should be 1 since the false positive has lower confidence than the true positive.
+    AP for class `"other value"` should be 0 since there is no prediction for the `"other value"` groundtruth
+    """
+    loader = DataLoader()
+    loader.add_bounding_boxes(
+        false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
+    )
+    evaluator = loader.finalize()
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+    )
+
+    actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+    expected_metrics = [
+        {
+            "type": "F1",
+            "value": 2 / 3,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": "value",
+            },
+        },
+        {
+            "type": "F1",
+            "value": 0.0,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": "other value",
+            },
+        },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_f1_false_negatives_two_datums_one_only_with_different_class_high_confidence_of_fp(
+    false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections: list[
+        Detection
+    ],
+):
+    """In this test we have
+    1. An image with a matching groundtruth and prediction (same class, `"value"`, and high IOU)
+    2. A second image with a groundtruth annotation with class `"other value"` and a prediction with higher confidence
+    then the prediction on the first image.
+
+    In this case, the AP for class `"value"` should be 0.5 since the false positive has higher confidence than the true positive.
+    AP for class `"other value"` should be 0 since there is no prediction for the `"other value"` groundtruth
+    """
+    loader = DataLoader()
+    loader.add_bounding_boxes(
+        false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
+    )
+    evaluator = loader.finalize()
+    metrics = evaluator.evaluate(
+        iou_thresholds=[0.5],
+        score_thresholds=[0.0],
+    )
+
+    actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+    expected_metrics = [
+        {
+            "type": "F1",
+            "value": 2 / 3,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": "value",
+            },
+        },
+        {
+            "type": "F1",
+            "value": 0.0,
+            "parameters": {
+                "iou_threshold": 0.5,
+                "score_threshold": 0.0,
+                "label": "other value",
+            },
+        },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
{valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_precision.py CHANGED
@@ -1,4 +1,42 @@
+import numpy as np
 from valor_lite.object_detection import DataLoader, Detection, MetricType
+from valor_lite.object_detection.computation import compute_precion_recall
+
+
+def test__compute_precision():
+
+    sorted_pairs = np.array(
+        [
+            # dt, gt, pd, iou, gl, pl, score,
+            [0.0, 0.0, 2.0, 0.25, 0.0, 0.0, 0.95],
+            [0.0, 0.0, 3.0, 0.33333, 0.0, 0.0, 0.9],
+            [0.0, 0.0, 4.0, 0.66667, 0.0, 0.0, 0.65],
+            [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.1],
+            [0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.01],
+        ]
+    )
+
+    label_metadata = np.array([[1, 5, 0]])
+    iou_thresholds = np.array([0.1, 0.6])
+    score_thresholds = np.array([0.0])
+
+    (_, _, _, counts, _) = compute_precion_recall(
+        sorted_pairs,
+        label_metadata=label_metadata,
+        iou_thresholds=iou_thresholds,
+        score_thresholds=score_thresholds,
+    )
+
+    precision = counts[:, :, :, 3]
+
+    # precision
+    expected = np.array(
+        [
+            [0.2],  # iou = 0.1
+            [0.2],  # iou = 0.6
+        ]
+    )
+    assert (precision == expected).all()
 
 
 def test_precision_metrics_first_class(
{valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_recall.py CHANGED
@@ -1,4 +1,42 @@
+import numpy as np
 from valor_lite.object_detection import DataLoader, Detection, MetricType
+from valor_lite.object_detection.computation import compute_precion_recall
+
+
+def test__compute_recall():
+
+    sorted_pairs = np.array(
+        [
+            # dt, gt, pd, iou, gl, pl, score,
+            [0.0, 0.0, 2.0, 0.25, 0.0, 0.0, 0.95],
+            [0.0, 0.0, 3.0, 0.33333, 0.0, 0.0, 0.9],
+            [0.0, 0.0, 4.0, 0.66667, 0.0, 0.0, 0.65],
+            [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.1],
+            [0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.01],
+        ]
+    )
+
+    label_metadata = np.array([[1, 5, 0]])
+    iou_thresholds = np.array([0.1, 0.6])
+    score_thresholds = np.array([0.0])
+
+    (_, _, _, counts, _) = compute_precion_recall(
+        sorted_pairs,
+        label_metadata=label_metadata,
+        iou_thresholds=iou_thresholds,
+        score_thresholds=score_thresholds,
+    )
+
+    recall = counts[:, :, :, 4]
+
+    # precision
+    expected = np.array(
+        [
+            [1.0],  # iou = 0.1
+            [1.0],  # iou = 0.6
+        ]
+    )
+    assert (recall == expected).all()
 
 
 def test_recall_metrics_first_class(
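
Taken together, the three new unit tests read their metrics out of the counts tensor returned by compute_precion_recall, indexed as [iou_threshold, score_threshold, label, metric column] with precision, recall and F1 in columns 3, 4 and 5. That reading is inferred from the test code and from the counts[iou_idx][score_idx] assignment in computation.py rather than from any documented API; a minimal standalone sketch of the convention (the array shape and total column count are illustrative assumptions, not package values):

import numpy as np

# Illustrative shape: (n_iou_thresholds, n_score_thresholds, n_labels, n_columns).
# The column count of 6 is an assumption; the new tests only touch indices 3-5.
counts = np.zeros((2, 1, 1, 6))

precision = counts[:, :, :, 3]  # slice read by test__compute_precision
recall = counts[:, :, :, 4]     # slice read by test__compute_recall
f1 = counts[:, :, :, 5]         # slice read by test__compute_f1

assert f1.shape == (2, 1, 1)  # one value per (iou, score, label) combination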
{valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/computation.py CHANGED
@@ -408,17 +408,20 @@ def compute_precion_recall(
 
         # calculate component metrics
         recall = np.zeros_like(tp_count)
-        precision = np.zeros_like(tp_count)
         np.divide(tp_count, gt_count, where=gt_count > 1e-9, out=recall)
+
+        precision = np.zeros_like(tp_count)
         np.divide(tp_count, pd_count, where=pd_count > 1e-9, out=precision)
+
         fn_count = gt_count - tp_count
 
         f1_score = np.zeros_like(precision)
         np.divide(
-            np.multiply(precision, recall),
+            2 * np.multiply(precision, recall),
             (precision + recall),
             where=(precision + recall) > 1e-9,
             out=f1_score,
+            dtype=np.float64,
         )
 
         counts[iou_idx][score_idx] = np.concatenate(
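
The computation.py hunk above is the substantive change in this release: the F1 numerator moves from precision * recall to 2 * precision * recall, i.e. the standard harmonic mean F1 = 2PR / (P + R). A minimal standalone check (not part of the package) using the component values the new unit tests assert for the shared fixture, one true positive out of five predictions against a single ground truth at score threshold 0.0:

import numpy as np

# Component metrics asserted by the new tests for the shared fixture:
# precision = 1 TP / 5 predictions = 0.2, recall = 1 TP / 1 ground truth = 1.0.
precision = np.array([0.2])
recall = np.array([1.0])

# The pre-fix numerator (precision * recall) understates F1 by a factor of two.
f1_old = (precision * recall) / (precision + recall)       # 1/6
f1_new = (2 * precision * recall) / (precision + recall)   # 1/3

assert np.isclose(f1_old, 1 / 6).all()
assert np.isclose(f1_new, 1 / 3).all()  # matches the value expected in test__compute_f1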
{valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite.egg-info/SOURCES.txt CHANGED
@@ -31,6 +31,7 @@ tests/object_detection/test_confusion_matrix.py
 tests/object_detection/test_counts.py
 tests/object_detection/test_dataloader.py
 tests/object_detection/test_evaluator.py
+tests/object_detection/test_f1.py
 tests/object_detection/test_filtering.py
 tests/object_detection/test_iou.py
 tests/object_detection/test_pr_curve.py